Changes
On December 2, 2024 at 6:20:39 PM UTC, admin:
- Changed value of field doi_date_published to 2024-12-02 in CommonCrawl
- Changed value of field doi_status to True in CommonCrawl
- Added resource Original Metadata to CommonCrawl
f | 1 | { | f | 1 | { |
2 | "access_rights": "", | 2 | "access_rights": "", | ||
3 | "author": "Hu Xu", | 3 | "author": "Hu Xu", | ||
4 | "author_email": "", | 4 | "author_email": "", | ||
5 | "citation": [ | 5 | "citation": [ | ||
6 | "https://doi.org/10.48550/arXiv.2103.10531", | 6 | "https://doi.org/10.48550/arXiv.2103.10531", | ||
7 | "https://doi.org/10.48550/arXiv.2210.05613", | 7 | "https://doi.org/10.48550/arXiv.2210.05613", | ||
8 | "https://doi.org/10.48550/arXiv.2310.15552", | 8 | "https://doi.org/10.48550/arXiv.2310.15552", | ||
9 | "https://doi.org/10.48550/arXiv.2311.13857" | 9 | "https://doi.org/10.48550/arXiv.2311.13857" | ||
10 | ], | 10 | ], | ||
11 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | 11 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | ||
12 | "defined_in": "https://doi.org/10.48550/arXiv.2309.16671", | 12 | "defined_in": "https://doi.org/10.48550/arXiv.2309.16671", | ||
13 | "doi": "10.57702/ygkh0gji", | 13 | "doi": "10.57702/ygkh0gji", | ||
n | 14 | "doi_date_published": null, | n | 14 | "doi_date_published": "2024-12-02", |
15 | "doi_publisher": "TIB", | 15 | "doi_publisher": "TIB", | ||
n | 16 | "doi_status": false, | n | 16 | "doi_status": true, |
17 | "domain": "https://service.tib.eu/ldmservice", | 17 | "domain": "https://service.tib.eu/ldmservice", | ||
18 | "extra_authors": [ | 18 | "extra_authors": [ | ||
19 | { | 19 | { | ||
20 | "extra_author": "Saining Xie", | 20 | "extra_author": "Saining Xie", | ||
21 | "orcid": "" | 21 | "orcid": "" | ||
22 | }, | 22 | }, | ||
23 | { | 23 | { | ||
24 | "extra_author": "Xiaoqing Ellen Tan", | 24 | "extra_author": "Xiaoqing Ellen Tan", | ||
25 | "orcid": "" | 25 | "orcid": "" | ||
26 | }, | 26 | }, | ||
27 | { | 27 | { | ||
28 | "extra_author": "Po-Yao Huang", | 28 | "extra_author": "Po-Yao Huang", | ||
29 | "orcid": "" | 29 | "orcid": "" | ||
30 | }, | 30 | }, | ||
31 | { | 31 | { | ||
32 | "extra_author": "Russell Howes", | 32 | "extra_author": "Russell Howes", | ||
33 | "orcid": "" | 33 | "orcid": "" | ||
34 | }, | 34 | }, | ||
35 | { | 35 | { | ||
36 | "extra_author": "Vasu Sharma", | 36 | "extra_author": "Vasu Sharma", | ||
37 | "orcid": "" | 37 | "orcid": "" | ||
38 | }, | 38 | }, | ||
39 | { | 39 | { | ||
40 | "extra_author": "Shang-Wen Li", | 40 | "extra_author": "Shang-Wen Li", | ||
41 | "orcid": "" | 41 | "orcid": "" | ||
42 | }, | 42 | }, | ||
43 | { | 43 | { | ||
44 | "extra_author": "Gargi Ghosh", | 44 | "extra_author": "Gargi Ghosh", | ||
45 | "orcid": "" | 45 | "orcid": "" | ||
46 | }, | 46 | }, | ||
47 | { | 47 | { | ||
48 | "extra_author": "Luke Zettlemoyer", | 48 | "extra_author": "Luke Zettlemoyer", | ||
49 | "orcid": "" | 49 | "orcid": "" | ||
50 | }, | 50 | }, | ||
51 | { | 51 | { | ||
52 | "extra_author": "Christoph Feichtenhofer", | 52 | "extra_author": "Christoph Feichtenhofer", | ||
53 | "orcid": "" | 53 | "orcid": "" | ||
54 | } | 54 | } | ||
55 | ], | 55 | ], | ||
56 | "groups": [ | 56 | "groups": [ | ||
57 | { | 57 | { | ||
58 | "description": "", | 58 | "description": "", | ||
59 | "display_name": "Corpora", | 59 | "display_name": "Corpora", | ||
60 | "id": "667794ed-334c-4cf5-b72d-f6aa6878b951", | 60 | "id": "667794ed-334c-4cf5-b72d-f6aa6878b951", | ||
61 | "image_display_url": "", | 61 | "image_display_url": "", | ||
62 | "name": "corpora", | 62 | "name": "corpora", | ||
63 | "title": "Corpora" | 63 | "title": "Corpora" | ||
64 | }, | 64 | }, | ||
65 | { | 65 | { | ||
66 | "description": "", | 66 | "description": "", | ||
67 | "display_name": "Data Collection", | 67 | "display_name": "Data Collection", | ||
68 | "id": "e5407d18-d3c2-4737-9f55-070f579cca99", | 68 | "id": "e5407d18-d3c2-4737-9f55-070f579cca99", | ||
69 | "image_display_url": "", | 69 | "image_display_url": "", | ||
70 | "name": "data-collection", | 70 | "name": "data-collection", | ||
71 | "title": "Data Collection" | 71 | "title": "Data Collection" | ||
72 | }, | 72 | }, | ||
73 | { | 73 | { | ||
74 | "description": "", | 74 | "description": "", | ||
75 | "display_name": "Text Data", | 75 | "display_name": "Text Data", | ||
76 | "id": "30d52e93-891a-463b-a577-99a00396f012", | 76 | "id": "30d52e93-891a-463b-a577-99a00396f012", | ||
77 | "image_display_url": "", | 77 | "image_display_url": "", | ||
78 | "name": "text-data", | 78 | "name": "text-data", | ||
79 | "title": "Text Data" | 79 | "title": "Text Data" | ||
80 | }, | 80 | }, | ||
81 | { | 81 | { | ||
82 | "description": "", | 82 | "description": "", | ||
83 | "display_name": "Text Pre-training", | 83 | "display_name": "Text Pre-training", | ||
84 | "id": "6031e61f-b53c-4812-a65c-89f08254f08c", | 84 | "id": "6031e61f-b53c-4812-a65c-89f08254f08c", | ||
85 | "image_display_url": "", | 85 | "image_display_url": "", | ||
86 | "name": "text-pre-training", | 86 | "name": "text-pre-training", | ||
87 | "title": "Text Pre-training" | 87 | "title": "Text Pre-training" | ||
88 | }, | 88 | }, | ||
89 | { | 89 | { | ||
90 | "description": "", | 90 | "description": "", | ||
91 | "display_name": "Web Scraping", | 91 | "display_name": "Web Scraping", | ||
92 | "id": "94b6a507-ebba-4c82-a532-8fccffb91dab", | 92 | "id": "94b6a507-ebba-4c82-a532-8fccffb91dab", | ||
93 | "image_display_url": "", | 93 | "image_display_url": "", | ||
94 | "name": "web-scraping", | 94 | "name": "web-scraping", | ||
95 | "title": "Web Scraping" | 95 | "title": "Web Scraping" | ||
96 | } | 96 | } | ||
97 | ], | 97 | ], | ||
98 | "id": "b6df4f21-4506-4b09-b0c8-ee8bf4771a9b", | 98 | "id": "b6df4f21-4506-4b09-b0c8-ee8bf4771a9b", | ||
99 | "isopen": false, | 99 | "isopen": false, | ||
100 | "landing_page": "https://commoncrawl.org/", | 100 | "landing_page": "https://commoncrawl.org/", | ||
101 | "license_title": null, | 101 | "license_title": null, | ||
102 | "link_orkg": "", | 102 | "link_orkg": "", | ||
103 | "metadata_created": "2024-12-02T18:20:37.491047", | 103 | "metadata_created": "2024-12-02T18:20:37.491047", | ||
n | 104 | "metadata_modified": "2024-12-02T18:20:37.491053", | n | 104 | "metadata_modified": "2024-12-02T18:20:37.857537", |
105 | "name": "commoncrawl", | 105 | "name": "commoncrawl", | ||
106 | "notes": "CommonCrawl is a non-profit organization that provides a | 106 | "notes": "CommonCrawl is a non-profit organization that provides a | ||
107 | large corpus of web pages for research and development purposes.", | 107 | large corpus of web pages for research and development purposes.", | ||
n | 108 | "num_resources": 0, | n | 108 | "num_resources": 1, |
109 | "num_tags": 10, | 109 | "num_tags": 10, | ||
110 | "organization": { | 110 | "organization": { | ||
111 | "approval_status": "approved", | 111 | "approval_status": "approved", | ||
112 | "created": "2024-11-25T12:11:38.292601", | 112 | "created": "2024-11-25T12:11:38.292601", | ||
113 | "description": "", | 113 | "description": "", | ||
114 | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | 114 | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | ||
115 | "image_url": "", | 115 | "image_url": "", | ||
116 | "is_organization": true, | 116 | "is_organization": true, | ||
117 | "name": "no-organization", | 117 | "name": "no-organization", | ||
118 | "state": "active", | 118 | "state": "active", | ||
119 | "title": "No Organization", | 119 | "title": "No Organization", | ||
120 | "type": "organization" | 120 | "type": "organization" | ||
121 | }, | 121 | }, | ||
122 | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | 122 | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | ||
123 | "private": false, | 123 | "private": false, | ||
124 | "relationships_as_object": [], | 124 | "relationships_as_object": [], | ||
125 | "relationships_as_subject": [], | 125 | "relationships_as_subject": [], | ||
t | 126 | "resources": [], | t | 126 | "resources": [ |
127 | { | ||||
128 | "cache_last_updated": null, | ||||
129 | "cache_url": null, | ||||
130 | "created": "2024-12-02T18:38:42", | ||||
131 | "data": [ | ||||
132 | "dcterms:title", | ||||
133 | "dcterms:accessRights", | ||||
134 | "dcterms:creator", | ||||
135 | "dcterms:description", | ||||
136 | "dcterms:issued", | ||||
137 | "dcterms:language", | ||||
138 | "dcterms:identifier", | ||||
139 | "dcat:theme", | ||||
140 | "dcterms:type", | ||||
141 | "dcat:keyword", | ||||
142 | "dcat:landingPage", | ||||
143 | "dcterms:hasVersion", | ||||
144 | "dcterms:format", | ||||
145 | "mls:task", | ||||
146 | "datacite:isDescribedBy" | ||||
147 | ], | ||||
148 | "description": "The json representation of the dataset with its | ||||
149 | distributions based on DCAT.", | ||||
150 | "format": "JSON", | ||||
151 | "hash": "", | ||||
152 | "id": "2ae2758d-1f3a-4593-8f8b-77d4ad363d82", | ||||
153 | "last_modified": "2024-12-02T18:20:37.848584", | ||||
154 | "metadata_modified": "2024-12-02T18:20:37.860215", | ||||
155 | "mimetype": "application/json", | ||||
156 | "mimetype_inner": null, | ||||
157 | "name": "Original Metadata", | ||||
158 | "package_id": "b6df4f21-4506-4b09-b0c8-ee8bf4771a9b", | ||||
159 | "position": 0, | ||||
160 | "resource_type": null, | ||||
161 | "size": 1180, | ||||
162 | "state": "active", | ||||
163 | "url": | ||||
164 | resource/2ae2758d-1f3a-4593-8f8b-77d4ad363d82/download/metadata.json", | ||||
165 | "url_type": "upload" | ||||
166 | } | ||||
167 | ], | ||||
127 | "services_used_list": "", | 168 | "services_used_list": "", | ||
128 | "state": "active", | 169 | "state": "active", | ||
129 | "tags": [ | 170 | "tags": [ | ||
130 | { | 171 | { | ||
131 | "display_name": "Corpora", | 172 | "display_name": "Corpora", | ||
132 | "id": "976d8258-2146-448d-9f1f-a595d5a260f8", | 173 | "id": "976d8258-2146-448d-9f1f-a595d5a260f8", | ||
133 | "name": "Corpora", | 174 | "name": "Corpora", | ||
134 | "state": "active", | 175 | "state": "active", | ||
135 | "vocabulary_id": null | 176 | "vocabulary_id": null | ||
136 | }, | 177 | }, | ||
137 | { | 178 | { | ||
138 | "display_name": "Corpus", | 179 | "display_name": "Corpus", | ||
139 | "id": "5e434f4b-89f6-489e-9e6e-d2ff6003d493", | 180 | "id": "5e434f4b-89f6-489e-9e6e-d2ff6003d493", | ||
140 | "name": "Corpus", | 181 | "name": "Corpus", | ||
141 | "state": "active", | 182 | "state": "active", | ||
142 | "vocabulary_id": null | 183 | "vocabulary_id": null | ||
143 | }, | 184 | }, | ||
144 | { | 185 | { | ||
145 | "display_name": "Data Collection", | 186 | "display_name": "Data Collection", | ||
146 | "id": "7cec52fd-2eb1-4c11-9c4c-400d3278cc29", | 187 | "id": "7cec52fd-2eb1-4c11-9c4c-400d3278cc29", | ||
147 | "name": "Data Collection", | 188 | "name": "Data Collection", | ||
148 | "state": "active", | 189 | "state": "active", | ||
149 | "vocabulary_id": null | 190 | "vocabulary_id": null | ||
150 | }, | 191 | }, | ||
151 | { | 192 | { | ||
152 | "display_name": "Text Data", | 193 | "display_name": "Text Data", | ||
153 | "id": "a72366d9-3035-43be-8190-608b1c5c787b", | 194 | "id": "a72366d9-3035-43be-8190-608b1c5c787b", | ||
154 | "name": "Text Data", | 195 | "name": "Text Data", | ||
155 | "state": "active", | 196 | "state": "active", | ||
156 | "vocabulary_id": null | 197 | "vocabulary_id": null | ||
157 | }, | 198 | }, | ||
158 | { | 199 | { | ||
159 | "display_name": "Web Pages", | 200 | "display_name": "Web Pages", | ||
160 | "id": "0b80ecb2-15c5-4d0b-ac33-528d883c5a75", | 201 | "id": "0b80ecb2-15c5-4d0b-ac33-528d883c5a75", | ||
161 | "name": "Web Pages", | 202 | "name": "Web Pages", | ||
162 | "state": "active", | 203 | "state": "active", | ||
163 | "vocabulary_id": null | 204 | "vocabulary_id": null | ||
164 | }, | 205 | }, | ||
165 | { | 206 | { | ||
166 | "display_name": "commoncrawl", | 207 | "display_name": "commoncrawl", | ||
167 | "id": "8dfa4cff-425f-41f1-81ff-ecb3898172da", | 208 | "id": "8dfa4cff-425f-41f1-81ff-ecb3898172da", | ||
168 | "name": "commoncrawl", | 209 | "name": "commoncrawl", | ||
169 | "state": "active", | 210 | "state": "active", | ||
170 | "vocabulary_id": null | 211 | "vocabulary_id": null | ||
171 | }, | 212 | }, | ||
172 | { | 213 | { | ||
173 | "display_name": "document classification", | 214 | "display_name": "document classification", | ||
174 | "id": "a5ed96a5-12e0-4be7-95a6-0df5a2af6fcf", | 215 | "id": "a5ed96a5-12e0-4be7-95a6-0df5a2af6fcf", | ||
175 | "name": "document classification", | 216 | "name": "document classification", | ||
176 | "state": "active", | 217 | "state": "active", | ||
177 | "vocabulary_id": null | 218 | "vocabulary_id": null | ||
178 | }, | 219 | }, | ||
179 | { | 220 | { | ||
180 | "display_name": "text data", | 221 | "display_name": "text data", | ||
181 | "id": "71552fca-5e8d-428f-82c0-9d2d0160e73d", | 222 | "id": "71552fca-5e8d-428f-82c0-9d2d0160e73d", | ||
182 | "name": "text data", | 223 | "name": "text data", | ||
183 | "state": "active", | 224 | "state": "active", | ||
184 | "vocabulary_id": null | 225 | "vocabulary_id": null | ||
185 | }, | 226 | }, | ||
186 | { | 227 | { | ||
187 | "display_name": "text pre-training", | 228 | "display_name": "text pre-training", | ||
188 | "id": "fb405c0f-9173-4141-a885-7bad31eef1fc", | 229 | "id": "fb405c0f-9173-4141-a885-7bad31eef1fc", | ||
189 | "name": "text pre-training", | 230 | "name": "text pre-training", | ||
190 | "state": "active", | 231 | "state": "active", | ||
191 | "vocabulary_id": null | 232 | "vocabulary_id": null | ||
192 | }, | 233 | }, | ||
193 | { | 234 | { | ||
194 | "display_name": "web scraping", | 235 | "display_name": "web scraping", | ||
195 | "id": "de6192c1-ff86-496b-9a9e-634a2fb9b683", | 236 | "id": "de6192c1-ff86-496b-9a9e-634a2fb9b683", | ||
196 | "name": "web scraping", | 237 | "name": "web scraping", | ||
197 | "state": "active", | 238 | "state": "active", | ||
198 | "vocabulary_id": null | 239 | "vocabulary_id": null | ||
199 | } | 240 | } | ||
200 | ], | 241 | ], | ||
201 | "title": "CommonCrawl", | 242 | "title": "CommonCrawl", | ||
202 | "type": "dataset", | 243 | "type": "dataset", | ||
203 | "version": "" | 244 | "version": "" | ||
204 | } | 245 | } |