Changes
On December 2, 2024 at 6:01:56 PM UTC, admin:
- Changed value of field doi_status to True in OSCAR
- Changed value of field doi_date_published to 2024-12-02 in OSCAR
- Added resource Original Metadata to OSCAR
f | 1 | { | f | 1 | { |
2 | "access_rights": "", | 2 | "access_rights": "", | ||
3 | "author": "Julien Launay", | 3 | "author": "Julien Launay", | ||
4 | "author_email": "", | 4 | "author_email": "", | ||
5 | "citation": [ | 5 | "citation": [ | ||
6 | "https://doi.org/10.48550/arXiv.2306.15550", | 6 | "https://doi.org/10.48550/arXiv.2306.15550", | ||
7 | "https://doi.org/10.48550/arXiv.2010.03813" | 7 | "https://doi.org/10.48550/arXiv.2010.03813" | ||
8 | ], | 8 | ], | ||
9 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | 9 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | ||
10 | "defined_in": "https://doi.org/10.48550/arXiv.2201.06642", | 10 | "defined_in": "https://doi.org/10.48550/arXiv.2201.06642", | ||
11 | "doi": "10.57702/10zu2mjt", | 11 | "doi": "10.57702/10zu2mjt", | ||
n | 12 | "doi_date_published": null, | n | 12 | "doi_date_published": "2024-12-02", |
13 | "doi_publisher": "TIB", | 13 | "doi_publisher": "TIB", | ||
n | 14 | "doi_status": false, | n | 14 | "doi_status": true, |
15 | "domain": "https://service.tib.eu/ldmservice", | 15 | "domain": "https://service.tib.eu/ldmservice", | ||
16 | "extra_authors": [ | 16 | "extra_authors": [ | ||
17 | { | 17 | { | ||
18 | "extra_author": "Elena Tommasone", | 18 | "extra_author": "Elena Tommasone", | ||
19 | "orcid": "" | 19 | "orcid": "" | ||
20 | }, | 20 | }, | ||
21 | { | 21 | { | ||
22 | "extra_author": "Baptiste Pannier", | 22 | "extra_author": "Baptiste Pannier", | ||
23 | "orcid": "" | 23 | "orcid": "" | ||
24 | }, | 24 | }, | ||
25 | { | 25 | { | ||
26 | "extra_author": "Fran\u00e7ois Boniface", | 26 | "extra_author": "Fran\u00e7ois Boniface", | ||
27 | "orcid": "" | 27 | "orcid": "" | ||
28 | }, | 28 | }, | ||
29 | { | 29 | { | ||
30 | "extra_author": "Am\u00e9lie Chatelain", | 30 | "extra_author": "Am\u00e9lie Chatelain", | ||
31 | "orcid": "" | 31 | "orcid": "" | ||
32 | }, | 32 | }, | ||
33 | { | 33 | { | ||
34 | "extra_author": "Alessandro Cappelli", | 34 | "extra_author": "Alessandro Cappelli", | ||
35 | "orcid": "" | 35 | "orcid": "" | ||
36 | }, | 36 | }, | ||
37 | { | 37 | { | ||
38 | "extra_author": "Iacopo Poli", | 38 | "extra_author": "Iacopo Poli", | ||
39 | "orcid": "" | 39 | "orcid": "" | ||
40 | }, | 40 | }, | ||
41 | { | 41 | { | ||
42 | "extra_author": "Djam\u00e9 Seddah", | 42 | "extra_author": "Djam\u00e9 Seddah", | ||
43 | "orcid": "" | 43 | "orcid": "" | ||
44 | } | 44 | } | ||
45 | ], | 45 | ], | ||
46 | "groups": [ | 46 | "groups": [ | ||
47 | { | 47 | { | ||
48 | "description": "", | 48 | "description": "", | ||
49 | "display_name": "Biomedical Text", | 49 | "display_name": "Biomedical Text", | ||
50 | "id": "7f266b67-3b2e-4343-8384-7f66d9ec26a5", | 50 | "id": "7f266b67-3b2e-4343-8384-7f66d9ec26a5", | ||
51 | "image_display_url": "", | 51 | "image_display_url": "", | ||
52 | "name": "biomedical-text", | 52 | "name": "biomedical-text", | ||
53 | "title": "Biomedical Text" | 53 | "title": "Biomedical Text" | ||
54 | }, | 54 | }, | ||
55 | { | 55 | { | ||
56 | "description": "", | 56 | "description": "", | ||
57 | "display_name": "Language Modeling", | 57 | "display_name": "Language Modeling", | ||
58 | "id": "f0c89329-f1ed-4ba3-90d2-5113fff9bf72", | 58 | "id": "f0c89329-f1ed-4ba3-90d2-5113fff9bf72", | ||
59 | "image_display_url": "", | 59 | "image_display_url": "", | ||
60 | "name": "language-modeling", | 60 | "name": "language-modeling", | ||
61 | "title": "Language Modeling" | 61 | "title": "Language Modeling" | ||
62 | }, | 62 | }, | ||
63 | { | 63 | { | ||
64 | "description": "", | 64 | "description": "", | ||
65 | "display_name": "Multilingual Corpora", | 65 | "display_name": "Multilingual Corpora", | ||
66 | "id": "0f464674-1466-4769-bd5e-14d665407425", | 66 | "id": "0f464674-1466-4769-bd5e-14d665407425", | ||
67 | "image_display_url": "", | 67 | "image_display_url": "", | ||
68 | "name": "multilingual-corpora", | 68 | "name": "multilingual-corpora", | ||
69 | "title": "Multilingual Corpora" | 69 | "title": "Multilingual Corpora" | ||
70 | }, | 70 | }, | ||
71 | { | 71 | { | ||
72 | "description": "", | 72 | "description": "", | ||
73 | "display_name": "Multilingual Web Corpus", | 73 | "display_name": "Multilingual Web Corpus", | ||
74 | "id": "993a8b69-57e9-4cb2-aa93-bdf061c19338", | 74 | "id": "993a8b69-57e9-4cb2-aa93-bdf061c19338", | ||
75 | "image_display_url": "", | 75 | "image_display_url": "", | ||
76 | "name": "multilingual-web-corpus", | 76 | "name": "multilingual-web-corpus", | ||
77 | "title": "Multilingual Web Corpus" | 77 | "title": "Multilingual Web Corpus" | ||
78 | }, | 78 | }, | ||
79 | { | 79 | { | ||
80 | "description": "", | 80 | "description": "", | ||
81 | "display_name": "Text Data", | 81 | "display_name": "Text Data", | ||
82 | "id": "30d52e93-891a-463b-a577-99a00396f012", | 82 | "id": "30d52e93-891a-463b-a577-99a00396f012", | ||
83 | "image_display_url": "", | 83 | "image_display_url": "", | ||
84 | "name": "text-data", | 84 | "name": "text-data", | ||
85 | "title": "Text Data" | 85 | "title": "Text Data" | ||
86 | } | 86 | } | ||
87 | ], | 87 | ], | ||
88 | "id": "b6a9812f-e3fd-4f97-b970-4c536dc6db85", | 88 | "id": "b6a9812f-e3fd-4f97-b970-4c536dc6db85", | ||
89 | "isopen": false, | 89 | "isopen": false, | ||
90 | "landing_page": "https://oscar.nlp.stanford.edu/", | 90 | "landing_page": "https://oscar.nlp.stanford.edu/", | ||
91 | "license_title": null, | 91 | "license_title": null, | ||
92 | "link_orkg": "", | 92 | "link_orkg": "", | ||
93 | "metadata_created": "2024-12-02T18:01:55.377030", | 93 | "metadata_created": "2024-12-02T18:01:55.377030", | ||
n | 94 | "metadata_modified": "2024-12-02T18:01:55.377035", | n | 94 | "metadata_modified": "2024-12-02T18:01:55.723222", |
95 | "name": "oscar", | 95 | "name": "oscar", | ||
96 | "notes": "The OSCAR corpus is a multilingual web corpus that is used | 96 | "notes": "The OSCAR corpus is a multilingual web corpus that is used | ||
97 | for pre-training large generative language models. It is a | 97 | for pre-training large generative language models. It is a | ||
98 | document-oriented corpus that is comparable in size and language size | 98 | document-oriented corpus that is comparable in size and language size | ||
99 | distribution to OSCAR 21.09.", | 99 | distribution to OSCAR 21.09.", | ||
n | 100 | "num_resources": 0, | n | 100 | "num_resources": 1, |
101 | "num_tags": 10, | 101 | "num_tags": 10, | ||
102 | "organization": { | 102 | "organization": { | ||
103 | "approval_status": "approved", | 103 | "approval_status": "approved", | ||
104 | "created": "2024-11-25T12:11:38.292601", | 104 | "created": "2024-11-25T12:11:38.292601", | ||
105 | "description": "", | 105 | "description": "", | ||
106 | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | 106 | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | ||
107 | "image_url": "", | 107 | "image_url": "", | ||
108 | "is_organization": true, | 108 | "is_organization": true, | ||
109 | "name": "no-organization", | 109 | "name": "no-organization", | ||
110 | "state": "active", | 110 | "state": "active", | ||
111 | "title": "No Organization", | 111 | "title": "No Organization", | ||
112 | "type": "organization" | 112 | "type": "organization" | ||
113 | }, | 113 | }, | ||
114 | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | 114 | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | ||
115 | "private": false, | 115 | "private": false, | ||
116 | "relationships_as_object": [], | 116 | "relationships_as_object": [], | ||
117 | "relationships_as_subject": [], | 117 | "relationships_as_subject": [], | ||
t | 118 | "resources": [], | t | 118 | "resources": [ |
119 | { | ||||
120 | "cache_last_updated": null, | ||||
121 | "cache_url": null, | ||||
122 | "created": "2024-12-02T18:38:42", | ||||
123 | "data": [ | ||||
124 | "dcterms:title", | ||||
125 | "dcterms:accessRights", | ||||
126 | "dcterms:creator", | ||||
127 | "dcterms:description", | ||||
128 | "dcterms:issued", | ||||
129 | "dcterms:language", | ||||
130 | "dcterms:identifier", | ||||
131 | "dcat:theme", | ||||
132 | "dcterms:type", | ||||
133 | "dcat:keyword", | ||||
134 | "dcat:landingPage", | ||||
135 | "dcterms:hasVersion", | ||||
136 | "dcterms:format", | ||||
137 | "mls:task", | ||||
138 | "datacite:isDescribedBy" | ||||
139 | ], | ||||
140 | "description": "The json representation of the dataset with its | ||||
141 | distributions based on DCAT.", | ||||
142 | "format": "JSON", | ||||
143 | "hash": "", | ||||
144 | "id": "7b9cd67c-7ec5-4f16-9e2b-0926770e7b1d", | ||||
145 | "last_modified": "2024-12-02T18:01:55.715046", | ||||
146 | "metadata_modified": "2024-12-02T18:01:55.726168", | ||||
147 | "mimetype": "application/json", | ||||
148 | "mimetype_inner": null, | ||||
149 | "name": "Original Metadata", | ||||
150 | "package_id": "b6a9812f-e3fd-4f97-b970-4c536dc6db85", | ||||
151 | "position": 0, | ||||
152 | "resource_type": null, | ||||
153 | "size": 1228, | ||||
154 | "state": "active", | ||||
155 | "url": | ||||
156 | resource/7b9cd67c-7ec5-4f16-9e2b-0926770e7b1d/download/metadata.json", | ||||
157 | "url_type": "upload" | ||||
158 | } | ||||
159 | ], | ||||
119 | "services_used_list": "", | 160 | "services_used_list": "", | ||
120 | "state": "active", | 161 | "state": "active", | ||
121 | "tags": [ | 162 | "tags": [ | ||
122 | { | 163 | { | ||
123 | "display_name": "Common Crawl", | 164 | "display_name": "Common Crawl", | ||
124 | "id": "daa1be46-b81f-4d1d-b2bf-19549a1660a5", | 165 | "id": "daa1be46-b81f-4d1d-b2bf-19549a1660a5", | ||
125 | "name": "Common Crawl", | 166 | "name": "Common Crawl", | ||
126 | "state": "active", | 167 | "state": "active", | ||
127 | "vocabulary_id": null | 168 | "vocabulary_id": null | ||
128 | }, | 169 | }, | ||
129 | { | 170 | { | ||
130 | "display_name": "French Language", | 171 | "display_name": "French Language", | ||
131 | "id": "b23bb708-4f24-4781-a647-058c0476999b", | 172 | "id": "b23bb708-4f24-4781-a647-058c0476999b", | ||
132 | "name": "French Language", | 173 | "name": "French Language", | ||
133 | "state": "active", | 174 | "state": "active", | ||
134 | "vocabulary_id": null | 175 | "vocabulary_id": null | ||
135 | }, | 176 | }, | ||
136 | { | 177 | { | ||
137 | "display_name": "French biomedical data", | 178 | "display_name": "French biomedical data", | ||
138 | "id": "b0dfead7-c0b8-40d0-b5e7-194d8b34d281", | 179 | "id": "b0dfead7-c0b8-40d0-b5e7-194d8b34d281", | ||
139 | "name": "French biomedical data", | 180 | "name": "French biomedical data", | ||
140 | "state": "active", | 181 | "state": "active", | ||
141 | "vocabulary_id": null | 182 | "vocabulary_id": null | ||
142 | }, | 183 | }, | ||
143 | { | 184 | { | ||
144 | "display_name": "OSCAR", | 185 | "display_name": "OSCAR", | ||
145 | "id": "cd2bd943-2178-4ed7-b348-9fefa15e096e", | 186 | "id": "cd2bd943-2178-4ed7-b348-9fefa15e096e", | ||
146 | "name": "OSCAR", | 187 | "name": "OSCAR", | ||
147 | "state": "active", | 188 | "state": "active", | ||
148 | "vocabulary_id": null | 189 | "vocabulary_id": null | ||
149 | }, | 190 | }, | ||
150 | { | 191 | { | ||
151 | "display_name": "Text Data", | 192 | "display_name": "Text Data", | ||
152 | "id": "a72366d9-3035-43be-8190-608b1c5c787b", | 193 | "id": "a72366d9-3035-43be-8190-608b1c5c787b", | ||
153 | "name": "Text Data", | 194 | "name": "Text Data", | ||
154 | "state": "active", | 195 | "state": "active", | ||
155 | "vocabulary_id": null | 196 | "vocabulary_id": null | ||
156 | }, | 197 | }, | ||
157 | { | 198 | { | ||
158 | "display_name": "biomedical text", | 199 | "display_name": "biomedical text", | ||
159 | "id": "1a5d487a-3f29-48c1-b8dd-b3757f9bc9b0", | 200 | "id": "1a5d487a-3f29-48c1-b8dd-b3757f9bc9b0", | ||
160 | "name": "biomedical text", | 201 | "name": "biomedical text", | ||
161 | "state": "active", | 202 | "state": "active", | ||
162 | "vocabulary_id": null | 203 | "vocabulary_id": null | ||
163 | }, | 204 | }, | ||
164 | { | 205 | { | ||
165 | "display_name": "language modeling", | 206 | "display_name": "language modeling", | ||
166 | "id": "44eb2c4c-e2cd-4698-bf3c-28ebbd5e3e42", | 207 | "id": "44eb2c4c-e2cd-4698-bf3c-28ebbd5e3e42", | ||
167 | "name": "language modeling", | 208 | "name": "language modeling", | ||
168 | "state": "active", | 209 | "state": "active", | ||
169 | "vocabulary_id": null | 210 | "vocabulary_id": null | ||
170 | }, | 211 | }, | ||
171 | { | 212 | { | ||
172 | "display_name": "multilingual", | 213 | "display_name": "multilingual", | ||
173 | "id": "ee7200c1-f044-4a4e-9c0d-3244e46c833d", | 214 | "id": "ee7200c1-f044-4a4e-9c0d-3244e46c833d", | ||
174 | "name": "multilingual", | 215 | "name": "multilingual", | ||
175 | "state": "active", | 216 | "state": "active", | ||
176 | "vocabulary_id": null | 217 | "vocabulary_id": null | ||
177 | }, | 218 | }, | ||
178 | { | 219 | { | ||
179 | "display_name": "multilingual corpora", | 220 | "display_name": "multilingual corpora", | ||
180 | "id": "4e539a01-eb7a-4913-8041-64cd93e643fc", | 221 | "id": "4e539a01-eb7a-4913-8041-64cd93e643fc", | ||
181 | "name": "multilingual corpora", | 222 | "name": "multilingual corpora", | ||
182 | "state": "active", | 223 | "state": "active", | ||
183 | "vocabulary_id": null | 224 | "vocabulary_id": null | ||
184 | }, | 225 | }, | ||
185 | { | 226 | { | ||
186 | "display_name": "web corpus", | 227 | "display_name": "web corpus", | ||
187 | "id": "f524634f-15ba-4bd7-85fe-83bfac06e04f", | 228 | "id": "f524634f-15ba-4bd7-85fe-83bfac06e04f", | ||
188 | "name": "web corpus", | 229 | "name": "web corpus", | ||
189 | "state": "active", | 230 | "state": "active", | ||
190 | "vocabulary_id": null | 231 | "vocabulary_id": null | ||
191 | } | 232 | } | ||
192 | ], | 233 | ], | ||
193 | "title": "OSCAR", | 234 | "title": "OSCAR", | ||
194 | "type": "dataset", | 235 | "type": "dataset", | ||
195 | "version": "" | 236 | "version": "" | ||
196 | } | 237 | } |