Changes
On December 3, 2024 at 10:53:18 AM UTC, admin:
-
Changed title to MS-MARCO (previously MS MARCO)
-
Set author of MS-MARCO to Nguyen et al. (previously Tri Nguyen)
-
Updated description of MS-MARCO from
Large-scale passage retrieval aims to fetch relevant passages from a million- or billion-scale collection for a given query to meet users’ information needs, serving as an important role in many downstream applications including open domain question answering, search engine, and recommendation system.
to MS-MARCO dataset is a large-scale question answering dataset, focusing on real-world web data
-
Removed the following tags from MS-MARCO
- document retrieval
- passage ranking
- Question Answering
- single entity
- open-domain QA
- search query logs
- information retrieval
- document
- human generated dataset
- MS MARCO
- human-generated dataset
- passage retrieval
- query
- single relation
- zero-shot retrieval
- user click
- ranking
- Natural Language Processing
- QA
- reading comprehension
- Machine Reading Comprehension
-
Added the following tags to MS-MARCO
- MS-MARCO
- real-world web data
-
Changed value of field
defined_in
to https://doi.org/10.48550/arXiv.1805.02220
in MS-MARCO -
Changed value of field
citation
to []
in MS-MARCO -
Changed value of field
landing_page
to https://rajpurkar.github.io/SQuAD-explorer/
in MS-MARCO -
Deleted resource Original Metadata from MS-MARCO
f | 1 | { | f | 1 | { |
2 | "access_rights": "", | 2 | "access_rights": "", | ||
n | 3 | "author": "Tri Nguyen", | n | 3 | "author": "Nguyen et al.", |
4 | "author_email": "", | 4 | "author_email": "", | ||
n | 5 | "citation": [ | n | 5 | "citation": [], |
6 | "https://doi.org/10.48550/arXiv.2110.07752", | ||||
7 | "https://doi.org/10.48550/arXiv.2208.13661", | ||||
8 | "https://doi.org/10.48550/arXiv.2312.15503", | ||||
9 | "https://doi.org/10.48550/arXiv.2205.03284", | ||||
10 | "https://doi.org/10.48550/arXiv.2202.05144", | ||||
11 | "https://doi.org/10.48550/arXiv.2311.02834", | ||||
12 | "https://doi.org/10.48550/arXiv.2403.14541", | ||||
13 | "https://doi.org/10.1109/IKT51791.2020.9345613", | ||||
14 | "https://doi.org/10.48550/arXiv.2203.07735", | ||||
15 | "https://doi.org/10.48550/arXiv.2404.13950", | ||||
16 | "https://doi.org/10.48550/arXiv.1909.03716", | ||||
17 | "https://doi.org/10.48550/arXiv.2303.16780", | ||||
18 | "https://doi.org/10.48550/arXiv.2203.00537", | ||||
19 | "https://doi.org/10.48550/arXiv.2101.06980", | ||||
20 | "https://doi.org/10.48550/arXiv.2404.15103" | ||||
21 | ], | ||||
22 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | 6 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | ||
n | 23 | "defined_in": "https://doi.org/10.1145/3583780.3615123", | n | 7 | "defined_in": "https://doi.org/10.48550/arXiv.1805.02220", |
24 | "doi": "10.57702/y9ev8opk", | 8 | "doi": "10.57702/y9ev8opk", | ||
25 | "doi_date_published": "2024-11-25", | 9 | "doi_date_published": "2024-11-25", | ||
26 | "doi_publisher": "TIB", | 10 | "doi_publisher": "TIB", | ||
27 | "doi_status": true, | 11 | "doi_status": true, | ||
28 | "domain": "https://service.tib.eu/ldmservice", | 12 | "domain": "https://service.tib.eu/ldmservice", | ||
n | 29 | "extra_authors": [ | n | ||
30 | { | ||||
31 | "extra_author": "Mir Rosenberg", | ||||
32 | "orcid": "" | ||||
33 | }, | ||||
34 | { | ||||
35 | "extra_author": "Xia Song", | ||||
36 | "orcid": "" | ||||
37 | }, | ||||
38 | { | ||||
39 | "extra_author": "Jianfeng Gao", | ||||
40 | "orcid": "" | ||||
41 | }, | ||||
42 | { | ||||
43 | "extra_author": "Saurabh Tiwary", | ||||
44 | "orcid": "" | ||||
45 | }, | ||||
46 | { | ||||
47 | "extra_author": "Rangan Majumder", | ||||
48 | "orcid": "" | ||||
49 | }, | ||||
50 | { | ||||
51 | "extra_author": "Li Deng", | ||||
52 | "orcid": "" | ||||
53 | } | ||||
54 | ], | ||||
55 | "groups": [ | 13 | "groups": [ | ||
n | 56 | { | n | ||
57 | "description": "", | ||||
58 | "display_name": "Document Retrieval", | ||||
59 | "id": "530cb507-0ba5-4237-8cef-47043758c987", | ||||
60 | "image_display_url": "", | ||||
61 | "name": "document-retrieval", | ||||
62 | "title": "Document Retrieval" | ||||
63 | }, | ||||
64 | { | ||||
65 | "description": "", | ||||
66 | "display_name": "Information Retrieval", | ||||
67 | "id": "2193c32a-4b35-4438-b3a0-07c44d00abbc", | ||||
68 | "image_display_url": "", | ||||
69 | "name": "information-retrieval", | ||||
70 | "title": "Information Retrieval" | ||||
71 | }, | ||||
72 | { | 14 | { | ||
73 | "description": "", | 15 | "description": "", | ||
74 | "display_name": "Machine Reading Comprehension", | 16 | "display_name": "Machine Reading Comprehension", | ||
75 | "id": "d4bdf82e-a4db-400a-b364-18088a397a26", | 17 | "id": "d4bdf82e-a4db-400a-b364-18088a397a26", | ||
76 | "image_display_url": "", | 18 | "image_display_url": "", | ||
77 | "name": "machine-reading-comprehension", | 19 | "name": "machine-reading-comprehension", | ||
78 | "title": "Machine Reading Comprehension" | 20 | "title": "Machine Reading Comprehension" | ||
n | 79 | }, | n | ||
80 | { | ||||
81 | "description": "", | ||||
82 | "display_name": "Passage Ranking", | ||||
83 | "id": "c7fd2228-f103-41f8-8aec-17d9f5584d44", | ||||
84 | "image_display_url": "", | ||||
85 | "name": "passage-ranking", | ||||
86 | "title": "Passage Ranking" | ||||
87 | }, | 21 | }, | ||
88 | { | 22 | { | ||
89 | "description": "", | 23 | "description": "", | ||
90 | "display_name": "Question Answering", | 24 | "display_name": "Question Answering", | ||
91 | "id": "01e9bd6b-910d-421d-bb31-799604023211", | 25 | "id": "01e9bd6b-910d-421d-bb31-799604023211", | ||
92 | "image_display_url": "", | 26 | "image_display_url": "", | ||
93 | "name": "question-answering", | 27 | "name": "question-answering", | ||
94 | "title": "Question Answering" | 28 | "title": "Question Answering" | ||
95 | } | 29 | } | ||
96 | ], | 30 | ], | ||
97 | "id": "c5a87f2c-04ef-436f-8238-7ebec5a8d9df", | 31 | "id": "c5a87f2c-04ef-436f-8238-7ebec5a8d9df", | ||
98 | "isopen": false, | 32 | "isopen": false, | ||
n | 99 | "landing_page": "https://ai.stanford.edu/~suiyue/MARCO/", | n | 33 | "landing_page": "https://rajpurkar.github.io/SQuAD-explorer/", |
100 | "license_title": null, | 34 | "license_title": null, | ||
101 | "link_orkg": "", | 35 | "link_orkg": "", | ||
102 | "metadata_created": "2024-11-25T14:23:55.046003", | 36 | "metadata_created": "2024-11-25T14:23:55.046003", | ||
n | 103 | "metadata_modified": "2024-12-02T23:52:51.847717", | n | 37 | "metadata_modified": "2024-12-03T10:53:17.988763", |
104 | "name": "ms-marco", | 38 | "name": "ms-marco", | ||
n | 105 | "notes": "Large-scale passage retrieval aims to fetch relevant | n | 39 | "notes": "MS-MARCO dataset is a large-scale question answering |
106 | passages from a million- or billion-scale collection for a given query | 40 | dataset, focusing on real-world web data", | ||
107 | to meet users\u2019 information needs, serving as an important role in | ||||
108 | many downstream applications including open domain question answering, | ||||
109 | search engine, and recommendation system.", | ||||
110 | "num_resources": 1, | 41 | "num_resources": 0, | ||
111 | "num_tags": 23, | 42 | "num_tags": 4, | ||
112 | "organization": { | 43 | "organization": { | ||
113 | "approval_status": "approved", | 44 | "approval_status": "approved", | ||
114 | "created": "2024-11-25T12:11:38.292601", | 45 | "created": "2024-11-25T12:11:38.292601", | ||
115 | "description": "", | 46 | "description": "", | ||
116 | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | 47 | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | ||
117 | "image_url": "", | 48 | "image_url": "", | ||
118 | "is_organization": true, | 49 | "is_organization": true, | ||
119 | "name": "no-organization", | 50 | "name": "no-organization", | ||
120 | "state": "active", | 51 | "state": "active", | ||
121 | "title": "No Organization", | 52 | "title": "No Organization", | ||
122 | "type": "organization" | 53 | "type": "organization" | ||
123 | }, | 54 | }, | ||
124 | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | 55 | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | ||
125 | "private": false, | 56 | "private": false, | ||
126 | "relationships_as_object": [], | 57 | "relationships_as_object": [], | ||
127 | "relationships_as_subject": [], | 58 | "relationships_as_subject": [], | ||
n | 128 | "resources": [ | n | 59 | "resources": [], |
129 | { | ||||
130 | "cache_last_updated": null, | ||||
131 | "cache_url": null, | ||||
132 | "created": "2024-12-03T00:20:35", | ||||
133 | "data": [ | ||||
134 | "dcterms:title", | ||||
135 | "dcterms:accessRights", | ||||
136 | "dcterms:creator", | ||||
137 | "dcterms:description", | ||||
138 | "dcterms:issued", | ||||
139 | "dcterms:language", | ||||
140 | "dcterms:identifier", | ||||
141 | "dcat:theme", | ||||
142 | "dcterms:type", | ||||
143 | "dcat:keyword", | ||||
144 | "dcat:landingPage", | ||||
145 | "dcterms:hasVersion", | ||||
146 | "dcterms:format", | ||||
147 | "mls:task", | ||||
148 | "datacite:isDescribedBy" | ||||
149 | ], | ||||
150 | "description": "The json representation of the dataset with its | ||||
151 | distributions based on DCAT.", | ||||
152 | "format": "JSON", | ||||
153 | "hash": "", | ||||
154 | "id": "fa44de5a-4d21-4414-9aca-3fc4d351e3ec", | ||||
155 | "last_modified": "2024-12-02T23:52:51.837607", | ||||
156 | "metadata_modified": "2024-12-02T23:52:51.850683", | ||||
157 | "mimetype": "application/json", | ||||
158 | "mimetype_inner": null, | ||||
159 | "name": "Original Metadata", | ||||
160 | "package_id": "c5a87f2c-04ef-436f-8238-7ebec5a8d9df", | ||||
161 | "position": 0, | ||||
162 | "resource_type": null, | ||||
163 | "size": 2135, | ||||
164 | "state": "active", | ||||
165 | "url": | ||||
166 | resource/fa44de5a-4d21-4414-9aca-3fc4d351e3ec/download/metadata.json", | ||||
167 | "url_type": "upload" | ||||
168 | } | ||||
169 | ], | ||||
170 | "services_used_list": "", | 60 | "services_used_list": "", | ||
171 | "state": "active", | 61 | "state": "active", | ||
172 | "tags": [ | 62 | "tags": [ | ||
173 | { | 63 | { | ||
n | 174 | "display_name": "MS MARCO", | n | 64 | "display_name": "MS-MARCO", |
175 | "id": "6bece918-b5d4-4314-a6f7-c8c1b57956d9", | 65 | "id": "d5278f93-0bc2-4542-a757-17cf56ba18e1", | ||
176 | "name": "MS MARCO", | 66 | "name": "MS-MARCO", | ||
177 | "state": "active", | ||||
178 | "vocabulary_id": null | ||||
179 | }, | ||||
180 | { | ||||
181 | "display_name": "Machine Reading Comprehension", | ||||
182 | "id": "0bffb536-2e89-437a-898c-9e55eb8f5446", | ||||
183 | "name": "Machine Reading Comprehension", | ||||
184 | "state": "active", | ||||
185 | "vocabulary_id": null | ||||
186 | }, | ||||
187 | { | ||||
188 | "display_name": "Natural Language Processing", | ||||
189 | "id": "035ca6e6-7473-43c7-82b3-cb1c2b4131fe", | ||||
190 | "name": "Natural Language Processing", | ||||
191 | "state": "active", | ||||
192 | "vocabulary_id": null | ||||
193 | }, | ||||
194 | { | ||||
195 | "display_name": "QA", | ||||
196 | "id": "179724b1-43fb-44f3-b17a-ff3550071d87", | ||||
197 | "name": "QA", | ||||
198 | "state": "active", | ||||
199 | "vocabulary_id": null | ||||
200 | }, | ||||
201 | { | ||||
202 | "display_name": "Question Answering", | ||||
203 | "id": "90f0a19e-b0c2-47b9-9180-dd38182c8003", | ||||
204 | "name": "Question Answering", | ||||
205 | "state": "active", | ||||
206 | "vocabulary_id": null | ||||
207 | }, | ||||
208 | { | ||||
209 | "display_name": "document", | ||||
210 | "id": "796b2125-074c-41a7-be5e-7dbfa21baa11", | ||||
211 | "name": "document", | ||||
212 | "state": "active", | ||||
213 | "vocabulary_id": null | ||||
214 | }, | ||||
215 | { | ||||
216 | "display_name": "document retrieval", | ||||
217 | "id": "1eb6e992-6a7d-476f-97d7-b6a295368fe2", | ||||
218 | "name": "document retrieval", | ||||
219 | "state": "active", | ||||
220 | "vocabulary_id": null | ||||
221 | }, | ||||
222 | { | ||||
223 | "display_name": "human generated dataset", | ||||
224 | "id": "c5b141ba-1a91-4e2b-b350-007ebc08e3e4", | ||||
225 | "name": "human generated dataset", | ||||
226 | "state": "active", | ||||
227 | "vocabulary_id": null | ||||
228 | }, | ||||
229 | { | ||||
230 | "display_name": "human-generated dataset", | ||||
231 | "id": "a29ba68d-3443-4dc9-aa39-dcf7657f2292", | ||||
232 | "name": "human-generated dataset", | ||||
233 | "state": "active", | ||||
234 | "vocabulary_id": null | ||||
235 | }, | ||||
236 | { | ||||
237 | "display_name": "information retrieval", | ||||
238 | "id": "c094f269-c169-4c66-b2f1-98967cbef543", | ||||
239 | "name": "information retrieval", | ||||
240 | "state": "active", | 67 | "state": "active", | ||
241 | "vocabulary_id": null | 68 | "vocabulary_id": null | ||
242 | }, | 69 | }, | ||
243 | { | 70 | { | ||
244 | "display_name": "machine reading comprehension", | 71 | "display_name": "machine reading comprehension", | ||
245 | "id": "81904f9d-c7ac-4a01-a208-7101b3cdaa47", | 72 | "id": "81904f9d-c7ac-4a01-a208-7101b3cdaa47", | ||
246 | "name": "machine reading comprehension", | 73 | "name": "machine reading comprehension", | ||
247 | "state": "active", | 74 | "state": "active", | ||
248 | "vocabulary_id": null | 75 | "vocabulary_id": null | ||
249 | }, | 76 | }, | ||
250 | { | 77 | { | ||
n | 251 | "display_name": "open-domain QA", | n | ||
252 | "id": "49d1bdd9-0c26-46da-bbda-c4618be72681", | ||||
253 | "name": "open-domain QA", | ||||
254 | "state": "active", | ||||
255 | "vocabulary_id": null | ||||
256 | }, | ||||
257 | { | ||||
258 | "display_name": "passage ranking", | ||||
259 | "id": "05530dc7-3c93-466d-a828-e76c1ab75332", | ||||
260 | "name": "passage ranking", | ||||
261 | "state": "active", | ||||
262 | "vocabulary_id": null | ||||
263 | }, | ||||
264 | { | ||||
265 | "display_name": "passage retrieval", | ||||
266 | "id": "24e47c7a-fc2d-483d-82ac-f63b32303276", | ||||
267 | "name": "passage retrieval", | ||||
268 | "state": "active", | ||||
269 | "vocabulary_id": null | ||||
270 | }, | ||||
271 | { | ||||
272 | "display_name": "query", | ||||
273 | "id": "5c903983-4289-498e-9ab1-969eddd7119e", | ||||
274 | "name": "query", | ||||
275 | "state": "active", | ||||
276 | "vocabulary_id": null | ||||
277 | }, | ||||
278 | { | ||||
279 | "display_name": "question answering", | 78 | "display_name": "question answering", | ||
280 | "id": "d4e35d0b-ca07-4d05-bb27-50b2f30f2f00", | 79 | "id": "d4e35d0b-ca07-4d05-bb27-50b2f30f2f00", | ||
281 | "name": "question answering", | 80 | "name": "question answering", | ||
282 | "state": "active", | 81 | "state": "active", | ||
283 | "vocabulary_id": null | 82 | "vocabulary_id": null | ||
284 | }, | 83 | }, | ||
285 | { | 84 | { | ||
n | 286 | "display_name": "ranking", | n | 85 | "display_name": "real-world web data", |
287 | "id": "b548741b-2174-4996-86fa-d8421e0f4d24", | 86 | "id": "ac304857-aa75-4dfa-a0b7-fb46c693f0d5", | ||
288 | "name": "ranking", | 87 | "name": "real-world web data", | ||
289 | "state": "active", | ||||
290 | "vocabulary_id": null | ||||
291 | }, | ||||
292 | { | ||||
293 | "display_name": "reading comprehension", | ||||
294 | "id": "d1f7f9e1-373b-4452-9427-a72e3f8af701", | ||||
295 | "name": "reading comprehension", | ||||
296 | "state": "active", | ||||
297 | "vocabulary_id": null | ||||
298 | }, | ||||
299 | { | ||||
300 | "display_name": "search query logs", | ||||
301 | "id": "d2721519-dba5-4567-a1d8-6568d881b85d", | ||||
302 | "name": "search query logs", | ||||
303 | "state": "active", | ||||
304 | "vocabulary_id": null | ||||
305 | }, | ||||
306 | { | ||||
307 | "display_name": "single entity", | ||||
308 | "id": "96a6dc93-4c5c-41bd-a3de-4c6eb347e168", | ||||
309 | "name": "single entity", | ||||
310 | "state": "active", | ||||
311 | "vocabulary_id": null | ||||
312 | }, | ||||
313 | { | ||||
314 | "display_name": "single relation", | ||||
315 | "id": "f4b0bddd-8fab-44fb-8f93-1a8aea1a23ab", | ||||
316 | "name": "single relation", | ||||
317 | "state": "active", | ||||
318 | "vocabulary_id": null | ||||
319 | }, | ||||
320 | { | ||||
321 | "display_name": "user click", | ||||
322 | "id": "2797776e-78c1-49ff-b055-b0e980f795eb", | ||||
323 | "name": "user click", | ||||
324 | "state": "active", | ||||
325 | "vocabulary_id": null | ||||
326 | }, | ||||
327 | { | ||||
328 | "display_name": "zero-shot retrieval", | ||||
329 | "id": "c1e134bc-f820-427f-845b-acf9a5994f12", | ||||
330 | "name": "zero-shot retrieval", | ||||
331 | "state": "active", | 88 | "state": "active", | ||
332 | "vocabulary_id": null | 89 | "vocabulary_id": null | ||
333 | } | 90 | } | ||
334 | ], | 91 | ], | ||
t | 335 | "title": "MS MARCO", | t | 92 | "title": "MS-MARCO", |
336 | "type": "dataset", | 93 | "type": "dataset", | ||
337 | "version": "" | 94 | "version": "" | ||
338 | } | 95 | } |