Changes
On January 12, 2023 at 1:14:21 PM UTC, admin:
-
Changed value of field
source_metadata_modified
to 2022-01-20T10:59:42.198563
in News400 Dataset -
Deleted resource "All resource data" from News400 Dataset
-
Removed field
datastore_active
from resource news400_features.tar.gz in News400 Dataset -
Removed field
datastore_active
from resource news400.tar.gz in News400 Dataset -
Removed field
datastore_active
from resource news400_wordembeddings.tar.gz in News400 Dataset
f | 1 | { | f | 1 | { |
2 | "author": "Eric M\u00fcller-Budack, Jonas Theiner, Sebastian | 2 | "author": "Eric M\u00fcller-Budack, Jonas Theiner, Sebastian | ||
3 | Diering, Maximilian Idahl, Ralph Ewerth", | 3 | Diering, Maximilian Idahl, Ralph Ewerth", | ||
4 | "author_email": "eric.mueller@tib.eu", | 4 | "author_email": "eric.mueller@tib.eu", | ||
5 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | 5 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | ||
6 | "doi": "10.25835/0084897", | 6 | "doi": "10.25835/0084897", | ||
7 | "doi_date_published": "2020-06-04", | 7 | "doi_date_published": "2020-06-04", | ||
8 | "doi_publisher": "LUIS", | 8 | "doi_publisher": "LUIS", | ||
9 | "doi_status": "true", | 9 | "doi_status": "true", | ||
10 | "domain": "https://data.uni-hannover.de", | 10 | "domain": "https://data.uni-hannover.de", | ||
11 | "groups": [], | 11 | "groups": [], | ||
12 | "have_copyright": "Yes", | 12 | "have_copyright": "Yes", | ||
13 | "id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | 13 | "id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | ||
14 | "isopen": false, | 14 | "isopen": false, | ||
15 | "license_id": "CC-BY-NC-3.0", | 15 | "license_id": "CC-BY-NC-3.0", | ||
16 | "license_title": "CC-BY-NC-3.0", | 16 | "license_title": "CC-BY-NC-3.0", | ||
17 | "maintainer": "Eric M\u00fcller-Budack", | 17 | "maintainer": "Eric M\u00fcller-Budack", | ||
18 | "maintainer_email": "eric.mueller@tib.eu", | 18 | "maintainer_email": "eric.mueller@tib.eu", | ||
19 | "metadata_created": "2021-10-14T10:16:03.098485", | 19 | "metadata_created": "2021-10-14T10:16:03.098485", | ||
n | 20 | "metadata_modified": "2021-10-14T10:16:03.098491", | n | 20 | "metadata_modified": "2023-01-12T13:14:21.448200", |
21 | "name": "luh-news400", | 21 | "name": "luh-news400", | ||
22 | "notes": "# Multimodal Analytics for Real-world News using Measures | 22 | "notes": "# Multimodal Analytics for Real-world News using Measures | ||
23 | of Cross-modal Entity Consistency\r\n\r\nThis repository contains the | 23 | of Cross-modal Entity Consistency\r\n\r\nThis repository contains the | ||
24 | *News400* dataset introduced in the paper:\r\n\r\n> Eric | 24 | *News400* dataset introduced in the paper:\r\n\r\n> Eric | ||
25 | M\u00fcller-Budack, Jonas Theiner, Sebastian Diering, Maximilian | 25 | M\u00fcller-Budack, Jonas Theiner, Sebastian Diering, Maximilian | ||
26 | Idahl, and Ralph Ewerth. 2020. \r\nMultimodal Analytics for Real-world | 26 | Idahl, and Ralph Ewerth. 2020. \r\nMultimodal Analytics for Real-world | ||
27 | News using Measures of Cross-modal Entity Consistency. \r\nIn | 27 | News using Measures of Cross-modal Entity Consistency. \r\nIn | ||
28 | Proceedings of the 2020 International Conference on Multimedia | 28 | Proceedings of the 2020 International Conference on Multimedia | ||
29 | Retrieval (ICMR '20). Association for Computing Machinery, New York, | 29 | Retrieval (ICMR '20). Association for Computing Machinery, New York, | ||
30 | NY, USA, 16\u201325. DOI: | 30 | NY, USA, 16\u201325. DOI: | ||
31 | https://doi.org/10.1145/3372278.3390670\r\n\r\n## Content\r\n\r\n- | 31 | https://doi.org/10.1145/3372278.3390670\r\n\r\n## Content\r\n\r\n- | ||
32 | **news400.tar.gz**:\r\n - ```dataset.jsonl``` containing:\r\n | 32 | **news400.tar.gz**:\r\n - ```dataset.jsonl``` containing:\r\n | ||
33 | - Web links to the news texts\r\n - Web links to the news | 33 | - Web links to the news texts\r\n - Web links to the news | ||
34 | image\r\n - Outputs of the named entity recognition and | 34 | image\r\n - Outputs of the named entity recognition and | ||
35 | disambiguation (NERD) approach\r\n - Untampered and tampered | 35 | disambiguation (NERD) approach\r\n - Untampered and tampered | ||
36 | entities\r\n - ```<entity>.jsonl``` file for each entity type | 36 | entities\r\n - ```<entity>.jsonl``` file for each entity type | ||
37 | containing the following information for each entity:\r\n - | 37 | containing the following information for each entity:\r\n - | ||
38 | Wikidata ID\r\n - Wikidata label\r\n - Meta information | 38 | Wikidata ID\r\n - Wikidata label\r\n - Meta information | ||
39 | used for tampering\r\n - Web links to all reference images | 39 | used for tampering\r\n - Web links to all reference images | ||
40 | crawled from Google, Bing, and Wikidata\r\n - splits for testing | 40 | crawled from Google, Bing, and Wikidata\r\n - splits for testing | ||
41 | and validation\r\n- **news400_features.tar.gz**:\r\n - Visual | 41 | and validation\r\n- **news400_features.tar.gz**:\r\n - Visual | ||
42 | features of the news images for persons, locations, and scenes\r\n | 42 | features of the news images for persons, locations, and scenes\r\n | ||
43 | - Visual features of the reference images for persons, locations, and | 43 | - Visual features of the reference images for persons, locations, and | ||
44 | scenes\r\n- **news400_wordembeddings.tar.gz**: Word embeddings of all | 44 | scenes\r\n- **news400_wordembeddings.tar.gz**: Word embeddings of all | ||
45 | nouns in the news texts\r\n\r\n## Source Code\r\n\r\nThe source code | 45 | nouns in the news texts\r\n\r\n## Source Code\r\n\r\nThe source code | ||
46 | to reproduce our results as well as download scripts to crawl news | 46 | to reproduce our results as well as download scripts to crawl news | ||
47 | texts and images can be found on our GitHub page: | 47 | texts and images can be found on our GitHub page: | ||
48 | https://github.com/TIBHannover/cross-modal_entity_consistency\r\n", | 48 | https://github.com/TIBHannover/cross-modal_entity_consistency\r\n", | ||
n | 49 | "num_resources": 4, | n | 49 | "num_resources": 3, |
50 | "num_tags": 5, | 50 | "num_tags": 5, | ||
51 | "organization": { | 51 | "organization": { | ||
52 | "approval_status": "approved", | 52 | "approval_status": "approved", | ||
53 | "created": "2017-11-23T17:30:37.757128", | 53 | "created": "2017-11-23T17:30:37.757128", | ||
54 | "description": "The German National Library of Science and | 54 | "description": "The German National Library of Science and | ||
55 | Technology, abbreviated TIB, is the national library of the Federal | 55 | Technology, abbreviated TIB, is the national library of the Federal | ||
56 | Republic of Germany for all fields of engineering, technology, and the | 56 | Republic of Germany for all fields of engineering, technology, and the | ||
57 | natural sciences.", | 57 | natural sciences.", | ||
58 | "id": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | 58 | "id": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | ||
59 | "image_url": | 59 | "image_url": | ||
60 | 3conf/ext/tib_tmpl_bootstrap/Resources/Public/images/TIB_Logo_en.png", | 60 | 3conf/ext/tib_tmpl_bootstrap/Resources/Public/images/TIB_Logo_en.png", | ||
61 | "is_organization": true, | 61 | "is_organization": true, | ||
62 | "name": "tib", | 62 | "name": "tib", | ||
63 | "state": "active", | 63 | "state": "active", | ||
64 | "title": "TIB", | 64 | "title": "TIB", | ||
65 | "type": "organization" | 65 | "type": "organization" | ||
66 | }, | 66 | }, | ||
67 | "owner_org": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | 67 | "owner_org": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | ||
68 | "private": false, | 68 | "private": false, | ||
69 | "relationships_as_object": [], | 69 | "relationships_as_object": [], | ||
70 | "relationships_as_subject": [], | 70 | "relationships_as_subject": [], | ||
71 | "repository_name": "Leibniz University Hannover", | 71 | "repository_name": "Leibniz University Hannover", | ||
72 | "resources": [ | 72 | "resources": [ | ||
73 | { | 73 | { | ||
74 | "cache_last_updated": null, | 74 | "cache_last_updated": null, | ||
75 | "cache_url": null, | 75 | "cache_url": null, | ||
76 | "created": "2020-06-04T13:54:05.550568", | 76 | "created": "2020-06-04T13:54:05.550568", | ||
n | 77 | "datastore_active": false, | n | ||
78 | "description": "", | 77 | "description": "", | ||
79 | "format": "TAR", | 78 | "format": "TAR", | ||
80 | "hash": "", | 79 | "hash": "", | ||
81 | "id": "af85e355-88e1-4d4d-91ec-785e1801dd47", | 80 | "id": "af85e355-88e1-4d4d-91ec-785e1801dd47", | ||
82 | "last_modified": "2020-06-04T13:54:05.503139", | 81 | "last_modified": "2020-06-04T13:54:05.503139", | ||
n | 83 | "metadata_modified": "2021-10-14T10:16:03.085395", | n | 82 | "metadata_modified": "2023-01-12T13:14:21.451741", |
84 | "mimetype": "application/x-tar", | 83 | "mimetype": "application/x-tar", | ||
85 | "mimetype_inner": null, | 84 | "mimetype_inner": null, | ||
86 | "name": "news400.tar.gz", | 85 | "name": "news400.tar.gz", | ||
87 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | 86 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | ||
88 | "position": 0, | 87 | "position": 0, | ||
89 | "resource_type": null, | 88 | "resource_type": null, | ||
n | 90 | "revision_id": "c67ae631-53d5-4c15-a24d-ee0d99e8c4bf", | n | ||
91 | "size": 7478569, | 89 | "size": 7478569, | ||
92 | "state": "active", | 90 | "state": "active", | ||
93 | "url": | 91 | "url": | ||
94 | esource/af85e355-88e1-4d4d-91ec-785e1801dd47/download/news400.tar.gz", | 92 | esource/af85e355-88e1-4d4d-91ec-785e1801dd47/download/news400.tar.gz", | ||
95 | "url_type": "" | 93 | "url_type": "" | ||
96 | }, | 94 | }, | ||
97 | { | 95 | { | ||
98 | "cache_last_updated": null, | 96 | "cache_last_updated": null, | ||
99 | "cache_url": null, | 97 | "cache_url": null, | ||
100 | "created": "2020-06-04T14:00:14.775286", | 98 | "created": "2020-06-04T14:00:14.775286", | ||
n | 101 | "datastore_active": false, | n | ||
102 | "description": "", | 99 | "description": "", | ||
103 | "format": "TAR", | 100 | "format": "TAR", | ||
104 | "hash": "", | 101 | "hash": "", | ||
105 | "id": "23365241-435b-4dda-921c-09bd5f908010", | 102 | "id": "23365241-435b-4dda-921c-09bd5f908010", | ||
106 | "last_modified": "2020-06-04T14:00:14.728423", | 103 | "last_modified": "2020-06-04T14:00:14.728423", | ||
n | 107 | "metadata_modified": "2021-10-14T10:16:03.086664", | n | 104 | "metadata_modified": "2023-01-12T13:14:21.451934", |
108 | "mimetype": "application/x-tar", | 105 | "mimetype": "application/x-tar", | ||
109 | "mimetype_inner": null, | 106 | "mimetype_inner": null, | ||
110 | "name": "news400_features.tar.gz", | 107 | "name": "news400_features.tar.gz", | ||
111 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | 108 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | ||
112 | "position": 1, | 109 | "position": 1, | ||
113 | "resource_type": null, | 110 | "resource_type": null, | ||
n | 114 | "revision_id": "3df57a77-e3cf-43bd-9692-c36d6a06d547", | n | ||
115 | "size": 471094303, | 111 | "size": 471094303, | ||
116 | "state": "active", | 112 | "state": "active", | ||
117 | "url": | 113 | "url": | ||
118 | 3365241-435b-4dda-921c-09bd5f908010/download/news400_features.tar.gz", | 114 | 3365241-435b-4dda-921c-09bd5f908010/download/news400_features.tar.gz", | ||
119 | "url_type": "" | 115 | "url_type": "" | ||
120 | }, | 116 | }, | ||
121 | { | 117 | { | ||
122 | "cache_last_updated": null, | 118 | "cache_last_updated": null, | ||
123 | "cache_url": null, | 119 | "cache_url": null, | ||
124 | "created": "2020-06-04T14:02:51.017611", | 120 | "created": "2020-06-04T14:02:51.017611", | ||
n | 125 | "datastore_active": false, | n | ||
126 | "description": "", | 121 | "description": "", | ||
127 | "format": "TAR", | 122 | "format": "TAR", | ||
128 | "hash": "", | 123 | "hash": "", | ||
129 | "id": "5500907c-2e5c-4967-a99d-4d64b6cde975", | 124 | "id": "5500907c-2e5c-4967-a99d-4d64b6cde975", | ||
130 | "last_modified": "2020-06-04T14:02:50.971063", | 125 | "last_modified": "2020-06-04T14:02:50.971063", | ||
n | 131 | "metadata_modified": "2021-10-14T10:16:03.087921", | n | 126 | "metadata_modified": "2023-01-12T13:14:21.452095", |
132 | "mimetype": "application/x-tar", | 127 | "mimetype": "application/x-tar", | ||
133 | "mimetype_inner": null, | 128 | "mimetype_inner": null, | ||
134 | "name": "news400_wordembeddings.tar.gz", | 129 | "name": "news400_wordembeddings.tar.gz", | ||
135 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | 130 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | ||
136 | "position": 2, | 131 | "position": 2, | ||
137 | "resource_type": null, | 132 | "resource_type": null, | ||
n | 138 | "revision_id": "e1347495-f742-4f60-863c-95d8d2e66d5f", | n | ||
139 | "size": 43316706, | 133 | "size": 43316706, | ||
140 | "state": "active", | 134 | "state": "active", | ||
141 | "url": | 135 | "url": | ||
142 | c-2e5c-4967-a99d-4d64b6cde975/download/news400_wordembeddings.tar.gz", | 136 | c-2e5c-4967-a99d-4d64b6cde975/download/news400_wordembeddings.tar.gz", | ||
143 | "url_type": "" | 137 | "url_type": "" | ||
n | 144 | }, | n | ||
145 | { | ||||
146 | "cache_last_updated": null, | ||||
147 | "cache_url": null, | ||||
148 | "created": "2020-06-08T13:11:48.406760", | ||||
149 | "datastore_active": false, | ||||
150 | "description": "", | ||||
151 | "downloadall_datapackage_hash": | ||||
152 | "87aa2e26bd640cbb4e7acb3177323ced", | ||||
153 | "downloadall_metadata_modified": "2020-06-04T14:04:33.377341", | ||||
154 | "format": "ZIP", | ||||
155 | "hash": "", | ||||
156 | "id": "d77bf15b-9cdd-473a-8110-9cd3132c3d8d", | ||||
157 | "last_modified": "2020-06-08T13:11:48.251282", | ||||
158 | "metadata_modified": "2021-10-14T10:16:03.089174", | ||||
159 | "mimetype": "application/zip", | ||||
160 | "mimetype_inner": null, | ||||
161 | "name": "All resource data", | ||||
162 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | ||||
163 | "position": 3, | ||||
164 | "resource_type": null, | ||||
165 | "revision_id": "e1347495-f742-4f60-863c-95d8d2e66d5f", | ||||
166 | "size": 522044833, | ||||
167 | "state": "active", | ||||
168 | "url": | ||||
169 | rce/d77bf15b-9cdd-473a-8110-9cd3132c3d8d/download/news400-v4tceu.zip", | ||||
170 | "url_type": "" | ||||
171 | } | 138 | } | ||
172 | ], | 139 | ], | ||
173 | "source_metadata_created": "2020-06-04T13:53:33.217406", | 140 | "source_metadata_created": "2020-06-04T13:53:33.217406", | ||
t | 174 | "source_metadata_modified": "2020-11-11T15:25:07.596446", | t | 141 | "source_metadata_modified": "2022-01-20T10:59:42.198563", |
175 | "state": "active", | 142 | "state": "active", | ||
176 | "tags": [ | 143 | "tags": [ | ||
177 | { | 144 | { | ||
178 | "display_name": "Cross-modal consistency", | 145 | "display_name": "Cross-modal consistency", | ||
179 | "id": "adf268b1-a8c7-4ced-9662-e5b4ef1db197", | 146 | "id": "adf268b1-a8c7-4ced-9662-e5b4ef1db197", | ||
180 | "name": "Cross-modal consistency", | 147 | "name": "Cross-modal consistency", | ||
181 | "state": "active", | 148 | "state": "active", | ||
182 | "vocabulary_id": null | 149 | "vocabulary_id": null | ||
183 | }, | 150 | }, | ||
184 | { | 151 | { | ||
185 | "display_name": "cross-modal entity verification", | 152 | "display_name": "cross-modal entity verification", | ||
186 | "id": "12a9c183-d4e8-4aef-9099-8b29d390f861", | 153 | "id": "12a9c183-d4e8-4aef-9099-8b29d390f861", | ||
187 | "name": "cross-modal entity verification", | 154 | "name": "cross-modal entity verification", | ||
188 | "state": "active", | 155 | "state": "active", | ||
189 | "vocabulary_id": null | 156 | "vocabulary_id": null | ||
190 | }, | 157 | }, | ||
191 | { | 158 | { | ||
192 | "display_name": "deep learning", | 159 | "display_name": "deep learning", | ||
193 | "id": "19e41883-3799-4184-9e0e-26c95795b119", | 160 | "id": "19e41883-3799-4184-9e0e-26c95795b119", | ||
194 | "name": "deep learning", | 161 | "name": "deep learning", | ||
195 | "state": "active", | 162 | "state": "active", | ||
196 | "vocabulary_id": null | 163 | "vocabulary_id": null | ||
197 | }, | 164 | }, | ||
198 | { | 165 | { | ||
199 | "display_name": "image repurposing detection", | 166 | "display_name": "image repurposing detection", | ||
200 | "id": "04bdab29-34f3-4093-bf54-1129fed5f7da", | 167 | "id": "04bdab29-34f3-4093-bf54-1129fed5f7da", | ||
201 | "name": "image repurposing detection", | 168 | "name": "image repurposing detection", | ||
202 | "state": "active", | 169 | "state": "active", | ||
203 | "vocabulary_id": null | 170 | "vocabulary_id": null | ||
204 | }, | 171 | }, | ||
205 | { | 172 | { | ||
206 | "display_name": "multimodal retrieval", | 173 | "display_name": "multimodal retrieval", | ||
207 | "id": "9a52a3c5-10e4-4056-8a1b-98a19044c62f", | 174 | "id": "9a52a3c5-10e4-4056-8a1b-98a19044c62f", | ||
208 | "name": "multimodal retrieval", | 175 | "name": "multimodal retrieval", | ||
209 | "state": "active", | 176 | "state": "active", | ||
210 | "vocabulary_id": null | 177 | "vocabulary_id": null | ||
211 | } | 178 | } | ||
212 | ], | 179 | ], | ||
213 | "terms_of_usage": "Yes", | 180 | "terms_of_usage": "Yes", | ||
214 | "title": "News400 Dataset", | 181 | "title": "News400 Dataset", | ||
215 | "type": "vdataset", | 182 | "type": "vdataset", | ||
216 | "url": "https://data.uni-hannover.de/dataset/news400", | 183 | "url": "https://data.uni-hannover.de/dataset/news400", | ||
217 | "version": "" | 184 | "version": "" | ||
218 | } | 185 | } |