Changes
On August 4, 2023 at 8:46:40 AM UTC, admin:
- Set author of News400 Dataset to Eric Müller-Budack (previously Eric Müller-Budack, Jonas Theiner, Sebastian Diering, Maximilian Idahl, Ralph Ewerth)
f | 1 | { | f | 1 | { |
n | 2 | "author": "Eric M\u00fcller-Budack, Jonas Theiner, Sebastian | n | 2 | "author": "Eric M\u00fcller-Budack", |
3 | Diering, Maximilian Idahl, Ralph Ewerth", | ||||
4 | "author_email": "eric.mueller@tib.eu", | 3 | "author_email": "eric.mueller@tib.eu", | ||
5 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | 4 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | ||
6 | "doi": "10.25835/0084897", | 5 | "doi": "10.25835/0084897", | ||
7 | "doi_date_published": "2020-06-04", | 6 | "doi_date_published": "2020-06-04", | ||
8 | "doi_publisher": "LUIS", | 7 | "doi_publisher": "LUIS", | ||
9 | "doi_status": "true", | 8 | "doi_status": "true", | ||
10 | "domain": "https://data.uni-hannover.de", | 9 | "domain": "https://data.uni-hannover.de", | ||
n | n | 10 | "extra_authors": [ | ||
11 | { | ||||
12 | "extra_author": " Jonas Theiner" | ||||
13 | }, | ||||
14 | { | ||||
15 | "extra_author": " Sebastian Diering" | ||||
16 | }, | ||||
17 | { | ||||
18 | "extra_author": " Maximilian Idahl" | ||||
19 | }, | ||||
20 | { | ||||
21 | "extra_author": " Ralph Ewerth" | ||||
22 | } | ||||
23 | ], | ||||
11 | "groups": [], | 24 | "groups": [], | ||
12 | "have_copyright": "Yes", | 25 | "have_copyright": "Yes", | ||
13 | "id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | 26 | "id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | ||
14 | "isopen": false, | 27 | "isopen": false, | ||
15 | "license_id": "CC-BY-NC-3.0", | 28 | "license_id": "CC-BY-NC-3.0", | ||
16 | "license_title": "CC-BY-NC-3.0", | 29 | "license_title": "CC-BY-NC-3.0", | ||
17 | "maintainer": "Eric M\u00fcller-Budack", | 30 | "maintainer": "Eric M\u00fcller-Budack", | ||
18 | "maintainer_email": "eric.mueller@tib.eu", | 31 | "maintainer_email": "eric.mueller@tib.eu", | ||
19 | "metadata_created": "2021-10-14T10:16:03.098485", | 32 | "metadata_created": "2021-10-14T10:16:03.098485", | ||
n | 20 | "metadata_modified": "2023-01-12T13:14:21.448200", | n | 33 | "metadata_modified": "2023-08-04T08:46:40.898926", |
21 | "name": "luh-news400", | 34 | "name": "luh-news400", | ||
22 | "notes": "# Multimodal Analytics for Real-world News using Measures | 35 | "notes": "# Multimodal Analytics for Real-world News using Measures | ||
23 | of Cross-modal Entity Consistency\r\n\r\nThis repository contains the | 36 | of Cross-modal Entity Consistency\r\n\r\nThis repository contains the | ||
24 | *News400* dataset introduced in the paper:\r\n\r\n> Eric | 37 | *News400* dataset introduced in the paper:\r\n\r\n> Eric | ||
25 | M\u00fcller-Budack, Jonas Theiner, Sebastian Diering, Maximilian | 38 | M\u00fcller-Budack, Jonas Theiner, Sebastian Diering, Maximilian | ||
26 | Idahl, and Ralph Ewerth. 2020. \r\nMultimodal Analytics for Real-world | 39 | Idahl, and Ralph Ewerth. 2020. \r\nMultimodal Analytics for Real-world | ||
27 | News using Measures of Cross-modal Entity Consistency. \r\nIn | 40 | News using Measures of Cross-modal Entity Consistency. \r\nIn | ||
28 | Proceedings of the 2020 International Conference on Multimedia | 41 | Proceedings of the 2020 International Conference on Multimedia | ||
29 | Retrieval (ICMR '20). Association for Computing Machinery, New York, | 42 | Retrieval (ICMR '20). Association for Computing Machinery, New York, | ||
30 | NY, USA, 16\u201325. DOI: | 43 | NY, USA, 16\u201325. DOI: | ||
31 | https://doi.org/10.1145/3372278.3390670\r\n\r\n## Content\r\n\r\n- | 44 | https://doi.org/10.1145/3372278.3390670\r\n\r\n## Content\r\n\r\n- | ||
32 | **news400.tar.gz**:\r\n - ```dataset.jsonl``` containing:\r\n | 45 | **news400.tar.gz**:\r\n - ```dataset.jsonl``` containing:\r\n | ||
33 | - Web links to the news texts\r\n - Web links to the news | 46 | - Web links to the news texts\r\n - Web links to the news | ||
34 | image\r\n - Outputs of the named entity recognition and | 47 | image\r\n - Outputs of the named entity recognition and | ||
35 | disambiguation (NERD) approach\r\n - Untampered and tampered | 48 | disambiguation (NERD) approach\r\n - Untampered and tampered | ||
36 | entities\r\n - ```<entity>.jsonl``` file for each entity type | 49 | entities\r\n - ```<entity>.jsonl``` file for each entity type | ||
37 | containing the following information for each entity:\r\n - | 50 | containing the following information for each entity:\r\n - | ||
38 | Wikidata ID\r\n - Wikidata label\r\n - Meta information | 51 | Wikidata ID\r\n - Wikidata label\r\n - Meta information | ||
39 | used for tampering\r\n - Web links to all reference images | 52 | used for tampering\r\n - Web links to all reference images | ||
40 | crawled from Google, Bing, and Wikidata\r\n - splits for testing | 53 | crawled from Google, Bing, and Wikidata\r\n - splits for testing | ||
41 | and validation\r\n- **news400_features.tar.gz**:\r\n - Visual | 54 | and validation\r\n- **news400_features.tar.gz**:\r\n - Visual | ||
42 | features of the news images for persons, locations, and scenes\r\n | 55 | features of the news images for persons, locations, and scenes\r\n | ||
43 | - Visual features of the reference images for persons, locations, and | 56 | - Visual features of the reference images for persons, locations, and | ||
44 | scenes\r\n- **news400_wordembeddings.tar.gz**: Word embeddings of all | 57 | scenes\r\n- **news400_wordembeddings.tar.gz**: Word embeddings of all | ||
45 | nouns in the news texts\r\n\r\n## Source Code\r\n\r\nThe source code | 58 | nouns in the news texts\r\n\r\n## Source Code\r\n\r\nThe source code | ||
46 | to reproduce our results as well as download scripts to crawl news | 59 | to reproduce our results as well as download scripts to crawl news | ||
47 | texts and images can be found on our GitHub page: | 60 | texts and images can be found on our GitHub page: | ||
48 | https://github.com/TIBHannover/cross-modal_entity_consistency\r\n", | 61 | https://github.com/TIBHannover/cross-modal_entity_consistency\r\n", | ||
49 | "num_resources": 3, | 62 | "num_resources": 3, | ||
50 | "num_tags": 5, | 63 | "num_tags": 5, | ||
51 | "organization": { | 64 | "organization": { | ||
52 | "approval_status": "approved", | 65 | "approval_status": "approved", | ||
53 | "created": "2017-11-23T17:30:37.757128", | 66 | "created": "2017-11-23T17:30:37.757128", | ||
54 | "description": "The German National Library of Science and | 67 | "description": "The German National Library of Science and | ||
55 | Technology, abbreviated TIB, is the national library of the Federal | 68 | Technology, abbreviated TIB, is the national library of the Federal | ||
56 | Republic of Germany for all fields of engineering, technology, and the | 69 | Republic of Germany for all fields of engineering, technology, and the | ||
57 | natural sciences.", | 70 | natural sciences.", | ||
58 | "id": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | 71 | "id": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | ||
59 | "image_url": | 72 | "image_url": | ||
60 | 3conf/ext/tib_tmpl_bootstrap/Resources/Public/images/TIB_Logo_en.png", | 73 | 3conf/ext/tib_tmpl_bootstrap/Resources/Public/images/TIB_Logo_en.png", | ||
61 | "is_organization": true, | 74 | "is_organization": true, | ||
62 | "name": "tib", | 75 | "name": "tib", | ||
63 | "state": "active", | 76 | "state": "active", | ||
64 | "title": "TIB", | 77 | "title": "TIB", | ||
65 | "type": "organization" | 78 | "type": "organization" | ||
66 | }, | 79 | }, | ||
67 | "owner_org": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | 80 | "owner_org": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | ||
68 | "private": false, | 81 | "private": false, | ||
69 | "relationships_as_object": [], | 82 | "relationships_as_object": [], | ||
70 | "relationships_as_subject": [], | 83 | "relationships_as_subject": [], | ||
71 | "repository_name": "Leibniz University Hannover", | 84 | "repository_name": "Leibniz University Hannover", | ||
72 | "resources": [ | 85 | "resources": [ | ||
73 | { | 86 | { | ||
74 | "cache_last_updated": null, | 87 | "cache_last_updated": null, | ||
75 | "cache_url": null, | 88 | "cache_url": null, | ||
76 | "created": "2020-06-04T13:54:05.550568", | 89 | "created": "2020-06-04T13:54:05.550568", | ||
77 | "description": "", | 90 | "description": "", | ||
78 | "format": "TAR", | 91 | "format": "TAR", | ||
79 | "hash": "", | 92 | "hash": "", | ||
80 | "id": "af85e355-88e1-4d4d-91ec-785e1801dd47", | 93 | "id": "af85e355-88e1-4d4d-91ec-785e1801dd47", | ||
81 | "last_modified": "2020-06-04T13:54:05.503139", | 94 | "last_modified": "2020-06-04T13:54:05.503139", | ||
n | 82 | "metadata_modified": "2023-01-12T13:14:21.451741", | n | 95 | "metadata_modified": "2023-08-04T08:46:40.902402", |
83 | "mimetype": "application/x-tar", | 96 | "mimetype": "application/x-tar", | ||
84 | "mimetype_inner": null, | 97 | "mimetype_inner": null, | ||
85 | "name": "news400.tar.gz", | 98 | "name": "news400.tar.gz", | ||
86 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | 99 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | ||
87 | "position": 0, | 100 | "position": 0, | ||
88 | "resource_type": null, | 101 | "resource_type": null, | ||
89 | "size": 7478569, | 102 | "size": 7478569, | ||
90 | "state": "active", | 103 | "state": "active", | ||
91 | "url": | 104 | "url": | ||
92 | esource/af85e355-88e1-4d4d-91ec-785e1801dd47/download/news400.tar.gz", | 105 | esource/af85e355-88e1-4d4d-91ec-785e1801dd47/download/news400.tar.gz", | ||
93 | "url_type": "" | 106 | "url_type": "" | ||
94 | }, | 107 | }, | ||
95 | { | 108 | { | ||
96 | "cache_last_updated": null, | 109 | "cache_last_updated": null, | ||
97 | "cache_url": null, | 110 | "cache_url": null, | ||
98 | "created": "2020-06-04T14:00:14.775286", | 111 | "created": "2020-06-04T14:00:14.775286", | ||
99 | "description": "", | 112 | "description": "", | ||
100 | "format": "TAR", | 113 | "format": "TAR", | ||
101 | "hash": "", | 114 | "hash": "", | ||
102 | "id": "23365241-435b-4dda-921c-09bd5f908010", | 115 | "id": "23365241-435b-4dda-921c-09bd5f908010", | ||
103 | "last_modified": "2020-06-04T14:00:14.728423", | 116 | "last_modified": "2020-06-04T14:00:14.728423", | ||
n | 104 | "metadata_modified": "2023-01-12T13:14:21.451934", | n | 117 | "metadata_modified": "2023-08-04T08:46:40.902535", |
105 | "mimetype": "application/x-tar", | 118 | "mimetype": "application/x-tar", | ||
106 | "mimetype_inner": null, | 119 | "mimetype_inner": null, | ||
107 | "name": "news400_features.tar.gz", | 120 | "name": "news400_features.tar.gz", | ||
108 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | 121 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | ||
109 | "position": 1, | 122 | "position": 1, | ||
110 | "resource_type": null, | 123 | "resource_type": null, | ||
111 | "size": 471094303, | 124 | "size": 471094303, | ||
112 | "state": "active", | 125 | "state": "active", | ||
113 | "url": | 126 | "url": | ||
114 | 3365241-435b-4dda-921c-09bd5f908010/download/news400_features.tar.gz", | 127 | 3365241-435b-4dda-921c-09bd5f908010/download/news400_features.tar.gz", | ||
115 | "url_type": "" | 128 | "url_type": "" | ||
116 | }, | 129 | }, | ||
117 | { | 130 | { | ||
118 | "cache_last_updated": null, | 131 | "cache_last_updated": null, | ||
119 | "cache_url": null, | 132 | "cache_url": null, | ||
120 | "created": "2020-06-04T14:02:51.017611", | 133 | "created": "2020-06-04T14:02:51.017611", | ||
121 | "description": "", | 134 | "description": "", | ||
122 | "format": "TAR", | 135 | "format": "TAR", | ||
123 | "hash": "", | 136 | "hash": "", | ||
124 | "id": "5500907c-2e5c-4967-a99d-4d64b6cde975", | 137 | "id": "5500907c-2e5c-4967-a99d-4d64b6cde975", | ||
125 | "last_modified": "2020-06-04T14:02:50.971063", | 138 | "last_modified": "2020-06-04T14:02:50.971063", | ||
n | 126 | "metadata_modified": "2023-01-12T13:14:21.452095", | n | 139 | "metadata_modified": "2023-08-04T08:46:40.902643", |
127 | "mimetype": "application/x-tar", | 140 | "mimetype": "application/x-tar", | ||
128 | "mimetype_inner": null, | 141 | "mimetype_inner": null, | ||
129 | "name": "news400_wordembeddings.tar.gz", | 142 | "name": "news400_wordembeddings.tar.gz", | ||
130 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | 143 | "package_id": "c729ffd9-8be1-49a9-8c43-dab2f8a87753", | ||
131 | "position": 2, | 144 | "position": 2, | ||
132 | "resource_type": null, | 145 | "resource_type": null, | ||
133 | "size": 43316706, | 146 | "size": 43316706, | ||
134 | "state": "active", | 147 | "state": "active", | ||
135 | "url": | 148 | "url": | ||
136 | c-2e5c-4967-a99d-4d64b6cde975/download/news400_wordembeddings.tar.gz", | 149 | c-2e5c-4967-a99d-4d64b6cde975/download/news400_wordembeddings.tar.gz", | ||
137 | "url_type": "" | 150 | "url_type": "" | ||
138 | } | 151 | } | ||
139 | ], | 152 | ], | ||
t | t | 153 | "services_used_list": "", | ||
140 | "source_metadata_created": "2020-06-04T13:53:33.217406", | 154 | "source_metadata_created": "2020-06-04T13:53:33.217406", | ||
141 | "source_metadata_modified": "2022-01-20T10:59:42.198563", | 155 | "source_metadata_modified": "2022-01-20T10:59:42.198563", | ||
142 | "state": "active", | 156 | "state": "active", | ||
143 | "tags": [ | 157 | "tags": [ | ||
144 | { | 158 | { | ||
145 | "display_name": "Cross-modal consistency", | 159 | "display_name": "Cross-modal consistency", | ||
146 | "id": "adf268b1-a8c7-4ced-9662-e5b4ef1db197", | 160 | "id": "adf268b1-a8c7-4ced-9662-e5b4ef1db197", | ||
147 | "name": "Cross-modal consistency", | 161 | "name": "Cross-modal consistency", | ||
148 | "state": "active", | 162 | "state": "active", | ||
149 | "vocabulary_id": null | 163 | "vocabulary_id": null | ||
150 | }, | 164 | }, | ||
151 | { | 165 | { | ||
152 | "display_name": "cross-modal entity verification", | 166 | "display_name": "cross-modal entity verification", | ||
153 | "id": "12a9c183-d4e8-4aef-9099-8b29d390f861", | 167 | "id": "12a9c183-d4e8-4aef-9099-8b29d390f861", | ||
154 | "name": "cross-modal entity verification", | 168 | "name": "cross-modal entity verification", | ||
155 | "state": "active", | 169 | "state": "active", | ||
156 | "vocabulary_id": null | 170 | "vocabulary_id": null | ||
157 | }, | 171 | }, | ||
158 | { | 172 | { | ||
159 | "display_name": "deep learning", | 173 | "display_name": "deep learning", | ||
160 | "id": "19e41883-3799-4184-9e0e-26c95795b119", | 174 | "id": "19e41883-3799-4184-9e0e-26c95795b119", | ||
161 | "name": "deep learning", | 175 | "name": "deep learning", | ||
162 | "state": "active", | 176 | "state": "active", | ||
163 | "vocabulary_id": null | 177 | "vocabulary_id": null | ||
164 | }, | 178 | }, | ||
165 | { | 179 | { | ||
166 | "display_name": "image repurposing detection", | 180 | "display_name": "image repurposing detection", | ||
167 | "id": "04bdab29-34f3-4093-bf54-1129fed5f7da", | 181 | "id": "04bdab29-34f3-4093-bf54-1129fed5f7da", | ||
168 | "name": "image repurposing detection", | 182 | "name": "image repurposing detection", | ||
169 | "state": "active", | 183 | "state": "active", | ||
170 | "vocabulary_id": null | 184 | "vocabulary_id": null | ||
171 | }, | 185 | }, | ||
172 | { | 186 | { | ||
173 | "display_name": "multimodal retrieval", | 187 | "display_name": "multimodal retrieval", | ||
174 | "id": "9a52a3c5-10e4-4056-8a1b-98a19044c62f", | 188 | "id": "9a52a3c5-10e4-4056-8a1b-98a19044c62f", | ||
175 | "name": "multimodal retrieval", | 189 | "name": "multimodal retrieval", | ||
176 | "state": "active", | 190 | "state": "active", | ||
177 | "vocabulary_id": null | 191 | "vocabulary_id": null | ||
178 | } | 192 | } | ||
179 | ], | 193 | ], | ||
180 | "terms_of_usage": "Yes", | 194 | "terms_of_usage": "Yes", | ||
181 | "title": "News400 Dataset", | 195 | "title": "News400 Dataset", | ||
182 | "type": "vdataset", | 196 | "type": "vdataset", | ||
183 | "url": "https://data.uni-hannover.de/dataset/news400", | 197 | "url": "https://data.uni-hannover.de/dataset/news400", | ||
184 | "version": "" | 198 | "version": "" | ||
185 | } | 199 | } |