Changes
On July 23, 2021 at 7:32:36 AM UTC, admin:
- Added resource Test Dataset to SemEval-2021 Task 11 Shared Task Dataset
f | 1 | { | f | 1 | { |
2 | "author": "Jennifer D'Souza and S\u00f6ren Auer and Ted Pedersen", | 2 | "author": "Jennifer D'Souza and S\u00f6ren Auer and Ted Pedersen", | ||
3 | "author_email": "", | 3 | "author_email": "", | ||
4 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | 4 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | ||
5 | "extras": [], | 5 | "extras": [], | ||
6 | "groups": [], | 6 | "groups": [], | ||
7 | "id": "2b92e3fd-1ab8-45bb-995c-6102b5bd2f5f", | 7 | "id": "2b92e3fd-1ab8-45bb-995c-6102b5bd2f5f", | ||
8 | "isopen": true, | 8 | "isopen": true, | ||
9 | "license_id": "cc-by-sa", | 9 | "license_id": "cc-by-sa", | ||
10 | "license_title": "Creative Commons Attribution Share-Alike", | 10 | "license_title": "Creative Commons Attribution Share-Alike", | ||
11 | "license_url": "http://www.opendefinition.org/licenses/cc-by-sa", | 11 | "license_url": "http://www.opendefinition.org/licenses/cc-by-sa", | ||
12 | "maintainer": "Jennifer D'Souza", | 12 | "maintainer": "Jennifer D'Souza", | ||
13 | "maintainer_email": "jennifer.dsouza@tib.eu", | 13 | "maintainer_email": "jennifer.dsouza@tib.eu", | ||
14 | "metadata_created": "2021-07-23T07:28:39.166946", | 14 | "metadata_created": "2021-07-23T07:28:39.166946", | ||
n | 15 | "metadata_modified": "2021-07-23T07:31:23.057582", | n | 15 | "metadata_modified": "2021-07-23T07:32:36.605135", |
16 | "name": "semeval-2021-task-11-shared-task-dataset", | 16 | "name": "semeval-2021-task-11-shared-task-dataset", | ||
17 | "notes": "NLPContributionGraph - Structuring Scholarly NLP | 17 | "notes": "NLPContributionGraph - Structuring Scholarly NLP | ||
18 | Contributions in the Open Research Knowledge | 18 | Contributions in the Open Research Knowledge | ||
19 | Graph\r\n\r\nBackground\r\n\r\nNLPContributionGraph was introduced as | 19 | Graph\r\n\r\nBackground\r\n\r\nNLPContributionGraph was introduced as | ||
20 | Task 11 at SemEval 2021 for the first time. The task is defined on a | 20 | Task 11 at SemEval 2021 for the first time. The task is defined on a | ||
21 | dataset of Natural Language Processing (NLP) scholarly articles with | 21 | dataset of Natural Language Processing (NLP) scholarly articles with | ||
22 | their contributions structured to be integrable within Knowledge Graph | 22 | their contributions structured to be integrable within Knowledge Graph | ||
23 | infrastructures such as the Open Research Knowledge Graph. The | 23 | infrastructures such as the Open Research Knowledge Graph. The | ||
24 | structured contribution annotations are provided as (1) Contribution | 24 | structured contribution annotations are provided as (1) Contribution | ||
25 | sentences : a set of sentences about the contribution in the article; | 25 | sentences : a set of sentences about the contribution in the article; | ||
26 | (2) Scientific terms and relations: a set of scientific terms and | 26 | (2) Scientific terms and relations: a set of scientific terms and | ||
27 | relational cue phrases extracted from the contribution sentences; and | 27 | relational cue phrases extracted from the contribution sentences; and | ||
28 | (3) Triples: semantic statements that pair scientific terms with a | 28 | (3) Triples: semantic statements that pair scientific terms with a | ||
29 | relation, modeled toward subject-predicate-object RDF statements for | 29 | relation, modeled toward subject-predicate-object RDF statements for | ||
30 | KG building. The Triples are organized under three (mandatory) or more | 30 | KG building. The Triples are organized under three (mandatory) or more | ||
31 | of twelve total information units (viz., ResearchProblem, Approach, | 31 | of twelve total information units (viz., ResearchProblem, Approach, | ||
32 | Model, Code, Dataset, ExperimentalSetup, Hyperparameters, Baselines, | 32 | Model, Code, Dataset, ExperimentalSetup, Hyperparameters, Baselines, | ||
33 | Results, Tasks, Experiments, and AblationAnalysis).\r\n\r\nThe Shared | 33 | Results, Tasks, Experiments, and AblationAnalysis).\r\n\r\nThe Shared | ||
34 | Task\r\n\r\nAs a complete submission for the Shared Task, given NLP | 34 | Task\r\n\r\nAs a complete submission for the Shared Task, given NLP | ||
35 | scholarly articles in plaintext format, systems had to automatically | 35 | scholarly articles in plaintext format, systems had to automatically | ||
36 | extract the following information: contribution sentences; scientific | 36 | extract the following information: contribution sentences; scientific | ||
37 | term and predicate phrases from the sentences; and * | 37 | term and predicate phrases from the sentences; and * | ||
38 | (subject,predicate,object) triple statements toward KG building | 38 | (subject,predicate,object) triple statements toward KG building | ||
39 | organized under three or more of twelve total information units.\r\n", | 39 | organized under three or more of twelve total information units.\r\n", | ||
n | 40 | "num_resources": 2, | n | 40 | "num_resources": 3, |
41 | "num_tags": 7, | 41 | "num_tags": 7, | ||
42 | "organization": { | 42 | "organization": { | ||
43 | "approval_status": "approved", | 43 | "approval_status": "approved", | ||
44 | "created": "2017-11-23T17:30:37.757128", | 44 | "created": "2017-11-23T17:30:37.757128", | ||
45 | "description": "The German National Library of Science and | 45 | "description": "The German National Library of Science and | ||
46 | Technology, abbreviated TIB, is the national library of the Federal | 46 | Technology, abbreviated TIB, is the national library of the Federal | ||
47 | Republic of Germany for all fields of engineering, technology, and the | 47 | Republic of Germany for all fields of engineering, technology, and the | ||
48 | natural sciences.", | 48 | natural sciences.", | ||
49 | "id": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | 49 | "id": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | ||
50 | "image_url": "https://www.tib.eu/typo | 50 | "image_url": "https://www.tib.eu/typo | ||
51 | 3conf/ext/tib_tmpl_bootstrap/Resources/Public/images/TIB_Logo_en.png", | 51 | 3conf/ext/tib_tmpl_bootstrap/Resources/Public/images/TIB_Logo_en.png", | ||
52 | "is_organization": true, | 52 | "is_organization": true, | ||
53 | "name": "tib-iasis", | 53 | "name": "tib-iasis", | ||
54 | "state": "active", | 54 | "state": "active", | ||
55 | "title": "TIB", | 55 | "title": "TIB", | ||
56 | "type": "organization" | 56 | "type": "organization" | ||
57 | }, | 57 | }, | ||
58 | "owner_org": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | 58 | "owner_org": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | ||
59 | "private": false, | 59 | "private": false, | ||
60 | "relationships_as_object": [], | 60 | "relationships_as_object": [], | ||
61 | "relationships_as_subject": [], | 61 | "relationships_as_subject": [], | ||
62 | "resources": [ | 62 | "resources": [ | ||
63 | { | 63 | { | ||
64 | "cache_last_updated": null, | 64 | "cache_last_updated": null, | ||
65 | "cache_url": null, | 65 | "cache_url": null, | ||
66 | "created": "2021-07-23T07:30:09.849592", | 66 | "created": "2021-07-23T07:30:09.849592", | ||
67 | "datastore_active": false, | 67 | "datastore_active": false, | ||
68 | "description": "\r\nTraining Data for the NLPContributionGraph | 68 | "description": "\r\nTraining Data for the NLPContributionGraph | ||
69 | Shared Task 11 at SemEval-2021\r\n\r\nThe repository is organized as | 69 | Shared Task 11 at SemEval-2021\r\n\r\nThe repository is organized as | ||
70 | follows:\r\n\r\nREADME.md | 70 | follows:\r\n\r\nREADME.md | ||
71 | \r\n[task-name-folder]/ # | 71 | \r\n[task-name-folder]/ # | ||
72 | natural_language_inference, paraphrase_generation, question_answering, | 72 | natural_language_inference, paraphrase_generation, question_answering, | ||
73 | relation_extraction, topic_models\r\n \u251c\u2500\u2500 | 73 | relation_extraction, topic_models\r\n \u251c\u2500\u2500 | ||
74 | [article-counter-folder]/ # ranges between 0 to 100 | 74 | [article-counter-folder]/ # ranges between 0 to 100 | ||
75 | since we annotated varying numbers of articles per task\r\n \u2502 | 75 | since we annotated varying numbers of articles per task\r\n \u2502 | ||
76 | \u251c\u2500\u2500 [articlename].pdf # scholarly | 76 | \u251c\u2500\u2500 [articlename].pdf # scholarly | ||
77 | article pdf\r\n \u2502 \u251c\u2500\u2500 | 77 | article pdf\r\n \u2502 \u251c\u2500\u2500 | ||
78 | [articlename]-Grobid-out.txt # plaintext output from the | 78 | [articlename]-Grobid-out.txt # plaintext output from the | ||
79 | [Grobid parser](https://github.com/kermitt2/grobid)\r\n \u2502 | 79 | [Grobid parser](https://github.com/kermitt2/grobid)\r\n \u2502 | ||
80 | \u251c\u2500\u2500 [articlename]-Stanza-out.txt # plaintext | 80 | \u251c\u2500\u2500 [articlename]-Stanza-out.txt # plaintext | ||
81 | preprocessed output from | 81 | preprocessed output from | ||
82 | [Stanza](https://github.com/stanfordnlp/stanza)\r\n \u2502 | 82 | [Stanza](https://github.com/stanfordnlp/stanza)\r\n \u2502 | ||
83 | \u251c\u2500\u2500 sentences.txt # annotated | 83 | \u251c\u2500\u2500 sentences.txt # annotated | ||
84 | Contribution sentences in the file\r\n \u2502 \u251c\u2500\u2500 | 84 | Contribution sentences in the file\r\n \u2502 \u251c\u2500\u2500 | ||
85 | entities.txt # annotated entities in the | 85 | entities.txt # annotated entities in the | ||
86 | Contribution sentences\r\n \u2502 \u2514\u2500\u2500 info-units/ | 86 | Contribution sentences\r\n \u2502 \u2514\u2500\u2500 info-units/ | ||
87 | # the folder containing information units in JSON format\r\n \u2502 | 87 | # the folder containing information units in JSON format\r\n \u2502 | ||
88 | \u2502 \u2514\u2500\u2500 research-problem.json # | 88 | \u2502 \u2514\u2500\u2500 research-problem.json # | ||
89 | `research problem` mandatory information unit in json format\r\n | 89 | `research problem` mandatory information unit in json format\r\n | ||
90 | \u2502 \u2502 \u2514\u2500\u2500 model.json | 90 | \u2502 \u2502 \u2514\u2500\u2500 model.json | ||
91 | # `model` information unit in json format; in some articles it is | 91 | # `model` information unit in json format; in some articles it is | ||
92 | called `approach`\r\n \u2502 \u2502 \u2514\u2500\u2500 ... | 92 | called `approach`\r\n \u2502 \u2502 \u2514\u2500\u2500 ... | ||
93 | # there are 12 information units in all and each article may be | 93 | # there are 12 information units in all and each article may be | ||
94 | annotated by 3 or 6\r\n \u2502 \u2514\u2500\u2500 triples/ | 94 | annotated by 3 or 6\r\n \u2502 \u2514\u2500\u2500 triples/ | ||
95 | # the folder containing information unit triples one per line\r\n | 95 | # the folder containing information unit triples one per line\r\n | ||
96 | \u2502 \u2502 \u2514\u2500\u2500 research-problem.txt | 96 | \u2502 \u2502 \u2514\u2500\u2500 research-problem.txt | ||
97 | # `research problem` triples (one research problem statement per | 97 | # `research problem` triples (one research problem statement per | ||
98 | line)\r\n \u2502 \u2502 \u2514\u2500\u2500 model.txt | 98 | line)\r\n \u2502 \u2502 \u2514\u2500\u2500 model.txt | ||
99 | # `model` triples (one statement per line)\r\n \u2502 \u2502 | 99 | # `model` triples (one statement per line)\r\n \u2502 \u2502 | ||
100 | \u2514\u2500\u2500 ... # there are 12 | 100 | \u2514\u2500\u2500 ... # there are 12 | ||
101 | information units in all and each article may be annotated by 3 or | 101 | information units in all and each article may be annotated by 3 or | ||
102 | 6\r\n \u2502 \u2514\u2500\u2500 ... | 102 | 6\r\n \u2502 \u2514\u2500\u2500 ... | ||
103 | # there are between 1 to 100 articles annotated for each task, so this | 103 | # there are between 1 to 100 articles annotated for each task, so this | ||
104 | repeats for the remaining annotated articles\r\n \u2514\u2500\u2500 | 104 | repeats for the remaining annotated articles\r\n \u2514\u2500\u2500 | ||
105 | ... # there are 24 tasks | 105 | ... # there are 24 tasks | ||
106 | selected overall, so this repeats 23 more times\r\n\r\n", | 106 | selected overall, so this repeats 23 more times\r\n\r\n", | ||
107 | "format": "json, pdf, txt", | 107 | "format": "json, pdf, txt", | ||
108 | "hash": "", | 108 | "hash": "", | ||
109 | "id": "c4c7da41-4bc7-4512-9cba-611c570cf97f", | 109 | "id": "c4c7da41-4bc7-4512-9cba-611c570cf97f", | ||
110 | "last_modified": null, | 110 | "last_modified": null, | ||
111 | "metadata_modified": "2021-07-23T07:31:23.060633", | 111 | "metadata_modified": "2021-07-23T07:31:23.060633", | ||
112 | "mimetype": null, | 112 | "mimetype": null, | ||
113 | "mimetype_inner": null, | 113 | "mimetype_inner": null, | ||
114 | "name": "Training Dataset", | 114 | "name": "Training Dataset", | ||
115 | "package_id": "2b92e3fd-1ab8-45bb-995c-6102b5bd2f5f", | 115 | "package_id": "2b92e3fd-1ab8-45bb-995c-6102b5bd2f5f", | ||
116 | "position": 0, | 116 | "position": 0, | ||
117 | "resource_type": null, | 117 | "resource_type": null, | ||
118 | "size": null, | 118 | "size": null, | ||
119 | "state": "active", | 119 | "state": "active", | ||
120 | "url": "https://github.com/ncg-task/training-data", | 120 | "url": "https://github.com/ncg-task/training-data", | ||
121 | "url_type": null | 121 | "url_type": null | ||
122 | }, | 122 | }, | ||
123 | { | 123 | { | ||
124 | "cache_last_updated": null, | 124 | "cache_last_updated": null, | ||
125 | "cache_url": null, | 125 | "cache_url": null, | ||
126 | "created": "2021-07-23T07:31:23.074109", | 126 | "created": "2021-07-23T07:31:23.074109", | ||
127 | "datastore_active": false, | 127 | "datastore_active": false, | ||
128 | "description": "Trial data for the NLPContributionGraph Shared | 128 | "description": "Trial data for the NLPContributionGraph Shared | ||
129 | Task 11 at SemEval-2021.", | 129 | Task 11 at SemEval-2021.", | ||
130 | "format": "json, pdf, txt", | 130 | "format": "json, pdf, txt", | ||
131 | "hash": "", | 131 | "hash": "", | ||
132 | "id": "012f4b52-bdb0-4ed3-8c5b-fd00b9630eae", | 132 | "id": "012f4b52-bdb0-4ed3-8c5b-fd00b9630eae", | ||
133 | "last_modified": null, | 133 | "last_modified": null, | ||
n | 134 | "metadata_modified": "2021-07-23T07:31:23.060853", | n | 134 | "metadata_modified": "2021-07-23T07:32:36.608235", |
135 | "mimetype": null, | 135 | "mimetype": null, | ||
136 | "mimetype_inner": null, | 136 | "mimetype_inner": null, | ||
137 | "name": "Trial Dataset", | 137 | "name": "Trial Dataset", | ||
138 | "package_id": "2b92e3fd-1ab8-45bb-995c-6102b5bd2f5f", | 138 | "package_id": "2b92e3fd-1ab8-45bb-995c-6102b5bd2f5f", | ||
139 | "position": 1, | 139 | "position": 1, | ||
140 | "resource_type": null, | 140 | "resource_type": null, | ||
141 | "size": null, | 141 | "size": null, | ||
142 | "state": "active", | 142 | "state": "active", | ||
143 | "url": "https://github.com/ncg-task/trial-data", | 143 | "url": "https://github.com/ncg-task/trial-data", | ||
144 | "url_type": null | 144 | "url_type": null | ||
t | t | 145 | }, | ||
146 | { | ||||
147 | "cache_last_updated": null, | ||||
148 | "cache_url": null, | ||||
149 | "created": "2021-07-23T07:32:36.621018", | ||||
150 | "datastore_active": false, | ||||
151 | "description": "Test Data for the NLPContributionGraph Shared | ||||
152 | Task 11 at SemEval-2021", | ||||
153 | "format": "json, pdf, txt", | ||||
154 | "hash": "", | ||||
155 | "id": "5fd23a53-08de-4723-a5dd-c5bbe8afea14", | ||||
156 | "last_modified": null, | ||||
157 | "metadata_modified": "2021-07-23T07:32:36.608430", | ||||
158 | "mimetype": null, | ||||
159 | "mimetype_inner": null, | ||||
160 | "name": "Test Dataset", | ||||
161 | "package_id": "2b92e3fd-1ab8-45bb-995c-6102b5bd2f5f", | ||||
162 | "position": 2, | ||||
163 | "resource_type": null, | ||||
164 | "size": null, | ||||
165 | "state": "active", | ||||
166 | "url": "https://github.com/ncg-task/test-data", | ||||
167 | "url_type": null | ||||
145 | } | 168 | } | ||
146 | ], | 169 | ], | ||
147 | "state": "draft", | 170 | "state": "draft", | ||
148 | "tags": [ | 171 | "tags": [ | ||
149 | { | 172 | { | ||
150 | "display_name": "dataset", | 173 | "display_name": "dataset", | ||
151 | "id": "ce5ad030-ca3d-47e6-abd1-5c92a2806f1b", | 174 | "id": "ce5ad030-ca3d-47e6-abd1-5c92a2806f1b", | ||
152 | "name": "dataset", | 175 | "name": "dataset", | ||
153 | "state": "active", | 176 | "state": "active", | ||
154 | "vocabulary_id": null | 177 | "vocabulary_id": null | ||
155 | }, | 178 | }, | ||
156 | { | 179 | { | ||
157 | "display_name": "language resource", | 180 | "display_name": "language resource", | ||
158 | "id": "95e3d7f3-d046-428b-98c7-93653d23a183", | 181 | "id": "95e3d7f3-d046-428b-98c7-93653d23a183", | ||
159 | "name": "language resource", | 182 | "name": "language resource", | ||
160 | "state": "active", | 183 | "state": "active", | ||
161 | "vocabulary_id": null | 184 | "vocabulary_id": null | ||
162 | }, | 185 | }, | ||
163 | { | 186 | { | ||
164 | "display_name": "natural language processing", | 187 | "display_name": "natural language processing", | ||
165 | "id": "8af9c93a-1d87-41e0-83d9-f5d01a2bbd0c", | 188 | "id": "8af9c93a-1d87-41e0-83d9-f5d01a2bbd0c", | ||
166 | "name": "natural language processing", | 189 | "name": "natural language processing", | ||
167 | "state": "active", | 190 | "state": "active", | ||
168 | "vocabulary_id": null | 191 | "vocabulary_id": null | ||
169 | }, | 192 | }, | ||
170 | { | 193 | { | ||
171 | "display_name": "open research knowledge graph", | 194 | "display_name": "open research knowledge graph", | ||
172 | "id": "c9fb26fb-f92f-4740-899e-290c1a384971", | 195 | "id": "c9fb26fb-f92f-4740-899e-290c1a384971", | ||
173 | "name": "open research knowledge graph", | 196 | "name": "open research knowledge graph", | ||
174 | "state": "active", | 197 | "state": "active", | ||
175 | "vocabulary_id": null | 198 | "vocabulary_id": null | ||
176 | }, | 199 | }, | ||
177 | { | 200 | { | ||
178 | "display_name": "scholarly knowledge graphs", | 201 | "display_name": "scholarly knowledge graphs", | ||
179 | "id": "759ea23d-8996-4917-b4b5-d020b86f7d1a", | 202 | "id": "759ea23d-8996-4917-b4b5-d020b86f7d1a", | ||
180 | "name": "scholarly knowledge graphs", | 203 | "name": "scholarly knowledge graphs", | ||
181 | "state": "active", | 204 | "state": "active", | ||
182 | "vocabulary_id": null | 205 | "vocabulary_id": null | ||
183 | }, | 206 | }, | ||
184 | { | 207 | { | ||
185 | "display_name": "semeval", | 208 | "display_name": "semeval", | ||
186 | "id": "44b579c2-0954-4335-8e31-92288b06eb7e", | 209 | "id": "44b579c2-0954-4335-8e31-92288b06eb7e", | ||
187 | "name": "semeval", | 210 | "name": "semeval", | ||
188 | "state": "active", | 211 | "state": "active", | ||
189 | "vocabulary_id": null | 212 | "vocabulary_id": null | ||
190 | }, | 213 | }, | ||
191 | { | 214 | { | ||
192 | "display_name": "shared task", | 215 | "display_name": "shared task", | ||
193 | "id": "ffd2394b-4c6d-4c18-9d24-66e7e851bd2f", | 216 | "id": "ffd2394b-4c6d-4c18-9d24-66e7e851bd2f", | ||
194 | "name": "shared task", | 217 | "name": "shared task", | ||
195 | "state": "active", | 218 | "state": "active", | ||
196 | "vocabulary_id": null | 219 | "vocabulary_id": null | ||
197 | } | 220 | } | ||
198 | ], | 221 | ], | ||
199 | "title": "SemEval-2021 Task 11 Shared Task Dataset", | 222 | "title": "SemEval-2021 Task 11 Shared Task Dataset", | ||
200 | "type": "dataset", | 223 | "type": "dataset", | ||
201 | "url": "https://github.com/ncg-task/", | 224 | "url": "https://github.com/ncg-task/", | ||
202 | "version": "1.0" | 225 | "version": "1.0" | ||
203 | } | 226 | } |