Changes
On August 4, 2023 at 8:46:24 AM UTC, admin:
-
No fields were updated. See the metadata diff for more details.
f | 1 | { | f | 1 | { |
2 | "author": "Jennifer D'Souza", | 2 | "author": "Jennifer D'Souza", | ||
3 | "author_email": "jennifer.dsouza@tib.eu", | 3 | "author_email": "jennifer.dsouza@tib.eu", | ||
4 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | 4 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | ||
5 | "doi": "10.25835/hodc41f5", | 5 | "doi": "10.25835/hodc41f5", | ||
6 | "doi_date_published": "2022-10-07", | 6 | "doi_date_published": "2022-10-07", | ||
7 | "doi_publisher": "LUIS", | 7 | "doi_publisher": "LUIS", | ||
8 | "doi_status": "true", | 8 | "doi_status": "true", | ||
9 | "domain": "https://data.uni-hannover.de", | 9 | "domain": "https://data.uni-hannover.de", | ||
10 | "groups": [], | 10 | "groups": [], | ||
11 | "have_copyright": "Yes", | 11 | "have_copyright": "Yes", | ||
12 | "id": "e7e8ca91-d0bb-433b-b1a0-9a92c9477775", | 12 | "id": "e7e8ca91-d0bb-433b-b1a0-9a92c9477775", | ||
13 | "isopen": false, | 13 | "isopen": false, | ||
14 | "license_id": "CC-BY-SA-3.0", | 14 | "license_id": "CC-BY-SA-3.0", | ||
15 | "license_title": "CC-BY-SA-3.0", | 15 | "license_title": "CC-BY-SA-3.0", | ||
16 | "maintainer": "Jennifer D'Souza", | 16 | "maintainer": "Jennifer D'Souza", | ||
17 | "maintainer_email": "", | 17 | "maintainer_email": "", | ||
18 | "metadata_created": "2023-01-12T13:14:10.784838", | 18 | "metadata_created": "2023-01-12T13:14:10.784838", | ||
n | 19 | "metadata_modified": "2023-01-12T13:14:10.784844", | n | 19 | "metadata_modified": "2023-08-04T08:46:24.834737", |
20 | "name": "luh-cs-ner-dataset", | 20 | "name": "luh-cs-ner-dataset", | ||
21 | "notes": "# Computer Science Named Entity Recognition in the Open | 21 | "notes": "# Computer Science Named Entity Recognition in the Open | ||
22 | Research Knowledge Graph\r\n\r\n### 1) About\r\n\r\nThis work proposes | 22 | Research Knowledge Graph\r\n\r\n### 1) About\r\n\r\nThis work proposes | ||
23 | a standardized CS-NER task by defining a set of seven | 23 | a standardized CS-NER task by defining a set of seven | ||
24 | _contribution-centric_ scholarly\r\nentities for CS NER viz., | 24 | _contribution-centric_ scholarly\r\nentities for CS NER viz., | ||
25 | _research problem_ , _solution_ , _resource_ , _language_ ,\r\n_tool_ | 25 | _research problem_ , _solution_ , _resource_ , _language_ ,\r\n_tool_ | ||
26 | , _method_ , and _dataset_ . \r\n\r\nThe main contributions | 26 | , _method_ , and _dataset_ . \r\n\r\nThe main contributions | ||
27 | are:\r\n\r\n1) Merges annotations for contribution-centric named | 27 | are:\r\n\r\n1) Merges annotations for contribution-centric named | ||
28 | entities from related work as the following datasets: \r\n \r\n- The | 28 | entities from related work as the following datasets: \r\n \r\n- The | ||
29 | dataset proposed in [Analyzing the Dynamics of Research by Extracting | 29 | dataset proposed in [Analyzing the Dynamics of Research by Extracting | ||
30 | Key Aspects of Scientific Papers](https://aclanthology.org/I11-1001/) | 30 | Key Aspects of Scientific Papers](https://aclanthology.org/I11-1001/) | ||
31 | (Gupta & Manning, IJCNLP 2011) is the source for | 31 | (Gupta & Manning, IJCNLP 2011) is the source for | ||
32 | ](https://github.com/jd-coderepos/contributions-ner-cs/tree/main/ftd), | 32 | ](https://github.com/jd-coderepos/contributions-ner-cs/tree/main/ftd), | ||
33 | annotated for both titles and abstracts for the following select | 33 | annotated for both titles and abstracts for the following select | ||
34 | entities mapped to our standardized types _focus_ -> _solution_ ; | 34 | entities mapped to our standardized types _focus_ -> _solution_ ; | ||
35 | _domain_ -> _research problem_ ; and _technique_ -> _method_ \r\n | 35 | _domain_ -> _research problem_ ; and _technique_ -> _method_ \r\n | ||
36 | \r\n- The dataset proposed in [Multi-Task Identification of Entities, | 36 | \r\n- The dataset proposed in [Multi-Task Identification of Entities, | ||
37 | Relations, and Coreference for Scientific Knowledge Graph | 37 | Relations, and Coreference for Scientific Knowledge Graph | ||
38 | Construction](https://aclanthology.org/D18-1360/) (Luan et al., EMNLP | 38 | Construction](https://aclanthology.org/D18-1360/) (Luan et al., EMNLP | ||
39 | 2018) is the source for | 39 | 2018) is the source for | ||
40 | ttps://github.com/jd-coderepos/contributions-ner-cs/tree/main/scierc), | 40 | ttps://github.com/jd-coderepos/contributions-ner-cs/tree/main/scierc), | ||
41 | annotated for abstracts for the following select entities with | 41 | annotated for abstracts for the following select entities with | ||
42 | mappings _task_ -> _research problem_\r\n\r\n - The dataset proposed | 42 | mappings _task_ -> _research problem_\r\n\r\n - The dataset proposed | ||
43 | in [SemEval-2021 Task 11: NLPContributionGraph - Structuring Scholarly | 43 | in [SemEval-2021 Task 11: NLPContributionGraph - Structuring Scholarly | ||
44 | NLP Contributions for a Research Knowledge | 44 | NLP Contributions for a Research Knowledge | ||
45 | Graph](https://aclanthology.org/2021.semeval-1.44/) (D\u2019Souza et | 45 | Graph](https://aclanthology.org/2021.semeval-1.44/) (D\u2019Souza et | ||
46 | al., SemEval 2021) is the source for | 46 | al., SemEval 2021) is the source for | ||
47 | ](https://github.com/jd-coderepos/contributions-ner-cs/tree/main/ncg), | 47 | ](https://github.com/jd-coderepos/contributions-ner-cs/tree/main/ncg), | ||
48 | annotated for both titles and abstracts for _research problem_\r\n\r\n | 48 | annotated for both titles and abstracts for _research problem_\r\n\r\n | ||
49 | - https://paperswithcode.com/ as the | 49 | - https://paperswithcode.com/ as the | ||
50 | c](https://github.com/jd-coderepos/contributions-ner-cs/tree/main/pwc) | 50 | c](https://github.com/jd-coderepos/contributions-ner-cs/tree/main/pwc) | ||
51 | annotated for both titles and abstracts for _task_ -> _research | 51 | annotated for both titles and abstracts for _task_ -> _research | ||
52 | problem_ and _method_ entities.\r\n\r\n2) Additionally, supplies a new | 52 | problem_ and _method_ entities.\r\n\r\n2) Additionally, supplies a new | ||
53 | annotated dataset for the titles in the ACL anthology in the [acl | 53 | annotated dataset for the titles in the ACL anthology in the [acl | ||
54 | y](https://github.com/jd-coderepos/contributions-ner-cs/tree/main/acl) | 54 | y](https://github.com/jd-coderepos/contributions-ner-cs/tree/main/acl) | ||
55 | \r\nwhere titles are annotated with all seven entities.\r\n\r\n\r\n### | 55 | \r\nwhere titles are annotated with all seven entities.\r\n\r\n\r\n### | ||
56 | 2) Dataset Statistics for [full | 56 | 2) Dataset Statistics for [full | ||
57 | d-coderepos/contributions-ner-cs/tree/main/full%20dataset)\r\n\r\n#### | 57 | d-coderepos/contributions-ner-cs/tree/main/full%20dataset)\r\n\r\n#### | ||
58 | Titles\r\n\r\n`train.data`\r\n\r\n| NER | Count |\r\n\r\n| --- | --- | 58 | Titles\r\n\r\n`train.data`\r\n\r\n| NER | Count |\r\n\r\n| --- | --- | ||
59 | |\r\n\r\n| solution | 65,213 |\r\n\r\n| research problem | 43,033 | 59 | |\r\n\r\n| solution | 65,213 |\r\n\r\n| research problem | 43,033 | ||
60 | |\r\n\r\n| resource | 19,759 |\r\n\r\n| method | 19,645 |\r\n\r\n| | 60 | |\r\n\r\n| resource | 19,759 |\r\n\r\n| method | 19,645 |\r\n\r\n| | ||
61 | tool | 4,856 |\r\n\r\n| dataset | 4,062 |\r\n\r\n| language | 1,704 | 61 | tool | 4,856 |\r\n\r\n| dataset | 4,062 |\r\n\r\n| language | 1,704 | ||
62 | |\r\n\r\n`dev.data`\r\n\r\n| NER | Count |\r\n\r\n| --- | --- | 62 | |\r\n\r\n`dev.data`\r\n\r\n| NER | Count |\r\n\r\n| --- | --- | ||
63 | |\r\n\r\n| solution | 3,685 |\r\n\r\n| research problem | 2,717 | 63 | |\r\n\r\n| solution | 3,685 |\r\n\r\n| research problem | 2,717 | ||
64 | |\r\n\r\n| resource | 1,224 |\r\n\r\n| method | 1,172 |\r\n\r\n| tool | 64 | |\r\n\r\n| resource | 1,224 |\r\n\r\n| method | 1,172 |\r\n\r\n| tool | ||
65 | | 264 |\r\n\r\n| dataset | 191 |\r\n\r\n| language | 79 | 65 | | 264 |\r\n\r\n| dataset | 191 |\r\n\r\n| language | 79 | ||
66 | |\r\n\r\n`test.data`\r\n\r\n| NER | Count |\r\n\r\n| --- | --- | 66 | |\r\n\r\n`test.data`\r\n\r\n| NER | Count |\r\n\r\n| --- | --- | ||
67 | |\r\n\r\n| solution | 29,287 |\r\n\r\n| research problem | 11,093 | 67 | |\r\n\r\n| solution | 29,287 |\r\n\r\n| research problem | 11,093 | ||
68 | |\r\n\r\n| resource | 8,511 |\r\n\r\n| method | 7,009 |\r\n\r\n| tool | 68 | |\r\n\r\n| resource | 8,511 |\r\n\r\n| method | 7,009 |\r\n\r\n| tool | ||
69 | | 2,272 |\r\n\r\n| dataset | 947 |\r\n\r\n| language | 690 | 69 | | 2,272 |\r\n\r\n| dataset | 947 |\r\n\r\n| language | 690 | ||
70 | |\r\n\r\n\r\n#### Abstracts\r\n\r\n`train-abs.data`\r\n\r\n| NER | | 70 | |\r\n\r\n\r\n#### Abstracts\r\n\r\n`train-abs.data`\r\n\r\n| NER | | ||
71 | Count |\r\n\r\n| --- | --- |\r\n\r\n| research problem | 15,498 | 71 | Count |\r\n\r\n| --- | --- |\r\n\r\n| research problem | 15,498 | ||
72 | |\r\n\r\n| method | 12,932 |\r\n\r\n`dev-abs.data`\r\n\r\n| NER | | 72 | |\r\n\r\n| method | 12,932 |\r\n\r\n`dev-abs.data`\r\n\r\n| NER | | ||
73 | Count |\r\n\r\n| --- | --- |\r\n\r\n| research problem | 1,450 | 73 | Count |\r\n\r\n| --- | --- |\r\n\r\n| research problem | 1,450 | ||
74 | |\r\n\r\n| method | 839 |\r\n\r\n`test-abs.data`\r\n\r\n| NER | Count | 74 | |\r\n\r\n| method | 839 |\r\n\r\n`test-abs.data`\r\n\r\n| NER | Count | ||
75 | |\r\n\r\n| --- | --- |\r\n\r\n| research problem | 4,123 |\r\n\r\n| | 75 | |\r\n\r\n| --- | --- |\r\n\r\n| research problem | 4,123 |\r\n\r\n| | ||
76 | method | 3,170 |\r\n\r\nThe remaining repositories have specialized | 76 | method | 3,170 |\r\n\r\nThe remaining repositories have specialized | ||
77 | README files with the respective dataset statistics.\r\n\r\n### 3) | 77 | README files with the respective dataset statistics.\r\n\r\n### 3) | ||
78 | Citation\r\n\r\nAccepted for publication in [ICADL | 78 | Citation\r\n\r\nAccepted for publication in [ICADL | ||
79 | 2022](https://icadl.net/icadl2022/) proceedings.\r\n\r\n`Citation | 79 | 2022](https://icadl.net/icadl2022/) proceedings.\r\n\r\n`Citation | ||
80 | information | 80 | information | ||
81 | forthcoming`\r\n\r\n\r\nPreprint\r\n\r\n```\r\n@article{d2022computer,\r\n | 81 | forthcoming`\r\n\r\n\r\nPreprint\r\n\r\n```\r\n@article{d2022computer,\r\n | ||
82 | title={Computer Science Named Entity Recognition in the Open Research | 82 | title={Computer Science Named Entity Recognition in the Open Research | ||
83 | Knowledge Graph},\r\n author={D'Souza, Jennifer and Auer, | 83 | Knowledge Graph},\r\n author={D'Souza, Jennifer and Auer, | ||
84 | S{\\\"o}ren},\r\n journal={arXiv preprint arXiv:2203.14579},\r\n | 84 | S{\\\"o}ren},\r\n journal={arXiv preprint arXiv:2203.14579},\r\n | ||
85 | year={2022}\r\n}\r\n```\r\n\r\n### 4) Additional resources\r\n\r\n#### | 85 | year={2022}\r\n}\r\n```\r\n\r\n### 4) Additional resources\r\n\r\n#### | ||
86 | CS NER Software trained on the dataset in this | 86 | CS NER Software trained on the dataset in this | ||
87 | repository\r\n\r\nCodebase: | 87 | repository\r\n\r\nCodebase: | ||
88 | orkg/nlp/orkg-nlp-experiments/-/tree/master/orkg_cs_ner\r\n\r\nService | 88 | orkg/nlp/orkg-nlp-experiments/-/tree/master/orkg_cs_ner\r\n\r\nService | ||
89 | URL - REST API: | 89 | URL - REST API: | ||
90 | /docs#/annotation/annotates_paper_annotation_csner_post\r\n\r\nService | 90 | /docs#/annotation/annotates_paper_annotation_csner_post\r\n\r\nService | ||
91 | URL - PyPi: | 91 | URL - PyPi: | ||
92 | ervices/services.html#cs-ner-computer-science-named-entity-recognition | 92 | ervices/services.html#cs-ner-computer-science-named-entity-recognition | ||
93 | \r\n\r\n", | 93 | \r\n\r\n", | ||
94 | "num_resources": 1, | 94 | "num_resources": 1, | ||
95 | "num_tags": 4, | 95 | "num_tags": 4, | ||
96 | "organization": { | 96 | "organization": { | ||
97 | "approval_status": "approved", | 97 | "approval_status": "approved", | ||
98 | "created": "2017-11-23T17:30:37.757128", | 98 | "created": "2017-11-23T17:30:37.757128", | ||
99 | "description": "The German National Library of Science and | 99 | "description": "The German National Library of Science and | ||
100 | Technology, abbreviated TIB, is the national library of the Federal | 100 | Technology, abbreviated TIB, is the national library of the Federal | ||
101 | Republic of Germany for all fields of engineering, technology, and the | 101 | Republic of Germany for all fields of engineering, technology, and the | ||
102 | natural sciences.", | 102 | natural sciences.", | ||
103 | "id": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | 103 | "id": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | ||
104 | "image_url": | 104 | "image_url": | ||
105 | 3conf/ext/tib_tmpl_bootstrap/Resources/Public/images/TIB_Logo_en.png", | 105 | 3conf/ext/tib_tmpl_bootstrap/Resources/Public/images/TIB_Logo_en.png", | ||
106 | "is_organization": true, | 106 | "is_organization": true, | ||
107 | "name": "tib", | 107 | "name": "tib", | ||
108 | "state": "active", | 108 | "state": "active", | ||
109 | "title": "TIB", | 109 | "title": "TIB", | ||
110 | "type": "organization" | 110 | "type": "organization" | ||
111 | }, | 111 | }, | ||
112 | "owner_org": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | 112 | "owner_org": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | ||
113 | "private": false, | 113 | "private": false, | ||
114 | "relationships_as_object": [], | 114 | "relationships_as_object": [], | ||
115 | "relationships_as_subject": [], | 115 | "relationships_as_subject": [], | ||
116 | "repository_name": "Leibniz University Hannover", | 116 | "repository_name": "Leibniz University Hannover", | ||
117 | "resources": [ | 117 | "resources": [ | ||
118 | { | 118 | { | ||
119 | "cache_last_updated": null, | 119 | "cache_last_updated": null, | ||
120 | "cache_url": null, | 120 | "cache_url": null, | ||
121 | "created": "2022-10-07T06:57:34.111351", | 121 | "created": "2022-10-07T06:57:34.111351", | ||
122 | "description": "", | 122 | "description": "", | ||
123 | "format": "TXT", | 123 | "format": "TXT", | ||
124 | "hash": "", | 124 | "hash": "", | ||
125 | "id": "7d66bd9b-b0e0-4fb7-8b23-1bcdbefa6f7e", | 125 | "id": "7d66bd9b-b0e0-4fb7-8b23-1bcdbefa6f7e", | ||
126 | "last_modified": null, | 126 | "last_modified": null, | ||
n | 127 | "metadata_modified": "2023-01-12T13:14:10.774723", | n | 127 | "metadata_modified": "2023-08-04T08:46:24.837945", |
128 | "mimetype": null, | 128 | "mimetype": null, | ||
129 | "mimetype_inner": null, | 129 | "mimetype_inner": null, | ||
130 | "name": "CS-NER dataset", | 130 | "name": "CS-NER dataset", | ||
131 | "package_id": "e7e8ca91-d0bb-433b-b1a0-9a92c9477775", | 131 | "package_id": "e7e8ca91-d0bb-433b-b1a0-9a92c9477775", | ||
132 | "position": 0, | 132 | "position": 0, | ||
133 | "resource_type": null, | 133 | "resource_type": null, | ||
134 | "size": null, | 134 | "size": null, | ||
135 | "state": "active", | 135 | "state": "active", | ||
136 | "url": "https://github.com/jd-coderepos/contributions-ner-cs", | 136 | "url": "https://github.com/jd-coderepos/contributions-ner-cs", | ||
137 | "url_type": "" | 137 | "url_type": "" | ||
138 | } | 138 | } | ||
139 | ], | 139 | ], | ||
t | t | 140 | "services_used_list": "", | ||
140 | "source_metadata_created": "2022-10-07T06:56:39.680761", | 141 | "source_metadata_created": "2022-10-07T06:56:39.680761", | ||
141 | "source_metadata_modified": "2022-10-07T07:02:56.838704", | 142 | "source_metadata_modified": "2022-10-07T07:02:56.838704", | ||
142 | "state": "active", | 143 | "state": "active", | ||
143 | "tags": [ | 144 | "tags": [ | ||
144 | { | 145 | { | ||
145 | "display_name": "information retrieval", | 146 | "display_name": "information retrieval", | ||
146 | "id": "c094f269-c169-4c66-b2f1-98967cbef543", | 147 | "id": "c094f269-c169-4c66-b2f1-98967cbef543", | ||
147 | "name": "information retrieval", | 148 | "name": "information retrieval", | ||
148 | "state": "active", | 149 | "state": "active", | ||
149 | "vocabulary_id": null | 150 | "vocabulary_id": null | ||
150 | }, | 151 | }, | ||
151 | { | 152 | { | ||
152 | "display_name": "named entity recognition", | 153 | "display_name": "named entity recognition", | ||
153 | "id": "2c89c618-a7b7-4d1b-984b-aee05fb83b1e", | 154 | "id": "2c89c618-a7b7-4d1b-984b-aee05fb83b1e", | ||
154 | "name": "named entity recognition", | 155 | "name": "named entity recognition", | ||
155 | "state": "active", | 156 | "state": "active", | ||
156 | "vocabulary_id": null | 157 | "vocabulary_id": null | ||
157 | }, | 158 | }, | ||
158 | { | 159 | { | ||
159 | "display_name": "natural language processing", | 160 | "display_name": "natural language processing", | ||
160 | "id": "8af9c93a-1d87-41e0-83d9-f5d01a2bbd0c", | 161 | "id": "8af9c93a-1d87-41e0-83d9-f5d01a2bbd0c", | ||
161 | "name": "natural language processing", | 162 | "name": "natural language processing", | ||
162 | "state": "active", | 163 | "state": "active", | ||
163 | "vocabulary_id": null | 164 | "vocabulary_id": null | ||
164 | }, | 165 | }, | ||
165 | { | 166 | { | ||
166 | "display_name": "scientific named entity recognition", | 167 | "display_name": "scientific named entity recognition", | ||
167 | "id": "7e50bb81-b464-472c-aa9d-25236a2bede2", | 168 | "id": "7e50bb81-b464-472c-aa9d-25236a2bede2", | ||
168 | "name": "scientific named entity recognition", | 169 | "name": "scientific named entity recognition", | ||
169 | "state": "active", | 170 | "state": "active", | ||
170 | "vocabulary_id": null | 171 | "vocabulary_id": null | ||
171 | } | 172 | } | ||
172 | ], | 173 | ], | ||
173 | "terms_of_usage": "Yes", | 174 | "terms_of_usage": "Yes", | ||
174 | "title": "CS-NER", | 175 | "title": "CS-NER", | ||
175 | "type": "vdataset", | 176 | "type": "vdataset", | ||
176 | "url": "https://data.uni-hannover.de/dataset/cs-ner-dataset", | 177 | "url": "https://data.uni-hannover.de/dataset/cs-ner-dataset", | ||
177 | "version": "" | 178 | "version": "" | ||
178 | } | 179 | } |