f | { | f | { |
n | "author": "Jennifer D\u2019Souza; Soeren Auer", | n | "author": "Jennifer D\u2019Souza, Soeren Auer", |
| "author_email": "jennifer.dsouza@tib.eu", | | "author_email": "jennifer.dsouza@tib.eu", |
| "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", |
| "doi": "10.25835/0019761", | | "doi": "10.25835/0019761", |
| "doi_date_published": "2020-07-03", | | "doi_date_published": "2020-07-03", |
| "doi_publisher": "LUIS", | | "doi_publisher": "LUIS", |
| "doi_status": "true", | | "doi_status": "true", |
| "domain": "https://data.uni-hannover.de", | | "domain": "https://data.uni-hannover.de", |
| "groups": [], | | "groups": [], |
| "have_copyright": "Yes", | | "have_copyright": "Yes", |
| "id": "3c23ef7a-45e8-47a0-b635-ce39006d2ba7", | | "id": "3c23ef7a-45e8-47a0-b635-ce39006d2ba7", |
| "isopen": false, | | "isopen": false, |
| "license_id": "CC-BY-SA-3.0", | | "license_id": "CC-BY-SA-3.0", |
| "license_title": "CC-BY-SA-3.0", | | "license_title": "CC-BY-SA-3.0", |
| "maintainer": "Jennifer D'Souza", | | "maintainer": "Jennifer D'Souza", |
| "maintainer_email": "jennifer.dsouza@tib.eu", | | "maintainer_email": "jennifer.dsouza@tib.eu", |
| "metadata_created": "2021-10-14T10:16:03.354056", | | "metadata_created": "2021-10-14T10:16:03.354056", |
n | "metadata_modified": "2021-10-14T10:16:03.354061", | n | "metadata_modified": "2023-01-12T13:14:21.766915", |
| "name": "luh-nlpcontributions-pilot-dataset", | | "name": "luh-nlpcontributions-pilot-dataset", |
| "notes": "##An Annotation Scheme for Machine Reading of Scholarly | | "notes": "##An Annotation Scheme for Machine Reading of Scholarly |
| Contributions in Natural Language Processing Literature\r\n\r\nThis | | Contributions in Natural Language Processing Literature\r\n\r\nThis |
| dataset is the result of a pilot annotation exercise to capture the | | dataset is the result of a pilot annotation exercise to capture the |
| scholarly contributions in natural language processing (NLP) articles, | | scholarly contributions in natural language processing (NLP) articles, |
| particularly, for the articles that discuss machine learning (ML) | | particularly, for the articles that discuss machine learning (ML) |
| approaches for various information extraction tasks. The pilot | | approaches for various information extraction tasks. The pilot |
| annotation exercise was performed on 50 NLP-ML scholarly articles | | annotation exercise was performed on 50 NLP-ML scholarly articles |
| presenting contributions to the five information extraction tasks 1. | | presenting contributions to the five information extraction tasks 1. |
| machine translation, 2. named entity recognition, 3. question | | machine translation, 2. named entity recognition, 3. question |
| answering, 4. relation classification, and 5. text classification. | | answering, 4. relation classification, and 5. text classification. |
| \r\n\r\nThe outcome of this pilot annotation exercise was two-fold: 1) | | \r\n\r\nThe outcome of this pilot annotation exercise was two-fold: 1) |
| a preliminary annotation methodology, and 2) the dataset released in | | a preliminary annotation methodology, and 2) the dataset released in |
| this repository.\r\n\r\nThe resulting annotation scheme is called | | this repository.\r\n\r\nThe resulting annotation scheme is called |
| <b><i>NLPContributions</i></b>.\r\n\r\n###Supporting | | <b><i>NLPContributions</i></b>.\r\n\r\n###Supporting |
| Publications\r\nD\u2019Souza, J., & Auer, S. (2020). NLPContributions: | | Publications\r\nD\u2019Souza, J., & Auer, S. (2020). NLPContributions: |
| An Annotation Scheme for Machine Reading of Scholarly Contributions in | | An Annotation Scheme for Machine Reading of Scholarly Contributions in |
| Natural Language Processing Literature. In C. Zhang, P. Mayr, W. Lu, & | | Natural Language Processing Literature. In C. Zhang, P. Mayr, W. Lu, & |
| Y. Zhang (Eds.), Proceedings of the 1st Workshop on Extraction and | | Y. Zhang (Eds.), Proceedings of the 1st Workshop on Extraction and |
| Evaluation of Knowledge Entities from Scientific Documents co-located | | Evaluation of Knowledge Entities from Scientific Documents co-located |
| with the ACM/IEEE Joint Conference on Digital Libraries in 2020, | | with the ACM/IEEE Joint Conference on Digital Libraries in 2020, |
| EEKE@JCDL 2020, Virtual Event, China, August 1st, 2020 (Vol. 2658, pp. | | EEKE@JCDL 2020, Virtual Event, China, August 1st, 2020 (Vol. 2658, pp. |
n | 16\u201327). \r\n", | n | 16\u201327). \r\n\r\nD'Souza, Jennifer, and S\u00f6ren Auer. |
| | | \"Sentence, Phrase, and Triple Annotations to Build a Knowledge Graph |
| | | of Natural Language Processing Contributions\u2014A Trial Dataset.\" |
| | | Journal of Data and Information Science, vol.6, no.3, 2021, pp.6-34. |
| | | DOI: 10.2478/jdis-2021-0023 ", |
| "num_resources": 2, | | "num_resources": 1, |
| "num_tags": 8, | | "num_tags": 8, |
| "organization": { | | "organization": { |
| "approval_status": "approved", | | "approval_status": "approved", |
| "created": "2017-11-23T17:30:37.757128", | | "created": "2017-11-23T17:30:37.757128", |
| "description": "The German National Library of Science and | | "description": "The German National Library of Science and |
| Technology, abbreviated TIB, is the national library of the Federal | | Technology, abbreviated TIB, is the national library of the Federal |
| Republic of Germany for all fields of engineering, technology, and the | | Republic of Germany for all fields of engineering, technology, and the |
| natural sciences.", | | natural sciences.", |
| "id": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | | "id": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", |
| "image_url": | | "image_url": |
| 3conf/ext/tib_tmpl_bootstrap/Resources/Public/images/TIB_Logo_en.png", | | 3conf/ext/tib_tmpl_bootstrap/Resources/Public/images/TIB_Logo_en.png", |
| "is_organization": true, | | "is_organization": true, |
| "name": "tib", | | "name": "tib", |
| "state": "active", | | "state": "active", |
| "title": "TIB", | | "title": "TIB", |
| "type": "organization" | | "type": "organization" |
| }, | | }, |
| "owner_org": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", | | "owner_org": "0c5362f5-b99e-41db-8256-3d0d7549bf4d", |
| "private": false, | | "private": false, |
| "relationships_as_object": [], | | "relationships_as_object": [], |
| "relationships_as_subject": [], | | "relationships_as_subject": [], |
| "repository_name": "Leibniz University Hannover", | | "repository_name": "Leibniz University Hannover", |
| "resources": [ | | "resources": [ |
| { | | { |
| "cache_last_updated": null, | | "cache_last_updated": null, |
| "cache_url": null, | | "cache_url": null, |
n | "created": "2020-07-03T12:08:03.741972", | n | |
| "datastore_active": false, | | |
| "description": "", | | |
| "downloadall_datapackage_hash": | | |
| "cec5bb1048e5c2ed59fc1cff04117694", | | |
| "downloadall_metadata_modified": "2020-10-07T16:01:24.871375", | | |
| "format": "ZIP", | | |
| "hash": "", | | |
| "id": "7b1ad9cc-b27f-46e0-9f0d-e6c31d367555", | | |
| "last_modified": "2020-10-07T16:01:26.413955", | | |
| "metadata_modified": "2021-10-14T10:16:03.340535", | | |
| "mimetype": "application/zip", | | |
| "mimetype_inner": null, | | |
| "name": "All resource data", | | |
| "package_id": "3c23ef7a-45e8-47a0-b635-ce39006d2ba7", | | |
| "position": 0, | | |
| "resource_type": null, | | |
| "revision_id": "59ad9636-1ff7-4913-a0d3-19e7738aecd3", | | |
| "size": 984, | | |
| "state": "active", | | |
| "url": | | |
| 9f0d-e6c31d367555/download/nlpcontributions-pilot-dataset-jh70ne.zip", | | |
| "url_type": "" | | |
| }, | | |
| { | | |
| "cache_last_updated": null, | | |
| "cache_url": null, | | |
| "created": "2020-07-03T12:32:07.815445", | | "created": "2020-07-03T12:32:07.815445", |
n | "datastore_active": false, | n | |
| "description": "Research contributions in NLP annotated as | | "description": "Research contributions in NLP annotated as |
| structured data using the NLPContributionGraph scheme for structuring | | structured data using the NLPContributionGraph scheme for structuring |
| scholarly contributions in the | | scholarly contributions in the |
| [ORKG](https://www.orkg.org/orkg/).\r\n\r\nThe repository is organized | | [ORKG](https://www.orkg.org/orkg/).\r\n\r\nThe repository is organized |
| as follows:\r\n\r\n [task-name-folder]/ | | as follows:\r\n\r\n [task-name-folder]/ |
| # machine-translation, named-entity-recognition, question-answering, | | # machine-translation, named-entity-recognition, question-answering, |
| relation-classification, text-classification\r\n | | relation-classification, text-classification\r\n |
| \u251c\u2500\u2500 [article-counter-folder]/ # ranges | | \u251c\u2500\u2500 [article-counter-folder]/ # ranges |
| from 0 to 9 since we annotated 10 articles per task\r\n \u2502 | | from 0 to 9 since we annotated 10 articles per task\r\n \u2502 |
| \u2514\u2500\u2500 research-problem.json # `research | | \u2514\u2500\u2500 research-problem.json # `research |
| problem` mandatory information unit in json format\r\n \u2502 | | problem` mandatory information unit in json format\r\n \u2502 |
| \u2514\u2500\u2500 model.json # `model` | | \u2514\u2500\u2500 model.json # `model` |
| information unit in json format; in some articles it is called | | information unit in json format; in some articles it is called |
| `approach`\r\n \u2502 \u2514\u2500\u2500 ... | | `approach`\r\n \u2502 \u2514\u2500\u2500 ... |
| # there are 8 main information units in all and each article may be | | # there are 8 main information units in all and each article may be |
| annotated by 3 or 6\r\n \u2502 \u2514\u2500\u2500 triples/ | | annotated by 3 or 6\r\n \u2502 \u2514\u2500\u2500 triples/ |
| # the folder containing information unit triples one per line\r\n | | # the folder containing information unit triples one per line\r\n |
| \u2502 \u2502 \u2514\u2500\u2500 research-problem.txt | | \u2502 \u2502 \u2514\u2500\u2500 research-problem.txt |
| # `research problem` triples (one research problem statement per | | # `research problem` triples (one research problem statement per |
| line)\r\n \u2502 \u2502 \u2514\u2500\u2500 model.txt | | line)\r\n \u2502 \u2502 \u2514\u2500\u2500 model.txt |
| # `model` triples (one statement per line)\r\n \u2502 \u2502 | | # `model` triples (one statement per line)\r\n \u2502 \u2502 |
| \u2514\u2500\u2500 ... # there are 8 | | \u2514\u2500\u2500 ... # there are 8 |
| main information units in all and each article may be annotated by 3 | | main information units in all and each article may be annotated by 3 |
| or 6\r\n \u2502 \u2514\u2500\u2500 ... | | or 6\r\n \u2502 \u2514\u2500\u2500 ... |
| # there are ten articles annotated for each task, so this repeats nine | | # there are ten articles annotated for each task, so this repeats nine |
| more times\r\n \u2514\u2500\u2500 ... | | more times\r\n \u2514\u2500\u2500 ... |
| # there are five tasks selected overall, so this repeats four more | | # there are five tasks selected overall, so this repeats four more |
| times", | | times", |
| "format": "JSON", | | "format": "JSON", |
| "hash": "", | | "hash": "", |
| "id": "29c065cf-7087-49e0-89aa-4901f90a528c", | | "id": "29c065cf-7087-49e0-89aa-4901f90a528c", |
| "last_modified": "2020-07-24T12:03:36.054419", | | "last_modified": "2020-07-24T12:03:36.054419", |
n | "metadata_modified": "2021-10-14T10:16:03.341778", | n | "metadata_modified": "2023-01-12T13:14:21.770546", |
| "mimetype": "application/zip", | | "mimetype": "application/zip", |
| "mimetype_inner": null, | | "mimetype_inner": null, |
| "name": "Trial data from the NLPContributionGraph scheme", | | "name": "Trial data from the NLPContributionGraph scheme", |
| "package_id": "3c23ef7a-45e8-47a0-b635-ce39006d2ba7", | | "package_id": "3c23ef7a-45e8-47a0-b635-ce39006d2ba7", |
n | "position": 1, | n | "position": 0, |
| "resource_type": null, | | "resource_type": null, |
n | "revision_id": "24e4f9c5-66c9-43f9-8d09-e9aa9f72956c", | n | |
| "size": 29426833, | | "size": 29426833, |
| "state": "active", | | "state": "active", |
| "url": "https://github.com/ncg-task/trial-data", | | "url": "https://github.com/ncg-task/trial-data", |
| "url_type": "" | | "url_type": "" |
| } | | } |
| ], | | ], |
| "source_metadata_created": "2020-07-03T12:08:02.648914", | | "source_metadata_created": "2020-07-03T12:08:02.648914", |
t | "source_metadata_modified": "2020-10-07T16:01:26.550584", | t | "source_metadata_modified": "2022-02-21T13:09:17.617156", |
| "state": "active", | | "state": "active", |
| "tags": [ | | "tags": [ |
| { | | { |
| "display_name": "corpus", | | "display_name": "corpus", |
| "id": "95b6b4a3-2816-47b4-a9da-50ab21116c20", | | "id": "95b6b4a3-2816-47b4-a9da-50ab21116c20", |
| "name": "corpus", | | "name": "corpus", |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| }, | | }, |
| { | | { |
| "display_name": "machine reading", | | "display_name": "machine reading", |
| "id": "d34df1b1-5ebe-415e-8b43-80a15ef1e215", | | "id": "d34df1b1-5ebe-415e-8b43-80a15ef1e215", |
| "name": "machine reading", | | "name": "machine reading", |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| }, | | }, |
| { | | { |
| "display_name": "natural language processing", | | "display_name": "natural language processing", |
| "id": "8af9c93a-1d87-41e0-83d9-f5d01a2bbd0c", | | "id": "8af9c93a-1d87-41e0-83d9-f5d01a2bbd0c", |
| "name": "natural language processing", | | "name": "natural language processing", |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| }, | | }, |
| { | | { |
| "display_name": "open research knowledge graph", | | "display_name": "open research knowledge graph", |
| "id": "c9fb26fb-f92f-4740-899e-290c1a384971", | | "id": "c9fb26fb-f92f-4740-899e-290c1a384971", |
| "name": "open research knowledge graph", | | "name": "open research knowledge graph", |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| }, | | }, |
| { | | { |
| "display_name": "orkg", | | "display_name": "orkg", |
| "id": "a029b5df-5c95-4e99-94f3-1d9e2fbf1fd0", | | "id": "a029b5df-5c95-4e99-94f3-1d9e2fbf1fd0", |
| "name": "orkg", | | "name": "orkg", |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| }, | | }, |
| { | | { |
| "display_name": "pilot annotation", | | "display_name": "pilot annotation", |
| "id": "33d81870-19c7-4aea-9ba6-76c17255b905", | | "id": "33d81870-19c7-4aea-9ba6-76c17255b905", |
| "name": "pilot annotation", | | "name": "pilot annotation", |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| }, | | }, |
| { | | { |
| "display_name": "scholarly knowledge graph", | | "display_name": "scholarly knowledge graph", |
| "id": "cad5b92d-5dd7-4e88-ab94-cf3dc6cb6594", | | "id": "cad5b92d-5dd7-4e88-ab94-cf3dc6cb6594", |
| "name": "scholarly knowledge graph", | | "name": "scholarly knowledge graph", |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| }, | | }, |
| { | | { |
| "display_name": "semantic web", | | "display_name": "semantic web", |
| "id": "cc98f198-dc4f-47f2-87b4-9ee665b8dfd8", | | "id": "cc98f198-dc4f-47f2-87b4-9ee665b8dfd8", |
| "name": "semantic web", | | "name": "semantic web", |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| } | | } |
| ], | | ], |
| "terms_of_usage": "Yes", | | "terms_of_usage": "Yes", |
| "title": "NLPContributionGraph Trial Dataset", | | "title": "NLPContributionGraph Trial Dataset", |
| "type": "vdataset", | | "type": "vdataset", |
| "url": | | "url": |
| "https://data.uni-hannover.de/dataset/nlpcontributions-pilot-dataset", | | "https://data.uni-hannover.de/dataset/nlpcontributions-pilot-dataset", |
| "version": "2.0" | | "version": "2.0" |
| } | | } |