f | { | f | { |
| "access_rights": "", | | "access_rights": "", |
n | "author": "Yogatama and Smith", | n | "author": "Nitish Srivastava", |
| "author_email": "", | | "author_email": "", |
n | "citation": [], | n | "citation": [ |
| | | "https://doi.org/10.48550/arXiv.1611.05940", |
| | | "https://doi.org/10.1016/j.procs.2015.03.074" |
| | | ], |
| "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", |
n | "defined_in": "https://doi.org/10.18653/v1/D17-1041", | n | "defined_in": "https://doi.org/10.48550/arXiv.1907.04919", |
| "doi": "10.57702/f4hmxqob", | | "doi": "10.57702/f4hmxqob", |
| "doi_date_published": "2024-11-25", | | "doi_date_published": "2024-11-25", |
| "doi_publisher": "TIB", | | "doi_publisher": "TIB", |
| "doi_status": true, | | "doi_status": true, |
| "domain": "https://service.tib.eu/ldmservice", | | "domain": "https://service.tib.eu/ldmservice", |
n | | n | "extra_authors": [ |
| | | { |
| | | "extra_author": "Geoffrey Hinton", |
| | | "orcid": "" |
| | | }, |
| | | { |
| | | "extra_author": "Ruslan Salakhutdinov", |
| | | "orcid": "" |
| | | } |
| | | ], |
| "groups": [], | | "groups": [ |
| | | { |
| | | "description": "", |
| | | "display_name": "Text Categorization", |
| | | "id": "2d56b907-932f-4618-ba36-d173c05f6275", |
| | | "image_display_url": "", |
| | | "name": "text-categorization", |
| | | "title": "Text Categorization" |
| | | }, |
| | | { |
| | | "description": "", |
| | | "display_name": "Text Classification", |
| | | "id": "0707d099-de73-41dc-85f6-93e4efa6bf14", |
| | | "image_display_url": "", |
| | | "name": "text-classification", |
| | | "title": "Text Classification" |
| | | } |
| | | ], |
| "id": "b283b5ec-82db-496b-b5e7-0e8cde168067", | | "id": "b283b5ec-82db-496b-b5e7-0e8cde168067", |
| "isopen": false, | | "isopen": false, |
n | "landing_page": "", | n | "landing_page": |
| | | "https://www.cs.toronto.edu/~rsalakhu/20Newsgroups.html", |
| "license_title": null, | | "license_title": null, |
| "link_orkg": "", | | "link_orkg": "", |
| "metadata_created": "2024-11-25T14:20:44.770424", | | "metadata_created": "2024-11-25T14:20:44.770424", |
n | "metadata_modified": "2024-11-25T14:33:57.677654", | n | "metadata_modified": "2024-12-02T18:03:36.560018", |
| "name": "20-newsgroups-dataset", | | "name": "20-newsgroups-dataset", |
n | "notes": "The 20 Newsgroups dataset used for topic classification, | n | "notes": "The 20 Newsgroups dataset consists of 18,845 posts taken |
| consisting of newsgroup documents for classifying sports topics.", | | from the Usenet newsgroup collection. Each post belongs to exactly one |
| | | newsgroup. Following the preprocessing in [12] and [7], the data was |
| | | partitioned chronologically into 11,314 training and 7,531 test |
| | | articles. After removing stopwords and stemming, the 2000 most |
| | | frequent words in the training set were used to represent the |
| | | documents.", |
| "num_resources": 1, | | "num_resources": 0, |
| "num_tags": 0, | | "num_tags": 8, |
| "organization": { | | "organization": { |
| "approval_status": "approved", | | "approval_status": "approved", |
| "created": "2024-11-25T12:11:38.292601", | | "created": "2024-11-25T12:11:38.292601", |
| "description": "", | | "description": "", |
| "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", |
| "image_url": "", | | "image_url": "", |
| "is_organization": true, | | "is_organization": true, |
| "name": "no-organization", | | "name": "no-organization", |
| "state": "active", | | "state": "active", |
| "title": "No Organization", | | "title": "No Organization", |
| "type": "organization" | | "type": "organization" |
| }, | | }, |
| "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", |
| "private": false, | | "private": false, |
| "relationships_as_object": [], | | "relationships_as_object": [], |
| "relationships_as_subject": [], | | "relationships_as_subject": [], |
n | "resources": [ | n | "resources": [], |
| | | "services_used_list": "", |
| | | "state": "active", |
| | | "tags": [ |
| { | | { |
n | "cache_last_updated": null, | n | "display_name": "20 Newsgroups", |
| "cache_url": null, | | "id": "e2be3fd7-6c7f-4975-9a58-c6448ed49fe1", |
| "created": "2024-11-25T15:30:26", | | "name": "20 Newsgroups", |
| "data": [ | | |
| "dcterms:title", | | |
| "dcterms:accessRights", | | |
| "dcterms:creator", | | |
| "dcterms:description", | | |
| "dcterms:issued", | | |
| "dcterms:language", | | |
| "dcterms:identifier", | | |
| "dcat:theme", | | |
| "dcterms:type", | | |
| "dcat:keyword", | | |
| "dcat:landingPage", | | |
| "dcterms:hasVersion", | | |
| "dcterms:format", | | |
| "mls:task", | | |
| "datacite:isDescribedBy" | | |
| ], | | |
| "description": "The json representation of the dataset with its | | |
| distributions based on DCAT.", | | |
| "format": "JSON", | | |
| "hash": "", | | |
| "id": "f2df2940-3b27-4e44-8508-83a3fb4a72d9", | | |
| "last_modified": "2024-11-25T14:33:57.670967", | | |
| "metadata_modified": "2024-11-25T14:33:57.680684", | | |
| "mimetype": "application/json", | | |
| "mimetype_inner": null, | | |
| "name": "Original Metadata", | | |
| "package_id": "b283b5ec-82db-496b-b5e7-0e8cde168067", | | |
| "position": 0, | | |
| "resource_type": null, | | |
| "size": 626, | | |
| "state": "active", | | "state": "active", |
n | "url": | n | "vocabulary_id": null |
| resource/f2df2940-3b27-4e44-8508-83a3fb4a72d9/download/metadata.json", | | }, |
| "url_type": "upload" | | { |
| | | "display_name": "Text Categorization", |
| | | "id": "475e53da-3cb9-4954-ae2b-672fb146c8ee", |
| | | "name": "Text Categorization", |
| | | "state": "active", |
| | | "vocabulary_id": null |
| | | }, |
| | | { |
| | | "display_name": "Text Classification", |
| | | "id": "daa24bd4-9e49-4ef5-870a-f51681ab0a20", |
| | | "name": "Text Classification", |
| | | "state": "active", |
| | | "vocabulary_id": null |
| | | }, |
| | | { |
| | | "display_name": "document representation", |
| | | "id": "9439f346-f33f-4187-a3a7-a24918f80bb4", |
| | | "name": "document representation", |
| | | "state": "active", |
| | | "vocabulary_id": null |
| | | }, |
| | | { |
| | | "display_name": "newsgroups", |
| | | "id": "3a7aba89-6881-4691-a484-0d8566f60351", |
| | | "name": "newsgroups", |
| | | "state": "active", |
| | | "vocabulary_id": null |
| | | }, |
| | | { |
| | | "display_name": "text categorization", |
| | | "id": "de32a8d9-cbad-45c4-afe4-0f707d3a146b", |
| | | "name": "text categorization", |
| | | "state": "active", |
| | | "vocabulary_id": null |
| | | }, |
| | | { |
| | | "display_name": "text classification", |
| | | "id": "98d3b2df-ea96-4826-971b-6c5bf64c999d", |
| | | "name": "text classification", |
| | | "state": "active", |
| | | "vocabulary_id": null |
| | | }, |
| | | { |
| | | "display_name": "topic modeling", |
| | | "id": "6f8aa70c-b1e6-4882-a45f-64a53a8c16a9", |
| | | "name": "topic modeling", |
| | | "state": "active", |
| | | "vocabulary_id": null |
| } | | } |
| ], | | ], |
t | "services_used_list": "", | t | |
| "state": "active", | | |
| "tags": [], | | |
| "title": "20 Newsgroups Dataset", | | "title": "20 Newsgroups dataset", |
| "type": "dataset", | | "type": "dataset", |
| "version": "" | | "version": "" |
| } | | } |