Changes
On December 16, 2024 at 8:39:40 PM UTC, admin:
-
Changed value of field
doi_status
to True
in CORU: Comprehensive Post-OCR Parsing and Receipt Understanding Dataset -
Changed value of field
doi_date_published
to 2024-12-16
in CORU: Comprehensive Post-OCR Parsing and Receipt Understanding Dataset -
Added resource Original Metadata to CORU: Comprehensive Post-OCR Parsing and Receipt Understanding Dataset
f | 1 | { | f | 1 | { |
2 | "access_rights": "", | 2 | "access_rights": "", | ||
3 | "author": "Abdelrahman Abdallah", | 3 | "author": "Abdelrahman Abdallah", | ||
4 | "author_email": "", | 4 | "author_email": "", | ||
5 | "citation": [], | 5 | "citation": [], | ||
6 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | 6 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | ||
7 | "defined_in": "https://doi.org/10.48550/arXiv.2406.04493", | 7 | "defined_in": "https://doi.org/10.48550/arXiv.2406.04493", | ||
8 | "doi": "10.57702/qf25gc3z", | 8 | "doi": "10.57702/qf25gc3z", | ||
n | 9 | "doi_date_published": null, | n | 9 | "doi_date_published": "2024-12-16", |
10 | "doi_publisher": "TIB", | 10 | "doi_publisher": "TIB", | ||
n | 11 | "doi_status": false, | n | 11 | "doi_status": true, |
12 | "domain": "https://service.tib.eu/ldmservice", | 12 | "domain": "https://service.tib.eu/ldmservice", | ||
13 | "extra_authors": [ | 13 | "extra_authors": [ | ||
14 | { | 14 | { | ||
15 | "extra_author": "Mahmoud Abdalla", | 15 | "extra_author": "Mahmoud Abdalla", | ||
16 | "orcid": "" | 16 | "orcid": "" | ||
17 | }, | 17 | }, | ||
18 | { | 18 | { | ||
19 | "extra_author": "Mahmoud SalahEldin Kasem", | 19 | "extra_author": "Mahmoud SalahEldin Kasem", | ||
20 | "orcid": "" | 20 | "orcid": "" | ||
21 | }, | 21 | }, | ||
22 | { | 22 | { | ||
23 | "extra_author": "Mohamed Mahmoud", | 23 | "extra_author": "Mohamed Mahmoud", | ||
24 | "orcid": "" | 24 | "orcid": "" | ||
25 | }, | 25 | }, | ||
26 | { | 26 | { | ||
27 | "extra_author": "Ibrahim Abdelhalim", | 27 | "extra_author": "Ibrahim Abdelhalim", | ||
28 | "orcid": "" | 28 | "orcid": "" | ||
29 | }, | 29 | }, | ||
30 | { | 30 | { | ||
31 | "extra_author": "Mohamed Elkasaby", | 31 | "extra_author": "Mohamed Elkasaby", | ||
32 | "orcid": "" | 32 | "orcid": "" | ||
33 | }, | 33 | }, | ||
34 | { | 34 | { | ||
35 | "extra_author": "Yasser ElBendary", | 35 | "extra_author": "Yasser ElBendary", | ||
36 | "orcid": "" | 36 | "orcid": "" | ||
37 | }, | 37 | }, | ||
38 | { | 38 | { | ||
39 | "extra_author": "Adam Jatowt", | 39 | "extra_author": "Adam Jatowt", | ||
40 | "orcid": "" | 40 | "orcid": "" | ||
41 | } | 41 | } | ||
42 | ], | 42 | ], | ||
43 | "groups": [ | 43 | "groups": [ | ||
44 | { | 44 | { | ||
45 | "description": "", | 45 | "description": "", | ||
46 | "display_name": "Document Analysis", | 46 | "display_name": "Document Analysis", | ||
47 | "id": "196fab6b-b4cc-4bd7-b2c7-bf70a3acecf5", | 47 | "id": "196fab6b-b4cc-4bd7-b2c7-bf70a3acecf5", | ||
48 | "image_display_url": "", | 48 | "image_display_url": "", | ||
49 | "name": "document-analysis", | 49 | "name": "document-analysis", | ||
50 | "title": "Document Analysis" | 50 | "title": "Document Analysis" | ||
51 | }, | 51 | }, | ||
52 | { | 52 | { | ||
53 | "description": "", | 53 | "description": "", | ||
54 | "display_name": "Natural Language Processing", | 54 | "display_name": "Natural Language Processing", | ||
55 | "id": "5b974bcc-8f79-40fc-a05d-75b861d55ed0", | 55 | "id": "5b974bcc-8f79-40fc-a05d-75b861d55ed0", | ||
56 | "image_display_url": "", | 56 | "image_display_url": "", | ||
57 | "name": "natural-language-processing", | 57 | "name": "natural-language-processing", | ||
58 | "title": "Natural Language Processing" | 58 | "title": "Natural Language Processing" | ||
59 | }, | 59 | }, | ||
60 | { | 60 | { | ||
61 | "description": "", | 61 | "description": "", | ||
62 | "display_name": "Optical Character Recognition", | 62 | "display_name": "Optical Character Recognition", | ||
63 | "id": "6df40726-e0c9-4592-a311-1bfd3eb18438", | 63 | "id": "6df40726-e0c9-4592-a311-1bfd3eb18438", | ||
64 | "image_display_url": "", | 64 | "image_display_url": "", | ||
65 | "name": "optical-character-recognition", | 65 | "name": "optical-character-recognition", | ||
66 | "title": "Optical Character Recognition" | 66 | "title": "Optical Character Recognition" | ||
67 | } | 67 | } | ||
68 | ], | 68 | ], | ||
69 | "id": "b181ea24-53ce-4317-9081-f271985ae8a1", | 69 | "id": "b181ea24-53ce-4317-9081-f271985ae8a1", | ||
70 | "isopen": false, | 70 | "isopen": false, | ||
71 | "landing_page": | 71 | "landing_page": | ||
72 | "https://github.com/Update-For-Integrated-Business-AI/CORU", | 72 | "https://github.com/Update-For-Integrated-Business-AI/CORU", | ||
73 | "license_title": null, | 73 | "license_title": null, | ||
74 | "link_orkg": "", | 74 | "link_orkg": "", | ||
75 | "metadata_created": "2024-12-16T20:39:39.241942", | 75 | "metadata_created": "2024-12-16T20:39:39.241942", | ||
n | 76 | "metadata_modified": "2024-12-16T20:39:39.241947", | n | 76 | "metadata_modified": "2024-12-16T20:39:39.642976", |
77 | "name": | 77 | "name": | ||
78 | ru--comprehensive-post-ocr-parsing-and-receipt-understanding-dataset", | 78 | ru--comprehensive-post-ocr-parsing-and-receipt-understanding-dataset", | ||
79 | "notes": "A comprehensive dataset for post-OCR parsing and receipt | 79 | "notes": "A comprehensive dataset for post-OCR parsing and receipt | ||
80 | understanding, specifically designed to enhance OCR and information | 80 | understanding, specifically designed to enhance OCR and information | ||
81 | extraction from receipts in multilingual contexts involving Arabic and | 81 | extraction from receipts in multilingual contexts involving Arabic and | ||
82 | English.", | 82 | English.", | ||
n | 83 | "num_resources": 0, | n | 83 | "num_resources": 1, |
84 | "num_tags": 4, | 84 | "num_tags": 4, | ||
85 | "organization": { | 85 | "organization": { | ||
86 | "approval_status": "approved", | 86 | "approval_status": "approved", | ||
87 | "created": "2024-11-25T12:11:38.292601", | 87 | "created": "2024-11-25T12:11:38.292601", | ||
88 | "description": "", | 88 | "description": "", | ||
89 | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | 89 | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | ||
90 | "image_url": "", | 90 | "image_url": "", | ||
91 | "is_organization": true, | 91 | "is_organization": true, | ||
92 | "name": "no-organization", | 92 | "name": "no-organization", | ||
93 | "state": "active", | 93 | "state": "active", | ||
94 | "title": "No Organization", | 94 | "title": "No Organization", | ||
95 | "type": "organization" | 95 | "type": "organization" | ||
96 | }, | 96 | }, | ||
97 | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | 97 | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | ||
98 | "private": false, | 98 | "private": false, | ||
99 | "relationships_as_object": [], | 99 | "relationships_as_object": [], | ||
100 | "relationships_as_subject": [], | 100 | "relationships_as_subject": [], | ||
t | 101 | "resources": [], | t | 101 | "resources": [ |
102 | { | ||||
103 | "cache_last_updated": null, | ||||
104 | "cache_url": null, | ||||
105 | "created": "2024-12-16T18:25:46", | ||||
106 | "data": [ | ||||
107 | "dcterms:title", | ||||
108 | "dcterms:accessRights", | ||||
109 | "dcterms:creator", | ||||
110 | "dcterms:description", | ||||
111 | "dcterms:issued", | ||||
112 | "dcterms:language", | ||||
113 | "dcterms:identifier", | ||||
114 | "dcat:theme", | ||||
115 | "dcterms:type", | ||||
116 | "dcat:keyword", | ||||
117 | "dcat:landingPage", | ||||
118 | "dcterms:hasVersion", | ||||
119 | "dcterms:format", | ||||
120 | "mls:task", | ||||
121 | "datacite:isDescribedBy" | ||||
122 | ], | ||||
123 | "description": "The json representation of the dataset with its | ||||
124 | distributions based on DCAT.", | ||||
125 | "format": "JSON", | ||||
126 | "hash": "", | ||||
127 | "id": "c2ebaeb0-ee85-4a69-8b6c-09ae1e2a5470", | ||||
128 | "last_modified": "2024-12-16T20:39:39.635557", | ||||
129 | "metadata_modified": "2024-12-16T20:39:39.645778", | ||||
130 | "mimetype": "application/json", | ||||
131 | "mimetype_inner": null, | ||||
132 | "name": "Original Metadata", | ||||
133 | "package_id": "b181ea24-53ce-4317-9081-f271985ae8a1", | ||||
134 | "position": 0, | ||||
135 | "resource_type": null, | ||||
136 | "size": 1202, | ||||
137 | "state": "active", | ||||
138 | "url": | ||||
139 | resource/c2ebaeb0-ee85-4a69-8b6c-09ae1e2a5470/download/metadata.json", | ||||
140 | "url_type": "upload" | ||||
141 | } | ||||
142 | ], | ||||
102 | "services_used_list": "", | 143 | "services_used_list": "", | ||
103 | "state": "active", | 144 | "state": "active", | ||
104 | "tags": [ | 145 | "tags": [ | ||
105 | { | 146 | { | ||
106 | "display_name": "document analysis", | 147 | "display_name": "document analysis", | ||
107 | "id": "fd068cc5-5d26-4f2e-a92f-a60ef2c33839", | 148 | "id": "fd068cc5-5d26-4f2e-a92f-a60ef2c33839", | ||
108 | "name": "document analysis", | 149 | "name": "document analysis", | ||
109 | "state": "active", | 150 | "state": "active", | ||
110 | "vocabulary_id": null | 151 | "vocabulary_id": null | ||
111 | }, | 152 | }, | ||
112 | { | 153 | { | ||
113 | "display_name": "multilingual OCR", | 154 | "display_name": "multilingual OCR", | ||
114 | "id": "c22c83c1-8fce-4539-9a64-7acf742d7e47", | 155 | "id": "c22c83c1-8fce-4539-9a64-7acf742d7e47", | ||
115 | "name": "multilingual OCR", | 156 | "name": "multilingual OCR", | ||
116 | "state": "active", | 157 | "state": "active", | ||
117 | "vocabulary_id": null | 158 | "vocabulary_id": null | ||
118 | }, | 159 | }, | ||
119 | { | 160 | { | ||
120 | "display_name": "post-OCR parsing", | 161 | "display_name": "post-OCR parsing", | ||
121 | "id": "6084ae90-24f1-438d-b200-72f25750cfa8", | 162 | "id": "6084ae90-24f1-438d-b200-72f25750cfa8", | ||
122 | "name": "post-OCR parsing", | 163 | "name": "post-OCR parsing", | ||
123 | "state": "active", | 164 | "state": "active", | ||
124 | "vocabulary_id": null | 165 | "vocabulary_id": null | ||
125 | }, | 166 | }, | ||
126 | { | 167 | { | ||
127 | "display_name": "receipt understanding", | 168 | "display_name": "receipt understanding", | ||
128 | "id": "6838fed2-37f9-441c-98c1-281e869ae954", | 169 | "id": "6838fed2-37f9-441c-98c1-281e869ae954", | ||
129 | "name": "receipt understanding", | 170 | "name": "receipt understanding", | ||
130 | "state": "active", | 171 | "state": "active", | ||
131 | "vocabulary_id": null | 172 | "vocabulary_id": null | ||
132 | } | 173 | } | ||
133 | ], | 174 | ], | ||
134 | "title": "CORU: Comprehensive Post-OCR Parsing and Receipt | 175 | "title": "CORU: Comprehensive Post-OCR Parsing and Receipt | ||
135 | Understanding Dataset", | 176 | Understanding Dataset", | ||
136 | "type": "dataset", | 177 | "type": "dataset", | ||
137 | "version": "" | 178 | "version": "" | ||
138 | } | 179 | } |