Changes
On August 4, 2023 at 8:46:18 AM UTC, admin:
- Set author of "A Neural Approach for Text Extraction from Scholarly Figures" to David Morris (previously David Morris, Peichen Tang, and Ralph Ewerth)
The full package metadata diff (removed lines prefixed with -, added lines with +):

 {
-    "author": "David Morris, Peichen Tang, and Ralph Ewerth",
+    "author": "David Morris",
     "author_email": "",
     "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700",
     "doi": "10.25835/0030443",
     "doi_date_published": "2019-06-27",
     "doi_publisher": "LUIS",
     "doi_status": "true",
     "domain": "https://data.uni-hannover.de",
+    "extra_authors": [
+        {
+            "extra_author": " Peichen Tang"
+        },
+        {
+            "extra_author": " and Ralph Ewerth"
+        }
+    ],
     "groups": [],
     "have_copyright": "Yes",
     "id": "ef96d2db-9cbe-4738-b7bf-da556e12bdc2",
     "isopen": false,
     "license_id": "CC-BY-3.0",
     "license_title": "CC-BY-3.0",
     "maintainer": "David Morris, Peichen Tang, and Ralph Ewerth",
     "maintainer_email": "",
     "metadata_created": "2021-10-14T10:15:52.746653",
-    "metadata_modified": "2023-01-12T13:14:06.805432",
+    "metadata_modified": "2023-08-04T08:46:18.444285",
     "name": "luh-a-neural-approach-for-text-extraction-from-scholarly-figures",
     "notes": "# A Neural Approach for Text Extraction from Scholarly Figures\r\nThis is the readme for the supplemental data for our ICDAR 2019 paper.\r\n\r\nYou can read our paper via IEEE here: https://ieeexplore.ieee.org/document/8978202\r\n\r\nIf you found this dataset useful, please consider citing our paper:\r\n\r\n\t@inproceedings{DBLP:conf/icdar/MorrisTE19,\r\n\t author = {David Morris and\r\n\t\t\t\t Peichen Tang and\r\n\t\t\t\t Ralph Ewerth},\r\n\t title = {A Neural Approach for Text Extraction from Scholarly Figures},\r\n\t booktitle = {2019 International Conference on Document Analysis and Recognition,\r\n\t\t\t\t {ICDAR} 2019, Sydney, Australia, September 20-25, 2019},\r\n\t pages = {1438--1443},\r\n\t publisher = {{IEEE}},\r\n\t year = {2019},\r\n\t url = {https://doi.org/10.1109/ICDAR.2019.00231},\r\n\t doi = {10.1109/ICDAR.2019.00231},\r\n\t timestamp = {Tue, 04 Feb 2020 13:28:39 +0100},\r\n\t biburl = {https://dblp.org/rec/conf/icdar/MorrisTE19.bib},\r\n\t bibsource = {dblp computer science bibliography, https://dblp.org}\r\n\t}\r\n\r\nThis work was financially supported by the German Federal Ministry of Education and Research (BMBF) and European Social Fund (ESF) (InclusiveOCW project, no. 01PE17004).\r\n## Datasets\r\nWe used different sources of data for testing, validation, and training. Our testing set was assembled by the work we cited by B\u00f6schen et al. We excluded the DeGruyter dataset, and use it as our validation dataset.\r\n### Testing\r\nThese datasets contain a readme with license information. Further information about the associated project can be found in the authors' published work we cited: https://doi.org/10.1007/978-3-319-51811-4_2\r\n### Validation\r\nThe DeGruyter dataset does not include the labeled images due to license restrictions. As of writing, the images can still be downloaded from DeGruyter via the links in the readme. Note that depending on what program you use to strip the images out of the PDF they are provided in, you may have to re-number the images.\r\n### Training\r\nWe used [label_generator](https://github.com/domoritz/label_generator)'s generated dataset, which the author made available on a requester-pays [amazon s3 bucket](s3://escience.washington.edu.viziometrics).\r\nWe also used the Multi-Type Web Images dataset, which is mirrored roduction.htm?spm=5176.100066.0.0.3bcad780oQ9Ce4&raceId=231651).\r\n## Code\r\nWe have made our code available in `code.zip`. We will upload code, announce further news, and field questions via the [github repo](https://github.com/david-morris/Neural-Figure-Text).\r\n\r\nOur text detection network is adapted from [Argman's EAST implementation](https://github.com/argman/EAST). The `EAST/checkpoints/ours` subdirectory contains the trained weights we used in the paper.\r\n\r\nWe used a tesseract script to run text extraction from detected text rows. This is inside our code `code.tar` as `text_recognition_multipro.py`.\r\n\r\nWe used a java script provided by Falk B\u00f6schen and adapted to our file structure. We included this as `evaluator.jar`.\r\n\r\nParameter sweeps are automated by `param_sweep.rb`. This file also shows how to invoke all of these components.",
     "num_resources": 1,
     "num_tags": 3,
     "organization": {
         "approval_status": "approved",
         "created": "2017-11-23T17:30:37.757128",
         "description": "The German National Library of Science and Technology, abbreviated TIB, is the national library of the Federal Republic of Germany for all fields of engineering, technology, and the natural sciences.",
         "id": "0c5362f5-b99e-41db-8256-3d0d7549bf4d",
         "image_url": "3conf/ext/tib_tmpl_bootstrap/Resources/Public/images/TIB_Logo_en.png",
         "is_organization": true,
         "name": "tib",
         "state": "active",
         "title": "TIB",
         "type": "organization"
     },
     "owner_org": "0c5362f5-b99e-41db-8256-3d0d7549bf4d",
     "private": false,
     "relationships_as_object": [],
     "relationships_as_subject": [],
     "repository_name": "Leibniz University Hannover",
     "resources": [
         {
             "cache_last_updated": null,
             "cache_url": null,
             "created": "2019-06-27T16:35:58.670413",
             "description": "",
             "format": "ZIP",
             "hash": "",
             "id": "106f7149-6161-4117-893a-44990c05cbe4",
             "last_modified": "2019-06-27T16:35:58.612669",
-            "metadata_modified": "2023-01-12T13:14:06.809141",
+            "metadata_modified": "2023-08-04T08:46:18.451830",
             "mimetype": "application/zip",
             "mimetype_inner": null,
             "name": "code.zip",
             "package_id": "ef96d2db-9cbe-4738-b7bf-da556e12bdc2",
             "position": 0,
             "resource_type": null,
             "size": 798357692,
             "state": "active",
             "url": "bdc2/resource/106f7149-6161-4117-893a-44990c05cbe4/download/code.zip",
             "url_type": ""
         }
     ],
+    "services_used_list": "",
     "source_metadata_created": "2019-06-27T16:29:02.921892",
     "source_metadata_modified": "2022-01-20T13:48:50.142253",
     "state": "active",
     "tags": [
         {
             "display_name": "computer vision",
             "id": "f650b4e3-9955-49b0-ba7b-2d302a990978",
             "name": "computer vision",
             "state": "active",
             "vocabulary_id": null
         },
         {
             "display_name": "document analysis",
             "id": "fd068cc5-5d26-4f2e-a92f-a60ef2c33839",
             "name": "document analysis",
             "state": "active",
             "vocabulary_id": null
         },
         {
             "display_name": "machine learning",
             "id": "9e42784b-6ee7-47e8-a69a-28b8c510212b",
             "name": "machine learning",
             "state": "active",
             "vocabulary_id": null
         }
     ],
     "terms_of_usage": "Yes",
     "title": "A Neural Approach for Text Extraction from Scholarly Figures",
     "type": "vdataset",
     "url": "dataset/a-neural-approach-for-text-extraction-from-scholarly-figures",
     "version": ""
 }
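In effect, the edit splits the combined author string into a primary "author" plus an "extra_authors" list, with the remaining fragments stored verbatim (including their leading spaces and the "and"). A minimal sketch, assuming the site's standard CKAN action API (package_show) is enabled, of how a client could read the record after this change and reconstruct the previous combined author string; the field names come from the diff above:

    # Sketch only: fetches the package via CKAN's package_show action and
    # rejoins "author" with the "extra_authors" fragments.
    import json
    import urllib.request

    SITE = "https://data.uni-hannover.de"
    NAME = "luh-a-neural-approach-for-text-extraction-from-scholarly-figures"

    url = f"{SITE}/api/3/action/package_show?id={NAME}"
    with urllib.request.urlopen(url) as resp:
        # CKAN wraps responses as {"success": ..., "result": {...}}
        pkg = json.load(resp)["result"]

    # Each "extra_author" keeps its original comma-separated fragment
    # (" Peichen Tang", " and Ralph Ewerth"), so stripping and joining
    # with ", " yields the pre-change value:
    # "David Morris, Peichen Tang, and Ralph Ewerth"
    authors = [pkg["author"]] + [
        a["extra_author"].strip() for a in pkg.get("extra_authors", [])
    ]
    print(", ".join(authors))

Because the fragments are stored as-is rather than normalized into clean per-person entries, any consumer that wants one name per field would still need to strip the leading whitespace and the "and" itself.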