Dataset Groups Activity Stream DocBank DocBank consists of 500K document layouts by weak supervision of articles available on the arXiv.com. BibTex:

% Citation record for the DocBank dataset (DOI 10.57702/k9rwozlf, publisher TIB).
% Everything outside the entry braces, including the page header above, is
% ignored by BibTeX and serves only as commentary.
% The citation key is kept exactly as exported so existing \cite commands
% continue to resolve.
% month uses the bare three-letter macro so styles can expand/localize it.
% The title word is brace-protected so sentence-casing styles keep "DocBank".
% keywords is a plain comma-separated list (export quoting removed).
% institution carries the exporter's value unchanged.
@dataset{Minghao_Li_and_Yiheng_Xu_and_Lei_Cui_and_Shaohan_Huang_2024,
  author      = {Li, Minghao and Xu, Yiheng and Cui, Lei and Huang, Shaohan},
  title       = {{DocBank}},
  publisher   = {TIB},
  institution = {No Organization},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/k9rwozlf},
  url         = {https://service.tib.eu/ldmservice/dataset/docbank},
  keywords    = {DocBank, Document Layout Analysis, Document Layout Generation, benchmark, document layout analysis, fine-grained annotations},
  abstract    = {DocBank consists of 500K document layouts by weak supervision of articles available on the arXiv.com.},
}