Dataset Groups Activity Stream Web Synthetic Page Layout The dataset used for paragraph recognition in document images by spatial graph convolutional networks (GCN) applied on OCR text boxes. BibTex:

@comment{
  Citation record for the "Web Synthetic Page Layout" dataset (publisher
  field: TIB). The citation key is unchanged so existing cite commands
  keep resolving.
  Cleanup relative to the exported record:
    - month uses the bare three-letter macro rather than a braced string;
    - keywords is a plain comma-separated list (quote characters removed)
      under the standard field name;
    - the placeholder institution value "No Organization" was dropped so
      that no style prints it as a real institution;
    - author names use the unambiguous "Last, First" form.
}

@dataset{Renshen_Wang_and_Yasuhisa_Fujii_and_Ashok_Popat_2024,
  author    = {Wang, Renshen and Fujii, Yasuhisa and Popat, Ashok},
  title     = {Web Synthetic Page Layout},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/yli9t4bf},
  url       = {https://service.tib.eu/ldmservice/dataset/web-synthetic-page-layout},
  keywords  = {GCN, OCR, Web Scraping, document image understanding, paragraph recognition},
  abstract  = {The dataset used for paragraph recognition in document images by spatial graph convolutional networks (GCN) applied on OCR text boxes.},
}