Dataset Groups Activity Stream URL pre-training dataset A dataset of 20 million unlabeled URLs for pre-training BibTex:

@comment{
  Citation record for the "URL pre-training dataset" (20 million unlabeled
  URLs for pre-training), published by TIB under DOI 10.57702/5iceazvd.
  The citation key is kept exactly as exported so existing citations still
  resolve. NOTE(review): the institution value "No Organization" looks like
  an exporter placeholder rather than a real affiliation; confirm and drop
  or replace it.
}
@dataset{Ethan_M_Rudd_and_Ahmed_Abdallah_2024,
  author      = {Rudd, Ethan M. and Abdallah, Ahmed},
  title       = {{URL} pre-training dataset},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/5iceazvd},
  url         = {https://service.tib.eu/ldmservice/dataset/url-pre-training-dataset},
  keywords    = {Classification, Pre-training, URLs},
  abstract    = {A dataset of 20 million unlabeled URLs for pre-training},
}