Dataset Groups Activity Stream TREC05 spam corpus The dataset used in the paper is the TREC05 spam corpus, which contains 39,999 real ham and 52,790 spam emails. BibTex:

% Dataset record for the TREC05 spam corpus (publisher: TIB, DOI 10.57702/wl6lxxr4).
% The citation key is kept exactly as exported so existing citations keep resolving.
% NOTE(review): the institution value "No Organization" looks like an exporter
% placeholder -- confirm the real institution or drop the field.
@dataset{Xi_Li_and_David_J_Miller_and_Zhen_Xiang_and_George_Kesidis_2024,
  author      = {Li, Xi and Miller, David J. and Xiang, Zhen and Kesidis, George},
  title       = {{TREC05} spam corpus},
  abstract    = {The dataset used in the paper is the TREC05 spam corpus, which contains 39,999 real ham and 52,790 spam emails.},
  keywords    = {ham, spam, text classification},
  institution = {No Organization},
  publisher   = {TIB},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/wl6lxxr4},
  url         = {https://service.tib.eu/ldmservice/dataset/trec05-spam-corpus},
}