@comment{
  Dataset Groups Activity Stream
  Finance and Academic Papers
  The dataset used in the paper is a mixture of general corpus and domain-specific corpus, with a power-law relationship between loss, mixture ratio, and training tokens scale.
  BibTex:
}

@dataset{Jiawei_Gu_and_Zacc_Yang_and_Chuanghao_Ding_and_Rui_Zhao_and_Fei_Tan_2025,
  author      = {Gu, Jiawei and Yang, Zacc and Ding, Chuanghao and Zhao, Rui and Tan, Fei},
  title       = {Finance and Academic Papers},
  year        = {2025},
  month       = jan,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/t60tevy0},
  url         = {https://service.tib.eu/ldmservice/dataset/finance-and-academic-papers},
  keywords    = {continual pre-training, large language models, mixture ratio, power-law, training tokens},
  abstract    = {The dataset used in the paper is a mixture of general corpus and domain-specific corpus, with a power-law relationship between loss, mixture ratio, and training tokens scale.},
}