Dataset Groups Activity Stream AstroMLab 1: Who Wins Astronomy Jeopardy!? A comprehensive evaluation of proprietary and open-weights large language models using the first astronomy-specific benchmarking dataset. BibTex:

@comment{
  Dataset record for "AstroMLab 1: Who Wins Astronomy Jeopardy!?" (publisher: TIB).
  Cleanup applied to the exported entry:
    - the citation key is kept exactly as exported so existing citations still resolve;
    - authors are written in the unambiguous "von Last, First" form;
    - AstroMLab and Jeopardy are brace-protected in the title so that
      sentence-casing styles keep their capitalisation;
    - month uses the predefined dec macro rather than a braced string;
    - the keyword list had literal single quotes around each item; it is now a
      plain comma-separated list in the keywords field;
    - the exported institution value "No Organization" was a placeholder, not
      data, and has been dropped so it can never be printed.
}

@dataset{Yuan-Sen_Ting_and_Tuan_Dung_Nguyen_and_Tirthankar_Ghosal_and_Rui_Pan_and_Hardik_Arora_and_Zechang_Sun_and_Tijmen_de_Haan_and_Nesar_Ramachandra_and_Azton_Wells_and_Sandeep_Madireddy_and_Alberto_Accomazzi_2024,
  author    = {Ting, Yuan-Sen
               and Nguyen, Tuan Dung
               and Ghosal, Tirthankar
               and Pan, Rui
               and Arora, Hardik
               and Sun, Zechang
               and de Haan, Tijmen
               and Ramachandra, Nesar
               and Wells, Azton
               and Madireddy, Sandeep
               and Accomazzi, Alberto},
  title     = {{AstroMLab} 1: Who Wins Astronomy {Jeopardy}!?},
  year      = {2024},
  month     = dec,
  publisher = {TIB},
  doi       = {10.57702/0pvkq94j},
  url       = {https://service.tib.eu/ldmservice/dataset/astromlab-1--who-wins-astronomy-jeopardy--},
  keywords  = {astronomy, benchmarking, large language models, question answering},
  abstract  = {A comprehensive evaluation of proprietary and open-weights large language models using the first astronomy-specific benchmarking dataset.},
}