% Dataset Groups Activity Stream
% Measuring Massive Multitask Language Understanding
% The dataset used in this paper is a multiple choice question set that allows
% for the evaluation of large language models.
%
% BibTeX:
% Repository record for the dataset, identified by the DOI and landing-page URL below.
% The citation key is kept exactly as exported so existing \cite{} calls still resolve.
% The exporter's placeholder institution ("No Organization") is omitted because it
% carries no information.
% NOTE(review): author list, year and month are reproduced as exported; confirm
% them against the DOI landing page before relying on this entry.
@dataset{Reid_McIlroy-Young_and_Katrina_Brown_and_Conlan_Olson_and_Linjun_Zhang_and_Cynthia_Dwork_2024,
  author    = {McIlroy-Young, Reid and Brown, Katrina and Olson, Conlan and Zhang, Linjun and Dwork, Cynthia},
  title     = {Measuring Massive Multitask Language Understanding},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/qxktk0p2},
  url       = {https://service.tib.eu/ldmservice/dataset/measuring-massive-multitask-language-understanding},
  keywords  = {language models, language understanding, multiple choice, question answering},
  abstract  = {The dataset used in this paper is a multiple choice question set that allows for the evaluation of large language models.},
}