Dataset Groups Activity Stream

Are Larger Pretrained Language Models Uniformly Better? Comparing Performance at the Instance Level

Larger language models have higher accuracy on average, but are they better on every single instance (datapoint)?

BibTeX:

@comment{
  Dataset record for the instance-level language-model comparison data,
  registered with publisher TIB under the DOI given below.
  The citation key is kept exactly as exported so existing citations of
  this entry keep resolving.
  The exporter's placeholder institution value (No Organization) is
  omitted because it does not name a real organization.
}
@dataset{Ruiqi_Zhong_and_Dhruba_Ghosh_and_Dan_Klein_and_Jacob_Steinhardt_2024,
  author    = {Zhong, Ruiqi and Ghosh, Dhruba and Klein, Dan and Steinhardt, Jacob},
  title     = {Are Larger Pretrained Language Models Uniformly Better? {Comparing} Performance at the Instance Level},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/34bmylpg},
  url       = {https://service.tib.eu/ldmservice/dataset/are-larger-pretrained-language-models-uniformly-better--comparing-performance-at-the-instance-level},
  keywords  = {instance-level accuracy, language models, pretrained language models},
  abstract  = {Larger language models have higher accuracy on average, but are they better on every single instance (datapoint)?},
}