% LAION-Aesthetic-3M
% The dataset used for training the prior model, containing 2M text-image
% pairs and 2M audio-visual pairs.
%
% Exported from the dataset landing page given in the url field (scraped page
% navigation labels: Dataset, Groups, Activity Stream).
% The title is brace-protected as a whole because it is a single proper name
% that sentence-casing styles must not lowercase.

@dataset{Shufan_Li_and_Harkanwar_Singh_and_Aditya_Grover_2024,
  author    = {Li, Shufan and Singh, Harkanwar and Grover, Aditya},
  title     = {{LAION-Aesthetic-3M}},
  year      = {2024},
  month     = dec,
  publisher = {TIB},
  doi       = {10.57702/3er1dhdi},
  url       = {https://service.tib.eu/ldmservice/dataset/laion-aesthetic-3m},
  keywords  = {audio-visual alignment, dataset, image-text alignment},
  abstract  = {The dataset used for training the prior model, containing 2M text-image pairs and 2M audio-visual pairs.},
}