% Dataset Groups Activity Stream
% End-to-End Referring Video Object Segmentation with Multimodal Transformers
% The referring video object segmentation task (RVOS) involves segmentation of a text-referred object instance in the frames of a given video.
% BibTeX:
%
% Citation record for the dataset page published by TIB (see the url field).
% These lines sit outside any entry, so BibTeX/Biber ignore them.
% The citation key is left unchanged so existing \cite commands keep resolving.
@dataset{Adam_Botach_and_Evgenii_Zheltonozhskii_and_Chaim_Baskin_2024,
  author      = {Botach, Adam and Zheltonozhskii, Evgenii and Baskin, Chaim},
  title       = {End-to-End Referring Video Object Segmentation with Multimodal Transformers},
  publisher   = {TIB},
  institution = {No Organization},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/48kufbtr},
  url         = {https://service.tib.eu/ldmservice/dataset/end-to-end-referring-video-object-segmentation-with-multimodal-transformers},
  keywords    = {Transformer-based approach, multimodal learning, video object segmentation},
  abstract    = {The referring video object segmentation task (RVOS) involves segmentation of a text-referred object instance in the frames of a given video.},
}