f | { | f | { |
| "access_rights": "", | | "access_rights": "", |
n | "author": "Soravit Changpinyo", | n | "author": "Soravit Changpino", |
| "author_email": "", | | "author_email": "", |
n | "citation": [ | n | "citation": [], |
| "https://doi.org/10.48550/arXiv.2405.16759", | | |
| "https://doi.org/10.48550/arXiv.2306.03168", | | |
| "https://doi.org/10.48550/arXiv.2403.12037" | | |
| ], | | |
| "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", |
n | "defined_in": "https://doi.org/10.48550/arXiv.2304.08480", | n | "defined_in": "https://doi.org/10.48550/arXiv.2203.13131", |
| "doi": "10.57702/8ihuxai8", | | "doi": "10.57702/8ihuxai8", |
| "doi_date_published": "2024-12-02", | | "doi_date_published": "2024-12-02", |
| "doi_publisher": "TIB", | | "doi_publisher": "TIB", |
| "doi_status": true, | | "doi_status": true, |
| "domain": "https://service.tib.eu/ldmservice", | | "domain": "https://service.tib.eu/ldmservice", |
| "extra_authors": [ | | "extra_authors": [ |
| { | | { |
| "extra_author": "Piyush Sharma", | | "extra_author": "Piyush Sharma", |
| "orcid": "" | | "orcid": "" |
| }, | | }, |
| { | | { |
| "extra_author": "Nan Ding", | | "extra_author": "Nan Ding", |
| "orcid": "" | | "orcid": "" |
| }, | | }, |
| { | | { |
| "extra_author": "Radu Soricut", | | "extra_author": "Radu Soricut", |
| "orcid": "" | | "orcid": "" |
| } | | } |
| ], | | ], |
| "groups": [ | | "groups": [ |
| { | | { |
| "description": "", | | "description": "", |
| "display_name": "Image Captioning", | | "display_name": "Image Captioning", |
| "id": "7a76ce67-2607-4da9-a837-d2017dc33ec6", | | "id": "7a76ce67-2607-4da9-a837-d2017dc33ec6", |
| "image_display_url": "", | | "image_display_url": "", |
| "name": "image-captioning", | | "name": "image-captioning", |
| "title": "Image Captioning" | | "title": "Image Captioning" |
| }, | | }, |
| { | | { |
| "description": "", | | "description": "", |
n | "display_name": "Image Generation", | n | "display_name": "Multimodal Learning", |
| "id": "be25a76c-def1-4e73-8b1c-b81222d63867", | | "id": "a88cb918-103d-4226-a87d-9d9103c1f839", |
| "image_display_url": "", | | "image_display_url": "", |
n | "name": "image-generation", | n | "name": "multimodal-learning", |
| "title": "Image Generation" | | "title": "Multimodal Learning" |
| }, | | |
| { | | |
| "description": "", | | |
| "display_name": "Image-Text Pre-training", | | |
| "id": "03a3bb8b-20d3-40b2-b5ac-4fd948153611", | | |
| "image_display_url": "", | | |
| "name": "image-text-pre-training", | | |
| "title": "Image-Text Pre-training" | | |
| }, | | |
| { | | |
| "description": "", | | |
| "display_name": "Text-to-Image", | | |
| "id": "c68d7763-4eaf-4b60-a763-d8a968e4245e", | | |
| "image_display_url": "", | | |
| "name": "text-to-image", | | |
| "title": "Text-to-Image" | | |
| }, | | |
| { | | |
| "description": "", | | |
| "display_name": "Vision-and-Language Pre-training", | | |
| "id": "3e9c0249-6f01-4a87-aba1-46afaf268b22", | | |
| "image_display_url": "", | | |
| "name": "vision-and-language-pre-training", | | |
| "title": "Vision-and-Language Pre-training" | | |
| }, | | |
| { | | |
| "description": "", | | |
| "display_name": "Visual Concepts", | | |
| "id": "a5e78018-6689-40da-9bfc-121c955db376", | | |
| "image_display_url": "", | | |
| "name": "visual-concepts", | | |
| "title": "Visual Concepts" | | |
| } | | } |
| ], | | ], |
| "id": "1ef20fa9-4ccd-4bef-bd76-1da37630fab9", | | "id": "1ef20fa9-4ccd-4bef-bd76-1da37630fab9", |
| "isopen": false, | | "isopen": false, |
n | "landing_page": "https://arxiv.org/abs/2106.09528", | n | "landing_page": "https://arxiv.org/abs/1809.11096", |
| "license_title": null, | | "license_title": null, |
| "link_orkg": "", | | "link_orkg": "", |
| "metadata_created": "2024-12-02T17:55:34.172836", | | "metadata_created": "2024-12-02T17:55:34.172836", |
n | "metadata_modified": "2024-12-02T17:55:34.565393", | n | "metadata_modified": "2024-12-02T18:21:37.512489", |
| "name": "conceptual-12m", | | "name": "conceptual-12m", |
n | "notes": "The dataset used in the paper for training and evaluation | n | "notes": "Conceptual 12m dataset for automatic image captioning", |
| of the proposed method for training non-cascaded large scale | | |
| pixel-space text-to-image diffusion models.", | | |
| "num_resources": 1, | | "num_resources": 0, |
| "num_tags": 10, | | "num_tags": 3, |
| "organization": { | | "organization": { |
| "approval_status": "approved", | | "approval_status": "approved", |
| "created": "2024-11-25T12:11:38.292601", | | "created": "2024-11-25T12:11:38.292601", |
| "description": "", | | "description": "", |
| "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", |
| "image_url": "", | | "image_url": "", |
| "is_organization": true, | | "is_organization": true, |
| "name": "no-organization", | | "name": "no-organization", |
| "state": "active", | | "state": "active", |
| "title": "No Organization", | | "title": "No Organization", |
| "type": "organization" | | "type": "organization" |
| }, | | }, |
| "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", |
| "private": false, | | "private": false, |
| "relationships_as_object": [], | | "relationships_as_object": [], |
| "relationships_as_subject": [], | | "relationships_as_subject": [], |
n | "resources": [ | n | "resources": [], |
| { | | |
| "cache_last_updated": null, | | |
| "cache_url": null, | | |
| "created": "2024-12-02T18:38:42", | | |
| "data": [ | | |
| "dcterms:title", | | |
| "dcterms:accessRights", | | |
| "dcterms:creator", | | |
| "dcterms:description", | | |
| "dcterms:issued", | | |
| "dcterms:language", | | |
| "dcterms:identifier", | | |
| "dcat:theme", | | |
| "dcterms:type", | | |
| "dcat:keyword", | | |
| "dcat:landingPage", | | |
| "dcterms:hasVersion", | | |
| "dcterms:format", | | |
| "mls:task", | | |
| "datacite:isDescribedBy" | | |
| ], | | |
| "description": "The json representation of the dataset with its | | |
| distributions based on DCAT.", | | |
| "format": "JSON", | | |
| "hash": "", | | |
| "id": "d1b5d5f0-c6d5-4dbd-a6ee-9b2d889d86f2", | | |
| "last_modified": "2024-12-02T17:55:34.557141", | | |
| "metadata_modified": "2024-12-02T17:55:34.568168", | | |
| "mimetype": "application/json", | | |
| "mimetype_inner": null, | | |
| "name": "Original Metadata", | | |
| "package_id": "1ef20fa9-4ccd-4bef-bd76-1da37630fab9", | | |
| "position": 0, | | |
| "resource_type": null, | | |
| "size": 1308, | | |
| "state": "active", | | |
| "url": | | |
| resource/d1b5d5f0-c6d5-4dbd-a6ee-9b2d889d86f2/download/metadata.json", | | |
| "url_type": "upload" | | |
| } | | |
| ], | | |
| "services_used_list": "", | | "services_used_list": "", |
| "state": "active", | | "state": "active", |
| "tags": [ | | "tags": [ |
| { | | { |
n | "display_name": "12 million pairs", | n | "display_name": "Conceptual 12m", |
| "id": "6ef28272-8c66-4d2c-8597-c68ae76e4494", | | "id": "0d10ce61-89c5-498e-a7da-fb7ba37db90b", |
| "name": "12 million pairs", | | "name": "Conceptual 12m", |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| }, | | }, |
| { | | { |
n | "display_name": "Image-Text Pre-training", | n | "display_name": "Image Captioning", |
| "id": "214a5ac3-3cf8-44b0-b122-1a14136f63cf", | | "id": "c708cba4-0a1f-45c9-826f-898857783343", |
| "name": "Image-Text Pre-training", | | "name": "Image Captioning", |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| }, | | }, |
| { | | { |
n | "display_name": "Long-tail Visual Concepts", | n | |
| "id": "47acd844-ffa8-465a-8cbd-24bf7e84bb5b", | | |
| "name": "Long-tail Visual Concepts", | | |
| "state": "active", | | |
| "vocabulary_id": null | | |
| }, | | |
| { | | |
| "display_name": "diffusion models", | | |
| "id": "67686580-d41a-4e00-875a-494e99951342", | | |
| "name": "diffusion models", | | |
| "state": "active", | | |
| "vocabulary_id": null | | |
| }, | | |
| { | | |
| "display_name": "image captioning", | | "display_name": "Multimodal Learning", |
| "id": "f1bbe827-a03a-4280-b9fa-0599ccfc0541", | | "id": "41329983-d90e-4560-a97d-7fa2ba0a1f92", |
| "name": "image captioning", | | "name": "Multimodal Learning", |
| "state": "active", | | |
| "vocabulary_id": null | | |
| }, | | |
| { | | |
| "display_name": "image generation", | | |
| "id": "96df81b4-32fd-4826-a903-affb005a0a60", | | |
| "name": "image generation", | | |
| "state": "active", | | |
| "vocabulary_id": null | | |
| }, | | |
| { | | |
| "display_name": "long-tail visual concepts", | | |
| "id": "4bd2e9bf-cf63-4ae0-ba9b-b4f5b06e7540", | | |
| "name": "long-tail visual concepts", | | |
| "state": "active", | | |
| "vocabulary_id": null | | |
| }, | | |
| { | | |
| "display_name": "text-to-image", | | |
| "id": "0b0e2f9a-321b-4b04-abc6-14130f66400c", | | |
| "name": "text-to-image", | | |
| "state": "active", | | |
| "vocabulary_id": null | | |
| }, | | |
| { | | |
| "display_name": "vision-and-language pre-training", | | |
| "id": "0d9c23b6-373f-46e2-9478-068178037f58", | | |
| "name": "vision-and-language pre-training", | | |
| "state": "active", | | |
| "vocabulary_id": null | | |
| }, | | |
| { | | |
| "display_name": "web-scale image-text pre-training", | | |
| "id": "2c7f0652-00fb-434b-92b5-3e244f0e6fa5", | | |
| "name": "web-scale image-text pre-training", | | |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| } | | } |
| ], | | ], |
t | "title": "Conceptual 12M", | t | "title": "Conceptual 12m", |
| "type": "dataset", | | "type": "dataset", |
| "version": "" | | "version": "" |
| } | | } |