Changes
On December 2, 2024 at 6:23:01 PM UTC,
-
Changed value of field
doi_status
to True
in Training a helpful and harmless assistant with reinforcement learning from human feedback -
Changed value of field
doi_date_published
to 2024-12-02
in Training a helpful and harmless assistant with reinforcement learning from human feedback -
Added resource Original Metadata to Training a helpful and harmless assistant with reinforcement learning from human feedback
f | 1 | { | f | 1 | { |
2 | "access_rights": "", | 2 | "access_rights": "", | ||
3 | "author": "Yuntao Bai", | 3 | "author": "Yuntao Bai", | ||
4 | "author_email": "", | 4 | "author_email": "", | ||
5 | "citation": [ | 5 | "citation": [ | ||
6 | "https://doi.org/10.48550/arXiv.2403.16649", | 6 | "https://doi.org/10.48550/arXiv.2403.16649", | ||
7 | "https://doi.org/10.48550/arXiv.2310.00819", | 7 | "https://doi.org/10.48550/arXiv.2310.00819", | ||
8 | "https://doi.org/10.48550/arXiv.2307.01139", | 8 | "https://doi.org/10.48550/arXiv.2307.01139", | ||
9 | "https://doi.org/10.48550/arXiv.2406.15568" | 9 | "https://doi.org/10.48550/arXiv.2406.15568" | ||
10 | ], | 10 | ], | ||
11 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | 11 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | ||
12 | "defined_in": "https://doi.org/10.48550/arXiv.2312.09244", | 12 | "defined_in": "https://doi.org/10.48550/arXiv.2312.09244", | ||
13 | "doi": "10.57702/ueb4xymx", | 13 | "doi": "10.57702/ueb4xymx", | ||
n | 14 | "doi_date_published": null, | n | 14 | "doi_date_published": "2024-12-02", |
15 | "doi_publisher": "TIB", | 15 | "doi_publisher": "TIB", | ||
n | 16 | "doi_status": false, | n | 16 | "doi_status": true, |
17 | "domain": "https://service.tib.eu/ldmservice", | 17 | "domain": "https://service.tib.eu/ldmservice", | ||
18 | "extra_authors": [ | 18 | "extra_authors": [ | ||
19 | { | 19 | { | ||
20 | "extra_author": "Andy Jones", | 20 | "extra_author": "Andy Jones", | ||
21 | "orcid": "" | 21 | "orcid": "" | ||
22 | }, | 22 | }, | ||
23 | { | 23 | { | ||
24 | "extra_author": "Kamal Ndousse", | 24 | "extra_author": "Kamal Ndousse", | ||
25 | "orcid": "" | 25 | "orcid": "" | ||
26 | }, | 26 | }, | ||
27 | { | 27 | { | ||
28 | "extra_author": "Amanda Askell", | 28 | "extra_author": "Amanda Askell", | ||
29 | "orcid": "" | 29 | "orcid": "" | ||
30 | }, | 30 | }, | ||
31 | { | 31 | { | ||
32 | "extra_author": "Anna Chen", | 32 | "extra_author": "Anna Chen", | ||
33 | "orcid": "" | 33 | "orcid": "" | ||
34 | }, | 34 | }, | ||
35 | { | 35 | { | ||
36 | "extra_author": "Nova DasSarma", | 36 | "extra_author": "Nova DasSarma", | ||
37 | "orcid": "" | 37 | "orcid": "" | ||
38 | }, | 38 | }, | ||
39 | { | 39 | { | ||
40 | "extra_author": "Dawn Drain", | 40 | "extra_author": "Dawn Drain", | ||
41 | "orcid": "" | 41 | "orcid": "" | ||
42 | }, | 42 | }, | ||
43 | { | 43 | { | ||
44 | "extra_author": "Stanislav Fort", | 44 | "extra_author": "Stanislav Fort", | ||
45 | "orcid": "" | 45 | "orcid": "" | ||
46 | }, | 46 | }, | ||
47 | { | 47 | { | ||
48 | "extra_author": "Deep Ganguli", | 48 | "extra_author": "Deep Ganguli", | ||
49 | "orcid": "" | 49 | "orcid": "" | ||
50 | }, | 50 | }, | ||
51 | { | 51 | { | ||
52 | "extra_author": "Tom Henighan", | 52 | "extra_author": "Tom Henighan", | ||
53 | "orcid": "" | 53 | "orcid": "" | ||
54 | } | 54 | } | ||
55 | ], | 55 | ], | ||
56 | "groups": [ | 56 | "groups": [ | ||
57 | { | 57 | { | ||
58 | "description": "", | 58 | "description": "", | ||
59 | "display_name": "Human Feedback", | 59 | "display_name": "Human Feedback", | ||
60 | "id": "23a64964-cf6d-4b1b-bd3b-899e9ccb6c1c", | 60 | "id": "23a64964-cf6d-4b1b-bd3b-899e9ccb6c1c", | ||
61 | "image_display_url": "", | 61 | "image_display_url": "", | ||
62 | "name": "human-feedback", | 62 | "name": "human-feedback", | ||
63 | "title": "Human Feedback" | 63 | "title": "Human Feedback" | ||
64 | }, | 64 | }, | ||
65 | { | 65 | { | ||
66 | "description": "", | 66 | "description": "", | ||
67 | "display_name": "Human-computer interaction", | 67 | "display_name": "Human-computer interaction", | ||
68 | "id": "36c2df3a-7a5c-4c40-87fd-dd737e4abb30", | 68 | "id": "36c2df3a-7a5c-4c40-87fd-dd737e4abb30", | ||
69 | "image_display_url": "", | 69 | "image_display_url": "", | ||
70 | "name": "human-computer-interaction", | 70 | "name": "human-computer-interaction", | ||
71 | "title": "Human-computer interaction" | 71 | "title": "Human-computer interaction" | ||
72 | }, | 72 | }, | ||
73 | { | 73 | { | ||
74 | "description": "", | 74 | "description": "", | ||
75 | "display_name": "Natural Language Generation", | 75 | "display_name": "Natural Language Generation", | ||
76 | "id": "4b655249-9f90-4a62-a4b5-03a8d7ee0310", | 76 | "id": "4b655249-9f90-4a62-a4b5-03a8d7ee0310", | ||
77 | "image_display_url": "", | 77 | "image_display_url": "", | ||
78 | "name": "natural-language-generation", | 78 | "name": "natural-language-generation", | ||
79 | "title": "Natural Language Generation" | 79 | "title": "Natural Language Generation" | ||
80 | }, | 80 | }, | ||
81 | { | 81 | { | ||
82 | "description": "", | 82 | "description": "", | ||
83 | "display_name": "Reinforcement Learning", | 83 | "display_name": "Reinforcement Learning", | ||
84 | "id": "5f9e20df-0af0-4b7e-9637-e244d4a99379", | 84 | "id": "5f9e20df-0af0-4b7e-9637-e244d4a99379", | ||
85 | "image_display_url": "", | 85 | "image_display_url": "", | ||
86 | "name": "reinforcement-learning", | 86 | "name": "reinforcement-learning", | ||
87 | "title": "Reinforcement Learning" | 87 | "title": "Reinforcement Learning" | ||
88 | } | 88 | } | ||
89 | ], | 89 | ], | ||
90 | "id": "88c98003-16a4-4943-8853-a198911da984", | 90 | "id": "88c98003-16a4-4943-8853-a198911da984", | ||
91 | "isopen": false, | 91 | "isopen": false, | ||
92 | "landing_page": "https://arxiv.org/abs/2204.05862", | 92 | "landing_page": "https://arxiv.org/abs/2204.05862", | ||
93 | "license_title": null, | 93 | "license_title": null, | ||
94 | "link_orkg": "", | 94 | "link_orkg": "", | ||
95 | "metadata_created": "2024-12-02T18:23:00.098953", | 95 | "metadata_created": "2024-12-02T18:23:00.098953", | ||
n | 96 | "metadata_modified": "2024-12-02T18:23:00.098958", | n | 96 | "metadata_modified": "2024-12-02T18:23:00.436902", |
97 | "name": | 97 | "name": | ||
98 | d-harmless-assistant-with-reinforcement-learning-from-human-feedback", | 98 | d-harmless-assistant-with-reinforcement-learning-from-human-feedback", | ||
99 | "notes": "The authors propose a novel approach that incorporates | 99 | "notes": "The authors propose a novel approach that incorporates | ||
100 | parameter-efficient tuning to better optimize control tokens, thus | 100 | parameter-efficient tuning to better optimize control tokens, thus | ||
101 | benefitting controllable generation.", | 101 | benefitting controllable generation.", | ||
n | 102 | "num_resources": 0, | n | 102 | "num_resources": 1, |
103 | "num_tags": 10, | 103 | "num_tags": 10, | ||
104 | "organization": { | 104 | "organization": { | ||
105 | "approval_status": "approved", | 105 | "approval_status": "approved", | ||
106 | "created": "2024-11-25T12:11:38.292601", | 106 | "created": "2024-11-25T12:11:38.292601", | ||
107 | "description": "", | 107 | "description": "", | ||
108 | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | 108 | "id": "079d46db-32df-4b48-91f3-0a8bc8f69559", | ||
109 | "image_url": "", | 109 | "image_url": "", | ||
110 | "is_organization": true, | 110 | "is_organization": true, | ||
111 | "name": "no-organization", | 111 | "name": "no-organization", | ||
112 | "state": "active", | 112 | "state": "active", | ||
113 | "title": "No Organization", | 113 | "title": "No Organization", | ||
114 | "type": "organization" | 114 | "type": "organization" | ||
115 | }, | 115 | }, | ||
116 | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | 116 | "owner_org": "079d46db-32df-4b48-91f3-0a8bc8f69559", | ||
117 | "private": false, | 117 | "private": false, | ||
118 | "relationships_as_object": [], | 118 | "relationships_as_object": [], | ||
119 | "relationships_as_subject": [], | 119 | "relationships_as_subject": [], | ||
t | 120 | "resources": [], | t | 120 | "resources": [ |
121 | { | ||||
122 | "cache_last_updated": null, | ||||
123 | "cache_url": null, | ||||
124 | "created": "2024-12-02T18:38:42", | ||||
125 | "data": [ | ||||
126 | "dcterms:title", | ||||
127 | "dcterms:accessRights", | ||||
128 | "dcterms:creator", | ||||
129 | "dcterms:description", | ||||
130 | "dcterms:issued", | ||||
131 | "dcterms:language", | ||||
132 | "dcterms:identifier", | ||||
133 | "dcat:theme", | ||||
134 | "dcterms:type", | ||||
135 | "dcat:keyword", | ||||
136 | "dcat:landingPage", | ||||
137 | "dcterms:hasVersion", | ||||
138 | "dcterms:format", | ||||
139 | "mls:task", | ||||
140 | "datacite:isDescribedBy" | ||||
141 | ], | ||||
142 | "description": "The json representation of the dataset with its | ||||
143 | distributions based on DCAT.", | ||||
144 | "format": "JSON", | ||||
145 | "hash": "", | ||||
146 | "id": "bb21f9f0-6a3f-4ca2-b473-3ba66d7bbc4c", | ||||
147 | "last_modified": "2024-12-02T18:23:00.428889", | ||||
148 | "metadata_modified": "2024-12-02T18:23:00.439646", | ||||
149 | "mimetype": "application/json", | ||||
150 | "mimetype_inner": null, | ||||
151 | "name": "Original Metadata", | ||||
152 | "package_id": "88c98003-16a4-4943-8853-a198911da984", | ||||
153 | "position": 0, | ||||
154 | "resource_type": null, | ||||
155 | "size": 1438, | ||||
156 | "state": "active", | ||||
157 | "url": | ||||
158 | resource/bb21f9f0-6a3f-4ca2-b473-3ba66d7bbc4c/download/metadata.json", | ||||
159 | "url_type": "upload" | ||||
160 | } | ||||
161 | ], | ||||
121 | "services_used_list": "", | 162 | "services_used_list": "", | ||
122 | "state": "active", | 163 | "state": "active", | ||
123 | "tags": [ | 164 | "tags": [ | ||
124 | { | 165 | { | ||
125 | "display_name": "Assistant", | 166 | "display_name": "Assistant", | ||
126 | "id": "edc9d3bd-c8f3-4d30-97af-9ce3607b7f69", | 167 | "id": "edc9d3bd-c8f3-4d30-97af-9ce3607b7f69", | ||
127 | "name": "Assistant", | 168 | "name": "Assistant", | ||
128 | "state": "active", | 169 | "state": "active", | ||
129 | "vocabulary_id": null | 170 | "vocabulary_id": null | ||
130 | }, | 171 | }, | ||
131 | { | 172 | { | ||
132 | "display_name": "Human Feedback", | 173 | "display_name": "Human Feedback", | ||
133 | "id": "b2ef557d-7d5d-4aab-bc50-b4a006541c84", | 174 | "id": "b2ef557d-7d5d-4aab-bc50-b4a006541c84", | ||
134 | "name": "Human Feedback", | 175 | "name": "Human Feedback", | ||
135 | "state": "active", | 176 | "state": "active", | ||
136 | "vocabulary_id": null | 177 | "vocabulary_id": null | ||
137 | }, | 178 | }, | ||
138 | { | 179 | { | ||
139 | "display_name": "Reinforcement Learning", | 180 | "display_name": "Reinforcement Learning", | ||
140 | "id": "0bd14238-9c5d-4905-8165-c7e5a0c0884c", | 181 | "id": "0bd14238-9c5d-4905-8165-c7e5a0c0884c", | ||
141 | "name": "Reinforcement Learning", | 182 | "name": "Reinforcement Learning", | ||
142 | "state": "active", | 183 | "state": "active", | ||
143 | "vocabulary_id": null | 184 | "vocabulary_id": null | ||
144 | }, | 185 | }, | ||
145 | { | 186 | { | ||
146 | "display_name": "controllable generation", | 187 | "display_name": "controllable generation", | ||
147 | "id": "760ddcdc-786c-485c-8838-72cc44a9dcf1", | 188 | "id": "760ddcdc-786c-485c-8838-72cc44a9dcf1", | ||
148 | "name": "controllable generation", | 189 | "name": "controllable generation", | ||
149 | "state": "active", | 190 | "state": "active", | ||
150 | "vocabulary_id": null | 191 | "vocabulary_id": null | ||
151 | }, | 192 | }, | ||
152 | { | 193 | { | ||
153 | "display_name": "human feedback", | 194 | "display_name": "human feedback", | ||
154 | "id": "c52f1c18-6393-4ff3-8d19-3d7aeebf7b31", | 195 | "id": "c52f1c18-6393-4ff3-8d19-3d7aeebf7b31", | ||
155 | "name": "human feedback", | 196 | "name": "human feedback", | ||
156 | "state": "active", | 197 | "state": "active", | ||
157 | "vocabulary_id": null | 198 | "vocabulary_id": null | ||
158 | }, | 199 | }, | ||
159 | { | 200 | { | ||
160 | "display_name": "human-computer interaction", | 201 | "display_name": "human-computer interaction", | ||
161 | "id": "ef099061-893c-427f-80b8-91dd827af8a8", | 202 | "id": "ef099061-893c-427f-80b8-91dd827af8a8", | ||
162 | "name": "human-computer interaction", | 203 | "name": "human-computer interaction", | ||
163 | "state": "active", | 204 | "state": "active", | ||
164 | "vocabulary_id": null | 205 | "vocabulary_id": null | ||
165 | }, | 206 | }, | ||
166 | { | 207 | { | ||
167 | "display_name": "natural language generation", | 208 | "display_name": "natural language generation", | ||
168 | "id": "436cc798-f9c9-4560-abb6-9de128af20be", | 209 | "id": "436cc798-f9c9-4560-abb6-9de128af20be", | ||
169 | "name": "natural language generation", | 210 | "name": "natural language generation", | ||
170 | "state": "active", | 211 | "state": "active", | ||
171 | "vocabulary_id": null | 212 | "vocabulary_id": null | ||
172 | }, | 213 | }, | ||
173 | { | 214 | { | ||
174 | "display_name": "reinforcement learning", | 215 | "display_name": "reinforcement learning", | ||
175 | "id": "bc5598c0-bc87-456a-9505-1552490050f1", | 216 | "id": "bc5598c0-bc87-456a-9505-1552490050f1", | ||
176 | "name": "reinforcement learning", | 217 | "name": "reinforcement learning", | ||
177 | "state": "active", | 218 | "state": "active", | ||
178 | "vocabulary_id": null | 219 | "vocabulary_id": null | ||
179 | }, | 220 | }, | ||
180 | { | 221 | { | ||
181 | "display_name": "single-turn dialogue", | 222 | "display_name": "single-turn dialogue", | ||
182 | "id": "54a50a53-8771-409d-91cd-9ee81f523f77", | 223 | "id": "54a50a53-8771-409d-91cd-9ee81f523f77", | ||
183 | "name": "single-turn dialogue", | 224 | "name": "single-turn dialogue", | ||
184 | "state": "active", | 225 | "state": "active", | ||
185 | "vocabulary_id": null | 226 | "vocabulary_id": null | ||
186 | }, | 227 | }, | ||
187 | { | 228 | { | ||
188 | "display_name": "summarization", | 229 | "display_name": "summarization", | ||
189 | "id": "7404cab5-0604-4721-a0ec-d9b81ae7609f", | 230 | "id": "7404cab5-0604-4721-a0ec-d9b81ae7609f", | ||
190 | "name": "summarization", | 231 | "name": "summarization", | ||
191 | "state": "active", | 232 | "state": "active", | ||
192 | "vocabulary_id": null | 233 | "vocabulary_id": null | ||
193 | } | 234 | } | ||
194 | ], | 235 | ], | ||
195 | "title": "Training a helpful and harmless assistant with | 236 | "title": "Training a helpful and harmless assistant with | ||
196 | reinforcement learning from human feedback", | 237 | reinforcement learning from human feedback", | ||
197 | "type": "dataset", | 238 | "type": "dataset", | ||
198 | "version": "" | 239 | "version": "" | ||
199 | } | 240 | } |