Dataset Groups Activity Stream LibriTTS
A popular text-based VC approach is to use an automatic speech recognition (ASR) model to extract phonetic posteriorgram (PPG) as content representation.

BibTeX:

% Dataset record for LibriTTS; publisher, DOI and landing-page URL are given in the fields below.
% The citation key is kept exactly as exported so existing \cite commands keep resolving.
% Keywords are a plain comma-separated list; case variants from the export (e.g. Audio / audio)
% are retained because keyword matching may be case-sensitive.
% NOTE(review): institution holds the export's placeholder value "No Organization" -- confirm
% whether it should be replaced or dropped.
@dataset{H_Zen_and_V_Dang_and_R_Clark_and_Y_Zhang_and_R_J_Weiss_and_Y_Jia_and_Z_Chen_and_Y_Wu_2024,
  author      = {Zen, H. and Dang, V. and Clark, R. and Zhang, Y. and Weiss, R. J. and Jia, Y. and Chen, Z. and Wu, Y.},
  title       = {{LibriTTS}},
  abstract    = {A popular text-based VC approach is to use an automatic speech recognition (ASR) model to extract phonetic posteriorgram (PPG) as content representation.},
  keywords    = {ASR, Audio, Corpus, LibriTTS, Multispeaker TTS, Speaker Recognition,
                 Speech Data, Speech Database, Speech Synthesis, Text-to-Speech, Waveform,
                 audio, autoregressive diffusion transformer, clean utterances, labeled speech,
                 large-scale, libritts, multi-speaker, speech corpus, speech editing,
                 speech processing, speech synthesis, text-based, text-to-speech, voice conversion},
  institution = {No Organization},
  publisher   = {TIB},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/nmo984po},
  url         = {https://service.tib.eu/ldmservice/dataset/libritts},
}