diff --git a/openapi.yaml b/openapi.yaml index 0b20862..b1d0958 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -3866,9 +3866,9 @@ paths: - The `input_text_buffer.commit` event is received **Audio Format:** - - Format: WAV (PCM s16le) + - Format: Raw PCM (s16le, mono) - Sample Rate: 24000 Hz - - Encoding: Base64 + - Encoding: Base64 (per delta event) - Delivered via `conversation.item.audio_output.delta` events **Error Codes:** @@ -3890,7 +3890,7 @@ paths: async def generate_speech(): api_key = os.environ.get("TOGETHER_API_KEY") - url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara" + url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=af_heart" headers = { "Authorization": f"Bearer {api_key}" @@ -3900,6 +3900,9 @@ paths: # Wait for session created session_msg = await ws.recv() session_data = json.loads(session_msg) + if session_data.get("type") != "session.created": + print(f"Failed to start session: {session_data}") + return print(f"Session created: {session_data['session']['id']}") # Send text for TTS @@ -3941,10 +3944,10 @@ paths: print(f"Error: {error.get('message')}") break - # Save the audio to a file - with open("output.wav", "wb") as f: + # Save the raw PCM samples to a file + with open("output.pcm", "wb") as f: f.write(audio_data) - print("Audio saved to output.wav") + print("Audio saved to output.pcm") # Run send and receive concurrently await asyncio.gather(send_text(), receive_audio()) @@ -3957,7 +3960,7 @@ paths: import fs from 'fs'; const apiKey = process.env.TOGETHER_API_KEY; - const url = 'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara'; + const url = 'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=af_heart'; const ws = new WebSocket(url, { headers: { @@ -4017,11 +4020,11 @@ paths: }); ws.on('close', () => { - // Save the audio to a file + // Save the raw PCM samples to a file if (audioData.length > 0) { const completeAudio = Buffer.concat(audioData); - fs.writeFileSync('output.wav', completeAudio); - console.log('Audio saved to output.wav'); + fs.writeFileSync('output.pcm', completeAudio); + console.log('Audio saved to output.pcm'); } });