togethercomputer · rishabh-bhargava · Apr 27, 2026 · Apr 27, 2026
diff --git a/openapi.yaml b/openapi.yaml
@@ -3866,9 +3866,9 @@ paths:
           - The `input_text_buffer.commit` event is received
 
         **Audio Format:**
-        - Format: WAV (PCM s16le)
+        - Format: Raw PCM (s16le: signed 16-bit little-endian, mono; no WAV header)
         - Sample Rate: 24000 Hz
-        - Encoding: Base64
+        - Encoding: Base64 (each delta event's audio chunk is encoded individually)
         - Delivered via `conversation.item.audio_output.delta` events
 
         **Error Codes:**
@@ -3890,7 +3890,7 @@ paths:
 
             async def generate_speech():
                 api_key = os.environ.get("TOGETHER_API_KEY")
-                url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara"
+                url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=af_heart"
 
                 headers = {
                     "Authorization": f"Bearer {api_key}"
@@ -3900,6 +3900,9 @@ paths:
                     # Wait for session created
                     session_msg = await ws.recv()
                     session_data = json.loads(session_msg)
+                    if session_data.get("type") != "session.created":
+                        print(f"Failed to start session: {session_data}")
+                        return
                     print(f"Session created: {session_data['session']['id']}")
 
                     # Send text for TTS
@@ -3941,10 +3944,10 @@ paths:
                                 print(f"Error: {error.get('message')}")
                                 break
 
-                        # Save the audio to a file
-                        with open("output.wav", "wb") as f:
+                        # Save the raw PCM samples to a file (headerless s16le, 24 kHz mono; not a WAV)
+                        with open("output.pcm", "wb") as f:
                             f.write(audio_data)
-                        print("Audio saved to output.wav")
+                        print("Audio saved to output.pcm")
 
                     # Run send and receive concurrently
                     await asyncio.gather(send_text(), receive_audio())
@@ -3957,7 +3960,7 @@ paths:
             import fs from 'fs';
 
             const apiKey = process.env.TOGETHER_API_KEY;
-            const url = 'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara';
+            const url = 'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=af_heart';
 
             const ws = new WebSocket(url, {
               headers: {
@@ -4017,11 +4020,11 @@ paths:
             });
 
             ws.on('close', () => {
-              // Save the audio to a file
+              // Save the raw PCM samples to a file (headerless s16le, 24 kHz mono; not a WAV)
               if (audioData.length > 0) {
                 const completeAudio = Buffer.concat(audioData);
-                fs.writeFileSync('output.wav', completeAudio);
-                console.log('Audio saved to output.wav');
+                fs.writeFileSync('output.pcm', completeAudio);
+                console.log('Audio saved to output.pcm');
               }
             });