WebSocket Examples
Complete, working examples for implementing SLNG WebSocket APIs across different languages and use cases.
🎯 Both Approaches Work!
Approach 1: Provider-Native Format (Your existing code)
```javascript
// Your existing ElevenLabs code - STILL WORKS!
ws.send(JSON.stringify({
  text: "Hello world",
  voice_settings: { stability: 0.5, similarity_boost: 0.75 }
}));

// Your existing Deepgram code - STILL WORKS!
ws.send("Hello world");
ws.send(JSON.stringify({ type: "Flush" }));

// Your existing Orpheus code - STILL WORKS!
ws.send("Hello world");
ws.send(JSON.stringify({ type: "control", action: "flush" }));
```
Approach 2: Unified Protocol (New portable format)
```javascript
// Same code for ALL providers - OPTIONAL!
ws.send(JSON.stringify({ type: 'input_text', text: 'Hello world' }));
ws.send(JSON.stringify({ type: 'flush' }));
```
You choose! Both work. Just pick one protocol per session — mixing unified and legacy commands in the same session is discouraged (see Best Practices below).
🚀 Quick Start - Universal Client
This example works with ANY TTS provider:
```javascript
// Universal TTS client - works with ALL providers
const WebSocket = require('ws');

class UniversalTTS {
  constructor(provider, apiKey) {
    this.provider = provider;
    this.apiKey = apiKey;
    this.ws = null;
  }

  async connect(voiceId = 'default') {
    const url = `wss://api.slng.ai/v1/tts/${this.provider}-websocket-stream`;
    this.ws = new WebSocket(url, {
      headers: { 'Authorization': `Bearer ${this.apiKey}` }
    });

    return new Promise((resolve, reject) => {
      this.ws.on('open', () => {
        // Same init command for ALL providers
        this.ws.send(JSON.stringify({
          type: 'init',
          voice_id: voiceId,
          region: 'us-west'
        }));
      });

      this.ws.on('message', (data) => {
        const msg = JSON.parse(data);
        if (msg.type === 'ready') resolve();
        if (msg.type === 'error') reject(new Error(msg.message));
      });
    });
  }

  synthesize(text) {
    // Same commands for ALL providers
    this.ws.send(JSON.stringify({ type: 'input_text', text }));
    this.ws.send(JSON.stringify({ type: 'flush' }));
  }
}

// This SAME class works with:
const orpheus = new UniversalTTS('orpheus', 'key');
const elevenlabs = new UniversalTTS('elevenlabs', 'key');
const deepgram = new UniversalTTS('deepgram', 'key');
```
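A minimal usage sketch: `connect()` resolves once the `ready` event arrives, after which text can be sent. The voice ID `tara` comes from the provider list used later in this page; substitute your own (audio handling is left to the message handler — see the streaming example below).

```javascript
// Usage sketch - voice ID and API key are placeholders
(async () => {
  const tts = new UniversalTTS('orpheus', process.env.SLNG_API_KEY);
  await tts.connect('tara'); // resolves on the 'ready' event
  tts.synthesize('Hello from the unified protocol!');
})();
```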
📋 Complete Examples by Use Case
1. Real-Time Conversational AI (Chatterbox)
Optimized for ultra-low latency voice interactions:
```javascript
const WebSocket = require('ws');

class ConversationalAI {
  constructor(apiKey) {
    this.apiKey = apiKey;
    this.ws = null;
  }

  async connect() {
    const url = 'wss://api.slng.ai/v1/tts/chatterbox-websocket-stream';
    this.ws = new WebSocket(url, {
      headers: { 'Authorization': `Bearer ${this.apiKey}` }
    });

    await new Promise((resolve) => {
      this.ws.on('open', () => {
        this.ws.send(JSON.stringify({
          type: 'init',
          voice_id: 'echo',
          style: {
            rate: 1.2,        // Slightly faster for conversation
            temperature: 0.3  // Lower for consistency
          }
        }));
      });

      this.ws.on('message', (data) => {
        const msg = JSON.parse(data);
        if (msg.type === 'ready') resolve();
        if (msg.type === 'audio_chunk') this.playAudio(msg.data);
      });
    });
  }

  respond(text, emotion = 'neutral') {
    this.ws.send(JSON.stringify({
      type: 'input_text',
      text: text,
      metadata: { emotion }
    }));
    this.ws.send(JSON.stringify({ type: 'flush' }));
  }

  interrupt() {
    // Send stop command - gateway will close backend connection as workaround
    this.ws.send(JSON.stringify({ type: 'stop' }));
  }

  playAudio(base64Data) {
    const buffer = Buffer.from(base64Data, 'base64');
    // Play PCM audio immediately for low latency (see the sketch below)
  }
}

// Usage
const ai = new ConversationalAI('YOUR_KEY');
await ai.connect();
ai.respond("How can I help you today?", 'friendly');
```
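The `playAudio` stub above leaves actual playback to you. One way to fill it in on Node.js is the third-party `speaker` package (an assumption — any PCM-capable audio sink works), configured for the mono PCM16 @ 24 kHz format used throughout these examples:

```javascript
// Sketch of playAudio using the npm 'speaker' package (not part of SLNG).
// Assumes mono PCM16 at 24 kHz, matching the init settings in these examples.
const Speaker = require('speaker');

const speaker = new Speaker({
  channels: 1,       // mono
  bitDepth: 16,      // PCM16
  sampleRate: 24000  // must match the sample_rate_hz sent in 'init'
});

function playAudio(base64Data) {
  // Write each decoded chunk straight to the audio device as it arrives
  speaker.write(Buffer.from(base64Data, 'base64'));
}
```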
2. Multi-Provider Streaming (Node.js)
Stream audio from any provider with automatic format handling:
```javascript
const fs = require('fs');
const WebSocket = require('ws');

class StreamingTTS {
  constructor(provider, apiKey, outputFile) {
    this.provider = provider;
    this.apiKey = apiKey;
    this.audioStream = fs.createWriteStream(outputFile);
  }

  async synthesize(text, voiceId) {
    const ws = new WebSocket(
      `wss://api.slng.ai/v1/tts/${this.provider}-websocket-stream`,
      { headers: { 'Authorization': `Bearer ${this.apiKey}` } }
    );

    return new Promise((resolve, reject) => {
      let audioChunks = [];

      ws.on('open', () => {
        // Universal protocol
        ws.send(JSON.stringify({
          type: 'init',
          voice_id: voiceId,
          audio: { codec: 'pcm16', sample_rate_hz: 24000 }
        }));
      });

      // Note: the ws package delivers all messages as Buffers; use the
      // isBinary flag (ws v8+) to tell JSON text frames from raw audio.
      ws.on('message', (data, isBinary) => {
        if (!isBinary) {
          const msg = JSON.parse(data);
          switch (msg.type) {
            case 'ready':
              ws.send(JSON.stringify({ type: 'input_text', text }));
              ws.send(JSON.stringify({ type: 'flush' }));
              break;
            case 'audio_chunk': {
              const audio = Buffer.from(msg.data, 'base64');
              audioChunks.push(audio);
              this.audioStream.write(audio);
              break;
            }
            case 'audio_end':
              this.audioStream.end();
              ws.close();
              resolve(Buffer.concat(audioChunks));
              break;
          }
        } else {
          // Binary frame (some providers)
          audioChunks.push(data);
          this.audioStream.write(data);
        }
      });

      ws.on('error', reject);
    });
  }
}

// Works with ANY provider
async function generateAllProviders() {
  const providers = [
    { name: 'orpheus', voice: 'tara' },
    { name: 'orpheus-indic', voice: 'kanak' },
    { name: 'kokoro', voice: 'af' },
    { name: 'cosyvoice', voice: 'emma' },
    { name: 'chatterbox', voice: 'nova' },
    { name: 'xtts', voice: 'Claribel Dervla' },
    { name: 'elevenlabs', voice: 'pNInz6obpgDQGcFmaJgB' },
    { name: 'deepgram', voice: 'aura-asteria-en' },
    { name: 'cartesia', voice: '694f9389' }
  ];

  for (const provider of providers) {
    const tts = new StreamingTTS(provider.name, 'KEY', `${provider.name}.pcm`);
    await tts.synthesize('Hello from SLNG!', provider.voice);
    console.log(`Generated ${provider.name}.pcm`);
  }
}
```
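The `.pcm` files written above are headerless, so most players won't open them directly. A small helper (a sketch, assuming the mono PCM16 @ 24 kHz format configured in `init`) can wrap the raw bytes in a standard 44-byte WAV header:

```javascript
const fs = require('fs');

// Wrap raw PCM16 bytes in a minimal WAV header so the file is playable.
// Assumes mono, 16-bit, 24 kHz - matching the 'init' settings used above.
function pcmToWav(pcmBuffer, outFile, sampleRate = 24000, channels = 1) {
  const byteRate = sampleRate * channels * 2; // 2 bytes per 16-bit sample
  const header = Buffer.alloc(44);
  header.write('RIFF', 0);
  header.writeUInt32LE(36 + pcmBuffer.length, 4); // RIFF chunk size
  header.write('WAVE', 8);
  header.write('fmt ', 12);
  header.writeUInt32LE(16, 16);                   // fmt chunk size
  header.writeUInt16LE(1, 20);                    // audio format: PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(channels * 2, 32);         // block align
  header.writeUInt16LE(16, 34);                   // bits per sample
  header.write('data', 36);
  header.writeUInt32LE(pcmBuffer.length, 40);
  fs.writeFileSync(outFile, Buffer.concat([header, pcmBuffer]));
}

// e.g. pcmToWav(fs.readFileSync('orpheus.pcm'), 'orpheus.wav');
```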
3. Python Client with Asyncio
```python
import asyncio
import websockets
import json
import base64

class UniversalTTS:
    def __init__(self, provider: str, api_key: str):
        self.provider = provider
        self.api_key = api_key
        self.url = f"wss://api.slng.ai/v1/tts/{provider}-websocket-stream"

    async def synthesize(self, text: str, voice_id: str = "default"):
        headers = {"Authorization": f"Bearer {self.api_key}"}
        # Note: websockets >= 14 renamed extra_headers to additional_headers
        async with websockets.connect(self.url, extra_headers=headers) as ws:
            # Send init (works with ALL providers)
            await ws.send(json.dumps({
                "type": "init",
                "voice_id": voice_id,
                "region": "us-west"
            }))

            # Wait for ready
            msg = json.loads(await ws.recv())
            assert msg["type"] == "ready"

            # Send text (universal format)
            await ws.send(json.dumps({"type": "input_text", "text": text}))
            await ws.send(json.dumps({"type": "flush"}))

            # Collect audio
            audio_data = []
            while True:
                msg = await ws.recv()
                if isinstance(msg, str):
                    data = json.loads(msg)
                    if data["type"] == "audio_chunk":
                        audio_data.append(base64.b64decode(data["data"]))
                    elif data["type"] == "audio_end":
                        break
                else:
                    # Binary audio frame
                    audio_data.append(msg)

            return b"".join(audio_data)

# Usage - same code for all providers!
async def main():
    providers = {
        "orpheus": "tara",
        "orpheus-indic": "kanak",
        "kokoro": "af",
        "cosyvoice": "emma",
        "chatterbox": "nova",
        "xtts": "Claribel Dervla",
        "elevenlabs": "pNInz6obpgDQGcFmaJgB",
        "deepgram": "aura-asteria-en"
    }

    for provider, voice in providers.items():
        tts = UniversalTTS(provider, "YOUR_KEY")
        audio = await tts.synthesize("Hello from Python!", voice)
        print(f"{provider}: Generated {len(audio)} bytes")

asyncio.run(main())
```
4. Browser JavaScript (Web Audio API)
```html
<!DOCTYPE html>
<html>
<head>
  <title>SLNG WebSocket TTS</title>
</head>
<body>
  <select id="provider">
    <option value="orpheus">Orpheus</option>
    <option value="chatterbox">Chatterbox</option>
    <option value="elevenlabs">ElevenLabs</option>
    <option value="deepgram">Deepgram</option>
  </select>
  <input type="text" id="text" placeholder="Enter text...">
  <button onclick="speak()">Speak</button>

  <script>
    class WebTTS {
      constructor() {
        this.audioContext = new AudioContext();
        this.ws = null;
      }

      async connect(provider, apiKey) {
        const url = `wss://api.slng.ai/v1/tts/${provider}-websocket-stream`;
        // Browsers can't set an Authorization header on a WebSocket,
        // so pass the key via a subprotocol (browser workaround)
        this.ws = new WebSocket(url, [`Bearer.${apiKey}`]);
        this.ws.binaryType = 'arraybuffer';

        return new Promise((resolve) => {
          this.ws.onopen = () => {
            this.ws.send(JSON.stringify({
              type: 'init',
              voice_id: 'default',
              audio: { codec: 'pcm16', sample_rate_hz: 24000 }
            }));
          };

          this.ws.onmessage = (event) => {
            if (typeof event.data === 'string') {
              const msg = JSON.parse(event.data);
              if (msg.type === 'ready') resolve();
              if (msg.type === 'audio_chunk') this.queueAudio(msg.data);
            }
          };
        });
      }

      speak(text) {
        this.ws.send(JSON.stringify({ type: 'input_text', text }));
        this.ws.send(JSON.stringify({ type: 'flush' }));
      }

      queueAudio(base64Data) {
        const binary = atob(base64Data);
        const bytes = new Uint8Array(binary.length);
        for (let i = 0; i < binary.length; i++) {
          bytes[i] = binary.charCodeAt(i);
        }

        // Raw PCM16 can't go through decodeAudioData (it expects a container
        // format like WAV or MP3), so build the AudioBuffer manually
        const pcm = new Int16Array(bytes.buffer);
        const audioBuffer = this.audioContext.createBuffer(1, pcm.length, 24000);
        const channel = audioBuffer.getChannelData(0);
        for (let i = 0; i < pcm.length; i++) {
          channel[i] = pcm[i] / 32768; // scale to [-1, 1)
        }

        const source = this.audioContext.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(this.audioContext.destination);
        source.start();
      }
    }

    const tts = new WebTTS();

    async function speak() {
      const provider = document.getElementById('provider').value;
      const text = document.getElementById('text').value;
      await tts.connect(provider, 'YOUR_API_KEY');
      tts.speak(text);
    }
  </script>
</body>
</html>
```
5. Batch Processing with Progress
```javascript
const WebSocket = require('ws');

class BatchTTS {
  constructor(apiKey) {
    this.apiKey = apiKey;
  }

  async processDocuments(documents, provider = 'orpheus') {
    const results = [];

    for (const [index, doc] of documents.entries()) {
      console.log(`Processing ${index + 1}/${documents.length}`);

      const ws = new WebSocket(
        `wss://api.slng.ai/v1/tts/${provider}-websocket-stream`,
        { headers: { 'Authorization': `Bearer ${this.apiKey}` } }
      );

      const audio = await this.synthesizeDocument(ws, doc);
      results.push({
        id: doc.id,
        text: doc.text,
        audio: audio,
        duration: audio.length / (24000 * 2) // seconds of PCM16 @ 24kHz
      });

      ws.close();
    }

    return results;
  }

  async synthesizeDocument(ws, doc) {
    return new Promise((resolve) => {
      const audioChunks = [];

      ws.on('open', () => {
        ws.send(JSON.stringify({
          type: 'init',
          voice_id: doc.voice || 'default',
          style: doc.style || {}
        }));
      });

      ws.on('message', (data) => {
        const msg = JSON.parse(data);
        if (msg.type === 'ready') {
          // Send text in paragraphs for natural pauses
          const paragraphs = doc.text.split('\n\n');
          paragraphs.forEach(p => {
            ws.send(JSON.stringify({ type: 'input_text', text: p }));
          });
          ws.send(JSON.stringify({ type: 'flush' }));
        }
        if (msg.type === 'audio_chunk') {
          audioChunks.push(Buffer.from(msg.data, 'base64'));
        }
        if (msg.type === 'audio_end') {
          resolve(Buffer.concat(audioChunks));
        }
      });
    });
  }
}
```
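A hypothetical invocation, for illustration — the document shape (`id`, `text`, optional `voice` and `style`) matches what `synthesizeDocument` reads above:

```javascript
// Hypothetical usage; document fields match what synthesizeDocument reads
const batch = new BatchTTS('YOUR_KEY');
const results = await batch.processDocuments([
  { id: 'intro', text: 'Welcome to the show.\n\nLet us begin.', voice: 'tara' },
  { id: 'outro', text: 'Thanks for listening!' }
]);
console.log(results.map(r => `${r.id}: ${r.duration.toFixed(1)}s`));
```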
6. Error Handling & Reconnection
```javascript
const WebSocket = require('ws');

class RobustTTS {
  constructor(provider, apiKey, options = {}) {
    this.provider = provider;
    this.apiKey = apiKey;
    this.maxRetries = options.maxRetries || 3;
    this.retryDelay = options.retryDelay || 1000;
  }

  async connectWithRetry(attempt = 1) {
    try {
      await this.connect();
    } catch (error) {
      if (attempt < this.maxRetries) {
        console.log(`Retry ${attempt}/${this.maxRetries} in ${this.retryDelay}ms`);
        await new Promise(r => setTimeout(r, this.retryDelay));
        return this.connectWithRetry(attempt + 1);
      }
      throw error;
    }
  }

  async connect() {
    const url = `wss://api.slng.ai/v1/tts/${this.provider}-websocket-stream`;
    this.ws = new WebSocket(url, {
      headers: { 'Authorization': `Bearer ${this.apiKey}` }
    });

    return new Promise((resolve, reject) => {
      const timeout = setTimeout(() => {
        reject(new Error('Connection timeout'));
        this.ws.close();
      }, 5000);

      this.ws.on('open', () => {
        clearTimeout(timeout);
        this.ws.send(JSON.stringify({ type: 'init', voice_id: 'default' }));
      });

      this.ws.on('message', (data) => {
        const msg = JSON.parse(data);
        if (msg.type === 'ready') {
          clearTimeout(timeout);
          resolve();
        }
        if (msg.type === 'error') {
          clearTimeout(timeout);
          reject(new Error(msg.message));
        }
      });

      this.ws.on('error', (error) => {
        clearTimeout(timeout);
        reject(error);
      });
    });
  }

  // synthesize() is assumed to be implemented as in the examples above
  async synthesizeSafe(text) {
    try {
      if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
        await this.connectWithRetry();
      }
      return await this.synthesize(text);
    } catch (error) {
      console.error('Synthesis failed:', error);
      // Attempt reconnection
      await this.connectWithRetry();
      return await this.synthesize(text);
    }
  }
}
```
🔧 Advanced Patterns
Provider Feature Detection
```javascript
class AdaptiveTTS {
  constructor(provider) {
    this.provider = provider;
    this.capabilities = null;
  }

  async connect() {
    // ... connection code ...

    // Detect capabilities from the ready event
    this.ws.on('message', (data) => {
      const msg = JSON.parse(data);
      if (msg.type === 'ready') {
        this.capabilities = msg.capabilities || {
          supports_stop: this.provider.includes('orpheus'),
          supports_configure: this.provider.includes('elevenlabs'),
          supports_emotions: this.provider.includes('kokoro')
        };
      }
    });
  }

  async interrupt() {
    if (this.capabilities?.supports_stop) {
      this.ws.send(JSON.stringify({ type: 'stop' }));
    } else {
      // Fallback: close and reconnect
      this.ws.close();
      await this.connect();
    }
  }
}
```
Audio Processing Pipeline
```javascript
class AudioPipeline {
  constructor() {
    this.processors = [];
  }

  addProcessor(fn) {
    this.processors.push(fn);
  }

  async processAudioChunk(chunk) {
    let data = chunk;
    for (const processor of this.processors) {
      data = await processor(data);
    }
    return data;
  }
}

// Usage (normalizeVolume, addReverb, convertToMp3 are your own processors)
const pipeline = new AudioPipeline();
pipeline.addProcessor(normalizeVolume);
pipeline.addProcessor(addReverb);
pipeline.addProcessor(convertToMp3);
```
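The processor names in the usage snippet are placeholders you supply yourself. As an illustration, a minimal `normalizeVolume` processor for the PCM16 chunks used throughout these examples might look like this (a sketch, not part of the SLNG API):

```javascript
// Sketch of a normalizeVolume processor for PCM16 chunks (hypothetical).
// Scales samples so the loudest one hits ~90% of full scale.
function normalizeVolume(chunk) {
  const samples = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.length / 2);

  let peak = 0;
  for (const s of samples) peak = Math.max(peak, Math.abs(s));
  if (peak === 0) return chunk; // silence - nothing to scale

  const gain = (32767 * 0.9) / peak;
  const out = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    out[i] = Math.max(-32768, Math.min(32767, Math.round(samples[i] * gain)));
  }
  return Buffer.from(out.buffer);
}
```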
🎯 Best Practices
DO ✅
- Use the unified protocol for all new implementations
- Send `flush` after text for immediate synthesis
- Handle both JSON and binary audio frames
- Implement reconnection logic for production
- Process audio chunks as they arrive (don't wait for all)
DON'T ❌
- Mix unified and legacy protocols in the same session
- Assume all features work on all providers
- Buffer entire audio before playback (increases latency)
- Ignore error events
- Send commands before receiving 'ready'
📚 See Also
- API Reference - Complete technical details
- Unified Protocol Guide - Migration and concepts
- Provider Matrix - Feature comparison