Option B: Unified Protocol (Write once, run anywhere)
Code
// Same code for ALL providers: start the session, then send the text to speak.
// `ws` is an already-open WebSocket connection to any provider endpoint.
const initMessage = { type: 'init', voice_id: 'tara' };
const textMessage = { type: 'input_text', text: 'Hello world' };

ws.send(JSON.stringify(initMessage));
ws.send(JSON.stringify(textMessage));
*Note: Kokoro generates complete audio before streaming, resulting in slightly higher initial latency but consistent quality. Audio is chunked and streamed once generation completes.
{
  "type": "init",
  "voice_id": "kanak",
  "language": "hi" // hi, bn, ta, te, mr, gu, kn, ml
  // Supported languages:
  // hi = Hindi, bn = Bengali, ta = Tamil, te = Telugu
  // mr = Marathi, gu = Gujarati, kn = Kannada, ml = Malayalam
}
Session Management with context_id
The context_id parameter enables session tracking and correlation across WebSocket connections:
How it works:
Optional: If not provided, a UUID is auto-generated
Persistent: The same context_id is included in all server events
Provider Support:
Cartesia: Native support - maintains conversation context
All Others: Gateway-level tracking for correlation
Usage:
Code
// Initialize with custom context_id
{
  "type": "init",
  "context_id": "conversation-123", // Your custom ID
  "voice_id": "tara"
}

// All events will include the context_id
{
  "type": "audio_chunk",
  "data": "...",
  "context_id": "conversation-123" // Same ID returned
}
Benefits:
Track sessions across reconnections
Correlate events in logging
Maintain conversation context (Cartesia)
Debug multi-session scenarios
Events from Server
TTS Events
Event
Description
Fields
ready
Session ready
session, capabilities, context_id
audio_chunk
Audio data
data (base64), sequence, duration_ms, context_id
audio_end
Synthesis done
id, total_duration_ms, context_id
error
Error occurred
code, message, retryable, context_id
session_ended
Connection closed
reason, usage, context_id
STT Events
Event
Description
Fields
ready
Session ready
model_id, sample_rate, encoding
transcript
Partial transcription (is_final indicates whether the text is final)
text, is_final, confidence, timestamp
final_transcript
Complete transcription
text, audio_duration_seconds, timestamp
error
Error occurred
code, message
Complete Example
Code
const WebSocket = require('ws');

/**
 * Minimal text-to-speech client for the unified WebSocket protocol.
 *
 * The same message sequence (`init`, then `input_text` + `flush`) is sent for
 * every provider; only the provider segment of the endpoint URL changes.
 */
class TTSClient {
  /**
   * @param {string} apiKey - API key, sent as a Bearer token on connect.
   * @param {string} [provider='orpheus'] - Provider name used to build the endpoint URL.
   */
  constructor(apiKey, provider = 'orpheus') {
    this.apiKey = apiKey;
    this.provider = provider;
    this.ws = null;
  }

  /**
   * Opens the socket, sends `init`, and resolves once the server reports `ready`.
   *
   * @param {string} [voiceId='tara'] - Voice to synthesize with.
   * @param {?string} [contextId=null] - Session ID; defaults to `session-<timestamp>`.
   * @returns {Promise<void>} Rejects if the server sends an `error` event, the
   *   socket errors, or the socket closes before `ready` is received.
   */
  async connect(voiceId = 'tara', contextId = null) {
    const url = `wss://api.slng.ai/v1/tts/${this.provider}-websocket-stream`;

    this.ws = new WebSocket(url, {
      headers: {
        'Authorization': `Bearer ${this.apiKey}`
      }
    });

    return new Promise((resolve, reject) => {
      // Once the promise has resolved, reject() is a no-op, so later failures
      // are logged instead of being silently dropped.
      let ready = false;

      this.ws.on('open', () => {
        // Use unified protocol - works with ALL providers
        this.ws.send(JSON.stringify({
          type: 'init',
          voice_id: voiceId,
          context_id: contextId || `session-${Date.now()}`
        }));
      });

      this.ws.on('message', (data) => {
        const msg = JSON.parse(data);

        if (msg.type === 'ready') {
          ready = true;
          console.log('Connected to', this.provider);
          resolve();
        }

        if (msg.type === 'audio_chunk') {
          this.handleAudio(msg.data);
        }

        if (msg.type === 'error') {
          if (ready) {
            console.error('TTS error:', msg.message);
          } else {
            reject(new Error(msg.message));
          }
        }
      });

      // Transport-level failures (bad API key, DNS, network) never arrive as
      // protocol messages; without these handlers connect() would hang forever.
      this.ws.on('error', (err) => {
        if (ready) {
          console.error('TTS socket error:', err.message);
        } else {
          reject(err);
        }
      });

      this.ws.on('close', () => {
        if (!ready) {
          reject(new Error('Connection closed before the session was ready'));
        }
      });
    });
  }

  /**
   * Sends text for synthesis followed by a `flush` message.
   *
   * @param {string} text - Text to speak.
   * @throws {Error} If called before connect().
   */
  synthesize(text) {
    if (!this.ws) {
      throw new Error('Not connected: call connect() first');
    }

    // Unified protocol - same for all providers
    this.ws.send(JSON.stringify({
      type: 'input_text',
      text: text
    }));
    this.ws.send(JSON.stringify({ type: 'flush' }));
  }

  /**
   * Decodes one base64 `audio_chunk` payload and logs its size.
   *
   * @param {string} base64Data - The `data` field of an `audio_chunk` event.
   */
  handleAudio(base64Data) {
    const buffer = Buffer.from(base64Data, 'base64');
    // Process PCM audio (24kHz, 16-bit, mono)
    console.log(`Received ${buffer.length} bytes of audio`);
  }

  /** Closes the socket if one has been opened. */
  close() {
    if (this.ws) {
      this.ws.close();
    }
  }
}

// Usage - works with ANY provider!
// Note: the connections are left open so audio can keep arriving;
// call close() on each client when you are done with it.
async function main() {
  const client = new TTSClient('YOUR_API_KEY', 'orpheus');
  await client.connect('tara');
  client.synthesize('Hello from SLNG WebSocket API!');

  // Switch providers with zero code changes
  const elevenlabs = new TTSClient('YOUR_API_KEY', 'elevenlabs');
  await elevenlabs.connect('21m00Tcm4TlvDq8ikWAM');
  elevenlabs.synthesize('Same code, different provider!');
}

main().catch(console.error);
STT WebSocket Example
Code
const WebSocket = require('ws');
const fs = require('fs');

/**
 * Minimal speech-to-text client: opens a streaming session, sends raw PCM
 * audio, and logs the transcripts the server sends back.
 */
class STTClient {
  /**
   * @param {string} apiKey - API key, sent as a Bearer token on connect.
   * @param {string} [model='kyutai'] - Model name used to build the endpoint URL.
   */
  constructor(apiKey, model = 'kyutai') {
    this.apiKey = apiKey;
    this.model = model;
    this.ws = null;
  }

  /**
   * Opens the socket, sends `start`, and resolves once the server reports `ready`.
   *
   * @param {string} [language='en'] - Language code for the session.
   * @returns {Promise<void>} Rejects if the server sends an `error` event, the
   *   socket errors, or the socket closes before `ready` is received.
   */
  async connect(language = 'en') {
    // Encode the language so an unexpected value cannot corrupt the query string.
    const url = `wss://api.slng.ai/v1/stt/${this.model}-websocket-stream?language=${encodeURIComponent(language)}`;

    this.ws = new WebSocket(url, {
      headers: {
        'Authorization': `Bearer ${this.apiKey}`
      }
    });

    return new Promise((resolve, reject) => {
      // Once the promise has resolved, reject() is a no-op, so later failures
      // are logged instead of being silently dropped.
      let ready = false;

      this.ws.on('open', () => {
        // Initialize STT session
        this.ws.send(JSON.stringify({
          type: 'start',
          sample_rate: 16000,
          encoding: 'pcm16',
          language: language
        }));
      });

      this.ws.on('message', (data) => {
        const msg = JSON.parse(data);

        if (msg.type === 'ready') {
          ready = true;
          console.log('STT ready:', msg.model_id);
          resolve();
        }

        if (msg.type === 'transcript') {
          console.log(`${msg.is_final ? 'Final' : 'Partial'}: ${msg.text}`);
        }

        if (msg.type === 'final_transcript') {
          console.log(`Complete transcription (${msg.audio_duration_seconds}s): ${msg.text}`);
        }

        if (msg.type === 'error') {
          if (ready) {
            console.error('STT error:', msg.message);
          } else {
            reject(new Error(msg.message));
          }
        }
      });

      // Transport-level failures (bad API key, DNS, network) never arrive as
      // protocol messages; without these handlers connect() would hang forever.
      this.ws.on('error', (err) => {
        if (ready) {
          console.error('STT socket error:', err.message);
        } else {
          reject(err);
        }
      });

      this.ws.on('close', () => {
        if (!ready) {
          reject(new Error('Connection closed before the session was ready'));
        }
      });
    });
  }

  /**
   * Sends raw PCM audio (16kHz, 16-bit, mono) as a series of binary frames.
   *
   * The audio is split so that no single frame exceeds `chunkSize`; sending a
   * whole recording in one frame would exceed the gateway's message and audio
   * chunk size limits for anything but very short clips.
   *
   * @param {Buffer|ArrayBuffer|ArrayBufferView} audioBuffer - PCM bytes to send.
   * @param {number} [chunkSize=16384] - Maximum bytes per frame. Keep it even
   *   so 16-bit samples are not split across frames.
   * @throws {Error} If called before connect().
   * @throws {RangeError} If chunkSize is not a positive integer.
   */
  streamAudio(audioBuffer, chunkSize = 16 * 1024) {
    if (!this.ws) {
      throw new Error('Not connected: call connect() first');
    }
    if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
      throw new RangeError('chunkSize must be a positive integer');
    }

    // Wrap the input without copying so it can be sliced by byte offset.
    const bytes = ArrayBuffer.isView(audioBuffer)
      ? Buffer.from(audioBuffer.buffer, audioBuffer.byteOffset, audioBuffer.byteLength)
      : Buffer.from(audioBuffer);

    for (let offset = 0; offset < bytes.length; offset += chunkSize) {
      this.ws.send(bytes.subarray(offset, offset + chunkSize));
    }
  }

  /**
   * Asks the server for the final transcription.
   *
   * @throws {Error} If called before connect().
   */
  finalize() {
    if (!this.ws) {
      throw new Error('Not connected: call connect() first');
    }

    // Get final transcription
    this.ws.send(JSON.stringify({ type: 'finalize' }));
  }

  /** Closes the socket if one has been opened. */
  close() {
    if (this.ws) {
      this.ws.close();
    }
  }
}

// Usage
// Note: the connection is left open so the final transcript can arrive;
// call stt.close() when you are done with it.
async function transcribe() {
  const stt = new STTClient('YOUR_API_KEY', 'kyutai');
  await stt.connect('en');

  // Stream audio from file or microphone
  const audioData = fs.readFileSync('audio.pcm');
  stt.streamAudio(audioData);

  // Get final result
  stt.finalize();
}

transcribe().catch(console.error);
Limits & Safety
Gateway-Level Limits (Applied to All Providers)
Account Tier Limits
Limit
Free
Pro ($49/mo)
Enterprise
Concurrent connections
1
5
50
Max connection duration
5 minutes
30 minutes
120 minutes
Max message size
16KB
32KB
128KB
Messages per minute
20
60
120
Max text per message
500 chars
2,000 chars
10,000 chars
Max audio chunk
16KB
32KB
64KB
Idle timeout
5 minutes
5 minutes
5 minutes
Heartbeat interval
30 seconds
30 seconds
30 seconds
Provider-Specific Limits
These are enforced by the upstream providers in addition to our gateway limits: