Complete, copy-paste ready examples for transcribing audio files using HTTP.
Quick Start
Basic File Transcription
Codecurl https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3 \ -H "Authorization: Bearer YOUR_API_KEY" \ -H "Content-Type: application/json" \ -d '{"url":"https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav","language":"en"}'
Response:
Code{ "text": "Micro-Machine Man, presenting the most midget...", "language": "en", "segments": [ { "start": 1.09, "end": 30.302, "text":"Micro-Machine Man, presenting the most midget...", "speaker": null }, { "start": 30.594, "end": 58.558, "text": " Where is the boat with the vehicle, and the man with the gunpowder at the end of his screen? You call it the car wash. Where is the toll bridge? And these play sets fit together to form a micromachine world. Micromachine pocket play sets. Such a menace in driving so perfectly precise and dazzlingly detailed. You ought to pocket them all. Micromachines are micromachine pocket play sets. They certainly find a home. The smaller they are, the better they are.", "speaker": null } ], "model_time": 2.6508004665374756, "audio_duration": 59.77675, "duration": 57.467999999999996 }
HTTP Examples by Language
cURL
Basic Audio File
Codecurl https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]"
With Language Hint
Codecurl https://api.slng.ai/v1/stt/deepgram/nova:2 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]" \ -F "language=en"
Multiple Audio Formats
Supported formats: MP3, WAV, FLAC, OGG, M4A, WebM
Code# WAV file curl https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]" # FLAC file curl https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]"
JavaScript/TypeScript
Basic File Upload
Codeasync function transcribeAudio(audioFile) { const formData = new FormData(); formData.append("file", audioFile); const response = await fetch( "https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3", { method: "POST", headers: { Authorization: "Bearer YOUR_API_KEY", }, body: formData, }, ); const result = await response.json(); return result.text; } // Usage with file input const fileInput = document.querySelector('input[type="file"]'); fileInput.addEventListener("change", async (e) => { const file = e.target.files[0]; const transcription = await transcribeAudio(file); console.log("Transcription:", transcription); });
Complete Implementation with Error Handling
Codeinterface TranscriptionSegment { start: number; end: number; text: string; } interface TranscriptionResult { text: string; segments: TranscriptionSegment[]; language?: string; duration?: number; } async function transcribeFile( file: File, provider: string = "slng", model: string = "whisper", variant: string = "large-v3", apiKey: string = "YOUR_API_KEY", ): Promise<TranscriptionResult> { const url = `https://api.slng.ai/v1/stt/${provider}/${model}:${variant}`; const formData = new FormData(); formData.append("file", file); try { const response = await fetch(url, { method: "POST", headers: { Authorization: `Bearer ${apiKey}`, }, body: formData, }); if (!response.ok) { const error = await response.text(); throw new Error(`HTTP ${response.status}: ${error}`); } const result = await response.json(); return result; } catch (error) { console.error("Transcription error:", error); throw error; } } // Usage const file = document.querySelector('input[type="file"]').files[0]; const result = await transcribeFile(file); console.log("Full text:", result.text); console.log("Segments with timestamps:", result.segments);
Batch Processing Multiple Files
Codeasync function transcribeBatch(audioFiles) { const results = await Promise.all( audioFiles.map(async (file) => { try { const formData = new FormData(); formData.append("file", file); const response = await fetch( "https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3", { method: "POST", headers: { Authorization: "Bearer YOUR_API_KEY", }, body: formData, }, ); const result = await response.json(); return { filename: file.name, transcription: result.text, segments: result.segments, success: true, }; } catch (error) { return { filename: file.name, error: error.message, success: false, }; } }), ); return results; } // Usage const files = Array.from(document.querySelector('input[type="file"]').files); const results = await transcribeBatch(files); results.forEach((result) => { if (result.success) { console.log(`${result.filename}: ${result.transcription}`); } else { console.error(`${result.filename} failed: ${result.error}`); } });
Using Deepgram Nova for Real-time Files
Codeasync function transcribeWithDeepgram(audioFile) { const formData = new FormData(); formData.append("file", audioFile); formData.append("language", "en"); formData.append("punctuate", "true"); formData.append("diarize", "true"); // Speaker identification const response = await fetch("https://api.slng.ai/v1/stt/deepgram/nova:2", { method: "POST", headers: { Authorization: "Bearer YOUR_API_KEY", }, body: formData, }); const result = await response.json(); return result; }
Python
Basic File Transcription
Codeimport requests def transcribe_audio(file_path: str, api_key: str = "YOUR_API_KEY") -> str: """ Transcribe an audio file using SLNG API. Args: file_path: Path to audio file api_key: SLNG API key Returns: Transcribed text """ url = "https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3" headers = { "Authorization": f"Bearer {api_key}" } with open(file_path, "rb") as audio_file: files = {"file": audio_file} response = requests.post(url, headers=headers, files=files) response.raise_for_status() result = response.json() return result["text"] # Usage transcription = transcribe_audio("recording.mp3") print(transcription)
Complete Implementation with Segments
Codeimport requests from typing import List, Dict, Optional from pathlib import Path class TranscriptionSegment: def __init__(self, start: float, end: float, text: str): self.start = start self.end = end self.text = text def __repr__(self): return f"[{self.start:.2f}s - {self.end:.2f}s]: {self.text}" class TranscriptionResult: def __init__(self, text: str, segments: List[Dict]): self.text = text self.segments = [ TranscriptionSegment(s["start"], s["end"], s["text"]) for s in segments ] def get_text_at_time(self, time: float) -> Optional[str]: """Get the text being spoken at a specific time.""" for segment in self.segments: if segment.start <= time <= segment.end: return segment.text return None def transcribe_file( file_path: str, provider: str = "slng", model: str = "whisper", variant: str = "large-v3", api_key: str = "YOUR_API_KEY" ) -> TranscriptionResult: """ Transcribe an audio file with full segment information. Args: file_path: Path to audio file provider: STT provider (slng, deepgram) model: Model name (whisper, nova) variant: Model variant (large-v3, 2) api_key: SLNG API key Returns: TranscriptionResult with text and segments """ url = f"https://api.slng.ai/v1/stt/{provider}/{model}:{variant}" headers = { "Authorization": f"Bearer {api_key}" } file_path = Path(file_path) if not file_path.exists(): raise FileNotFoundError(f"Audio file not found: {file_path}") with open(file_path, "rb") as audio_file: files = {"file": (file_path.name, audio_file, "audio/mpeg")} try: response = requests.post(url, headers=headers, files=files, timeout=120) response.raise_for_status() result = response.json() return TranscriptionResult( text=result["text"], segments=result.get("segments", []) ) except requests.exceptions.HTTPError as e: print(f"HTTP Error: {e}") print(f"Response: {response.text}") raise except requests.exceptions.RequestException as e: print(f"Request Error: {e}") raise # Usage result = transcribe_file("meeting.mp3") print("Full transcription:") 
print(result.text) print("\nSegments with timestamps:") for segment in result.segments: print(segment) # Get text at specific time text_at_30s = result.get_text_at_time(30.0) print(f"\nAt 30 seconds: {text_at_30s}")
Batch Processing Directory
Codeimport os from pathlib import Path from concurrent.futures import ThreadPoolExecutor, as_completed def transcribe_directory( directory: str, api_key: str = "YOUR_API_KEY", max_workers: int = 5 ) -> Dict[str, str]: """ Transcribe all audio files in a directory. Args: directory: Path to directory containing audio files api_key: SLNG API key max_workers: Maximum concurrent transcriptions Returns: Dictionary mapping filenames to transcriptions """ audio_extensions = {'.mp3', '.wav', '.flac', '.m4a', '.ogg'} directory = Path(directory) audio_files = [ f for f in directory.iterdir() if f.suffix.lower() in audio_extensions ] print(f"Found {len(audio_files)} audio files") results = {} with ThreadPoolExecutor(max_workers=max_workers) as executor: # Submit all transcription tasks future_to_file = { executor.submit(transcribe_file, str(f), api_key=api_key): f for f in audio_files } # Process completed tasks for future in as_completed(future_to_file): file_path = future_to_file[future] try: result = future.result() results[file_path.name] = result.text print(f"✓ {file_path.name}") except Exception as e: print(f"✗ {file_path.name}: {e}") results[file_path.name] = f"ERROR: {e}" return results # Usage results = transcribe_directory("./audio_files", max_workers=3) # Save to file with open("transcriptions.txt", "w") as f: for filename, transcription in results.items(): f.write(f"=== {filename} ===\n") f.write(f"{transcription}\n\n") print("Transcriptions saved to transcriptions.txt")
Model Comparison
SLNG Whisper (High Accuracy)
Best for: Batch transcription, meetings, podcasts, content creation
Codecurl https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]"
Features:
- Highest accuracy
- Multi-language support (100+ languages)
- Robust to background noise
- Batch processing optimized
- Segment timestamps included
Deepgram Nova (Real-time Optimized)
Best for: Near-real-time file processing, customer calls, IVR recordings
Codecurl https://api.slng.ai/v1/stt/deepgram/nova:2 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]" \ -F "language=en" \ -F "punctuate=true" \ -F "diarize=true"
Features:
- Fast processing
- Speaker diarization
- Automatic punctuation
- Word-level timestamps
- Confidence scores
Common Parameters
SLNG Whisper
| Parameter | Type | Description |
|---|---|---|
| file | file | Audio file (required) |
| language | string | Language code (optional, auto-detected) |
| temperature | number | Sampling temperature 0-1 (default: 0) |
Deepgram Nova
| Parameter | Type | Description |
|---|---|---|
| file | file | Audio file (required) |
| language | string | Language code (en, es, fr, etc.) |
| punctuate | boolean | Add punctuation (default: false) |
| diarize | boolean | Speaker identification (default: false) |
| smart_format | boolean | Format numbers, dates (default: false) |
Response Format
Standard Response
Code{ "text": "Complete transcription text goes here", "segments": [ { "start": 0.0, "end": 2.5, "text": "First segment of speech" }, { "start": 2.5, "end": 5.8, "text": "Second segment of speech" } ], "language": "en", "duration": 5.8 }
With Diarization (Deepgram)
Code{ "text": "Complete transcription text", "segments": [ { "start": 0.0, "end": 2.5, "text": "Hello, how can I help you?", "speaker": 0 }, { "start": 2.5, "end": 5.0, "text": "I need help with my account", "speaker": 1 } ] }
Error Handling
HTTP Status Codes
| Code | Meaning | Solution |
|---|---|---|
| 200 | Success | Transcription returned |
| 400 | Bad Request | Check file format and parameters |
| 401 | Unauthorized | Verify API key |
| 413 | File Too Large | Reduce file size or split audio |
| 415 | Unsupported Format | Convert to supported format |
| 429 | Rate Limited | Implement backoff |
| 500 | Server Error | Retry with exponential backoff |
Example with Retry Logic
Codeimport time import requests from requests.exceptions import HTTPError def transcribe_with_retry( file_path: str, max_retries: int = 3, api_key: str = "YOUR_API_KEY" ) -> str: """Transcribe with exponential backoff retry.""" for attempt in range(max_retries): try: url = "https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3" headers = {"Authorization": f"Bearer {api_key}"} with open(file_path, "rb") as f: files = {"file": f} response = requests.post(url, headers=headers, files=files, timeout=120) if response.status_code == 429: # Rate limited - wait and retry delay = (2 ** attempt) * 1000 # Exponential backoff print(f"Rate limited. Waiting {delay}ms...") time.sleep(delay / 1000) continue response.raise_for_status() return response.json()["text"] except HTTPError as e: if attempt == max_retries - 1: raise print(f"Attempt {attempt + 1} failed: {e}") time.sleep(2 ** attempt) raise Exception("Max retries exceeded") # Usage text = transcribe_with_retry("audio.mp3")
Use Cases
Meeting Transcription
Coderesult = transcribe_file("team_meeting.mp3") # Generate meeting summary print("=== MEETING TRANSCRIPT ===\n") print(result.text) print("\n=== TIMELINE ===\n") for segment in result.segments: minutes = int(segment.start // 60) seconds = int(segment.start % 60) print(f"[{minutes:02d}:{seconds:02d}] {segment.text}")
Podcast Episode
Code# Transcribe long-form content result = transcribe_file("podcast_episode.mp3", model="whisper", variant="large-v3") # Create searchable transcript with timestamps with open("episode_transcript.txt", "w") as f: f.write(f"Full Text:\n{result.text}\n\n") f.write("Timestamped Segments:\n") for segment in result.segments: f.write(f"[{segment.start:.1f}s] {segment.text}\n")
Customer Call Analysis
Code# Use Deepgram for speaker separation result = transcribe_file( "customer_call.wav", provider="deepgram", model="nova", variant="2" ) # Separate by speaker agent_text = [] customer_text = [] for segment in result.segments: if segment.get("speaker") == 0: agent_text.append(segment.text) else: customer_text.append(segment.text) print("Agent:", " ".join(agent_text)) print("Customer:", " ".join(customer_text))
Last modified on