Complete, copy-paste ready examples for transcribing audio files using HTTP.
Quick Start
Basic File Transcription
Codecurl https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3 \ -H "Authorization: Bearer YOUR_API_KEY" \ -H "Content-Type: application/json" \ -d '{"url":"https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav","language":"en"}'
Response:
Code{ "text": "Micro-Machine Man, presenting the most midget...", "language": "en", "segments": [ { "start": 1.09, "end": 30.302, "text":"Micro-Machine Man, presenting the most midget...", "speaker": null }, { "start": 30.594, "end": 58.558, "text": " Where is the boat with the vehicle, and the man with the gunpowder at the end of his screen? You call it the car wash. Where is the toll bridge? And these play sets fit together to form a micromachine world. Micromachine pocket play sets. Such a menace in driving so perfectly precise and dazzlingly detailed. You ought to pocket them all. Micromachines are micromachine pocket play sets. They certainly find a home. The smaller they are, the better they are.", "speaker": null } ], "model_time": 2.6508004665374756, "audio_duration": 59.77675, "duration": 57.467999999999996 }
HTTP Examples by Language
cURL
Basic Audio File
Codecurl https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]"
With Language Hint
Codecurl https://api.slng.ai/v1/stt/deepgram/nova:2 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]" \ -F "language=en"
Multiple Audio Formats
Supported formats: MP3, WAV, FLAC, OGG, M4A, WebM
Code# WAV file curl https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]" # FLAC file curl https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]"
JavaScript/TypeScript
Basic File Upload
Codeasync function transcribeAudio(audioFile) { const formData = new FormData(); formData.append("file", audioFile); const response = await fetch( "https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3", { method: "POST", headers: { Authorization: "Bearer YOUR_API_KEY", }, body: formData, }, ); const result = await response.json(); return result.text; } // Usage with file input const fileInput = document.querySelector('input[type="file"]'); fileInput.addEventListener("change", async (e) => { const file = e.target.files[0]; const transcription = await transcribeAudio(file); console.log("Transcription:", transcription); });
Complete Implementation with Error Handling
Codeinterface TranscriptionSegment { start: number; end: number; text: string; } interface TranscriptionResult { text: string; segments: TranscriptionSegment[]; language?: string; duration?: number; } async function transcribeFile( file: File, provider: string = "slng", model: string = "whisper", variant: string = "large-v3", apiKey: string = "YOUR_API_KEY", ): Promise<TranscriptionResult> { const url = `https://api.slng.ai/v1/stt/${provider}/${model}:${variant}`; const formData = new FormData(); formData.append("file", file); try { const response = await fetch(url, { method: "POST", headers: { Authorization: `Bearer ${apiKey}`, }, body: formData, }); if (!response.ok) { const error = await response.text(); throw new Error(`HTTP ${response.status}: ${error}`); } const result = await response.json(); return result; } catch (error) { console.error("Transcription error:", error); throw error; } } // Usage const file = document.querySelector('input[type="file"]').files[0]; const result = await transcribeFile(file); console.log("Full text:", result.text); console.log("Segments with timestamps:", result.segments);
Batch Processing Multiple Files
Codeasync function transcribeBatch(audioFiles) { const results = await Promise.all( audioFiles.map(async (file) => { try { const formData = new FormData(); formData.append("file", file); const response = await fetch( "https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3", { method: "POST", headers: { Authorization: "Bearer YOUR_API_KEY", }, body: formData, }, ); const result = await response.json(); return { filename: file.name, transcription: result.text, segments: result.segments, success: true, }; } catch (error) { return { filename: file.name, error: error.message, success: false, }; } }), ); return results; } // Usage const files = Array.from(document.querySelector('input[type="file"]').files); const results = await transcribeBatch(files); results.forEach((result) => { if (result.success) { console.log(`${result.filename}: ${result.transcription}`); } else { console.error(`${result.filename} failed: ${result.error}`); } });
Using Deepgram Nova for Real-time Files
Codeasync function transcribeWithDeepgram(audioFile) { const formData = new FormData(); formData.append("file", audioFile); formData.append("language", "en"); formData.append("punctuate", "true"); formData.append("diarize", "true"); // Speaker identification const response = await fetch("https://api.slng.ai/v1/stt/deepgram/nova:2", { method: "POST", headers: { Authorization: "Bearer YOUR_API_KEY", }, body: formData, }); const result = await response.json(); return result; }
Python
Basic File Transcription
Codeimport requests def transcribe_audio(file_path: str, api_key: str = "YOUR_API_KEY") -> str: """ Transcribe an audio file using SLNG API. Args: file_path: Path to audio file api_key: SLNG API key Returns: Transcribed text """ url = "https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3" headers = { "Authorization": f"Bearer {api_key}" } with open(file_path, "rb") as audio_file: files = {"file": audio_file} response = requests.post(url, headers=headers, files=files) response.raise_for_status() result = response.json() return result["text"] # Usage transcription = transcribe_audio("recording.mp3") print(transcription)
Complete Implementation with Segments
Codeimport requests from typing import List, Dict, Optional from pathlib import Path class TranscriptionSegment: def __init__(self, start: float, end: float, text: str): self.start = start self.end = end self.text = text def __repr__(self): return f"[{self.start:.2f}s - {self.end:.2f}s]: {self.text}" class TranscriptionResult: def __init__(self, text: str, segments: List[Dict]): self.text = text self.segments = [ TranscriptionSegment(s["start"], s["end"], s["text"]) for s in segments ] def get_text_at_time(self, time: float) -> Optional[str]: """Get the text being spoken at a specific time.""" for segment in self.segments: if segment.start <= time <= segment.end: return segment.text return None def transcribe_file( file_path: str, provider: str = "slng", model: str = "whisper", variant: str = "large-v3", api_key: str = "YOUR_API_KEY" ) -> TranscriptionResult: """ Transcribe an audio file with full segment information. Args: file_path: Path to audio file provider: STT provider (slng, deepgram) model: Model name (whisper, nova) variant: Model variant (large-v3, 2) api_key: SLNG API key Returns: TranscriptionResult with text and segments """ url = f"https://api.slng.ai/v1/stt/{provider}/{model}:{variant}" headers = { "Authorization": f"Bearer {api_key}" } file_path = Path(file_path) if not file_path.exists(): raise FileNotFoundError(f"Audio file not found: {file_path}") with open(file_path, "rb") as audio_file: files = {"file": (file_path.name, audio_file, "audio/mpeg")} try: response = requests.post(url, headers=headers, files=files, timeout=120) response.raise_for_status() result = response.json() return TranscriptionResult( text=result["text"], segments=result.get("segments", []) ) except requests.exceptions.HTTPError as e: print(f"HTTP Error: {e}") print(f"Response: {response.text}") raise except requests.exceptions.RequestException as e: print(f"Request Error: {e}") raise # Usage result = transcribe_file("meeting.mp3") print("Full transcription:") 
print(result.text) print("\nSegments with timestamps:") for segment in result.segments: print(segment) # Get text at specific time text_at_30s = result.get_text_at_time(30.0) print(f"\nAt 30 seconds: {text_at_30s}")
Batch Processing Directory
Codeimport os from pathlib import Path from concurrent.futures import ThreadPoolExecutor, as_completed def transcribe_directory( directory: str, api_key: str = "YOUR_API_KEY", max_workers: int = 5 ) -> Dict[str, str]: """ Transcribe all audio files in a directory. Args: directory: Path to directory containing audio files api_key: SLNG API key max_workers: Maximum concurrent transcriptions Returns: Dictionary mapping filenames to transcriptions """ audio_extensions = {'.mp3', '.wav', '.flac', '.m4a', '.ogg'} directory = Path(directory) audio_files = [ f for f in directory.iterdir() if f.suffix.lower() in audio_extensions ] print(f"Found {len(audio_files)} audio files") results = {} with ThreadPoolExecutor(max_workers=max_workers) as executor: # Submit all transcription tasks future_to_file = { executor.submit(transcribe_file, str(f), api_key=api_key): f for f in audio_files } # Process completed tasks for future in as_completed(future_to_file): file_path = future_to_file[future] try: result = future.result() results[file_path.name] = result.text print(f"✓ {file_path.name}") except Exception as e: print(f"✗ {file_path.name}: {e}") results[file_path.name] = f"ERROR: {e}" return results # Usage results = transcribe_directory("./audio_files", max_workers=3) # Save to file with open("transcriptions.txt", "w") as f: for filename, transcription in results.items(): f.write(f"=== {filename} ===\n") f.write(f"{transcription}\n\n") print("Transcriptions saved to transcriptions.txt")
Model Comparison
SLNG Whisper (High Accuracy)
Best for: Batch transcription, meetings, podcasts, content creation
Codecurl https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]"
Features:
- Highest accuracy
- Multi-language support (100+ languages)
- Robust to background noise
- Batch processing optimized
- Segment timestamps included
Deepgram Nova (Real-time Optimized)
Best for: Near-real-time file processing, customer calls, IVR recordings
Codecurl https://api.slng.ai/v1/stt/deepgram/nova:2 \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "[email protected]" \ -F "language=en" \ -F "punctuate=true" \ -F "diarize=true"
Features:
- Fast processing
- Speaker diarization
- Automatic punctuation
- Word-level timestamps
- Confidence scores
Common Parameters
SLNG Whisper
| Parameter | Type | Description |
|---|---|---|
| file | file | Audio file (required) |
| language | string | Language code (optional, auto-detected) |
| temperature | number | Sampling temperature 0-1 (default: 0) |
Deepgram Nova
| Parameter | Type | Description |
|---|---|---|
| file | file | Audio file (required) |
| language | string | Language code (en, es, fr, etc.) |
| punctuate | boolean | Add punctuation (default: false) |
| diarize | boolean | Speaker identification (default: false) |
| smart_format | boolean | Format numbers, dates (default: false) |
Response Format
Standard Response
Code{ "text": "Complete transcription text goes here", "segments": [ { "start": 0.0, "end": 2.5, "text": "First segment of speech" }, { "start": 2.5, "end": 5.8, "text": "Second segment of speech" } ], "language": "en", "duration": 5.8 }
With Diarization (Deepgram)
Code{ "text": "Complete transcription text", "segments": [ { "start": 0.0, "end": 2.5, "text": "Hello, how can I help you?", "speaker": 0 }, { "start": 2.5, "end": 5.0, "text": "I need help with my account", "speaker": 1 } ] }
Error Handling
HTTP Status Codes
| Code | Meaning | Solution |
|---|---|---|
| 200 | Success | Transcription returned |
| 400 | Bad Request | Check file format and parameters |
| 401 | Unauthorized | Verify API key |
| 413 | File Too Large | Reduce file size or split audio |
| 415 | Unsupported Format | Convert to supported format |
| 429 | Rate Limited | Implement backoff |
| 500 | Server Error | Retry with exponential backoff |
Example with Retry Logic
Codeimport time import requests from requests.exceptions import HTTPError def transcribe_with_retry( file_path: str, max_retries: int = 3, api_key: str = "YOUR_API_KEY" ) -> str: """Transcribe with exponential backoff retry.""" for attempt in range(max_retries): try: url = "https://api.slng.ai/v1/stt/slng/openai/whisper:large-v3" headers = {"Authorization": f"Bearer {api_key}"} with open(file_path, "rb") as f: files = {"file": f} response = requests.post(url, headers=headers, files=files, timeout=120) if response.status_code == 429: # Rate limited - wait and retry delay = (2 ** attempt) * 1000 # Exponential backoff print(f"Rate limited. Waiting {delay}ms...") time.sleep(delay / 1000) continue response.raise_for_status() return response.json()["text"] except HTTPError as e: if attempt == max_retries - 1: raise print(f"Attempt {attempt + 1} failed: {e}") time.sleep(2 ** attempt) raise Exception("Max retries exceeded") # Usage text = transcribe_with_retry("audio.mp3")
Use Cases
Meeting Transcription
Coderesult = transcribe_file("team_meeting.mp3") # Generate meeting summary print("=== MEETING TRANSCRIPT ===\n") print(result.text) print("\n=== TIMELINE ===\n") for segment in result.segments: minutes = int(segment.start // 60) seconds = int(segment.start % 60) print(f"[{minutes:02d}:{seconds:02d}] {segment.text}")
Podcast Episode
Code# Transcribe long-form content result = transcribe_file("podcast_episode.mp3", model="whisper", variant="large-v3") # Create searchable transcript with timestamps with open("episode_transcript.txt", "w") as f: f.write(f"Full Text:\n{result.text}\n\n") f.write("Timestamped Segments:\n") for segment in result.segments: f.write(f"[{segment.start:.1f}s] {segment.text}\n")
Customer Call Analysis
Code# Use Deepgram for speaker separation result = transcribe_file( "customer_call.wav", provider="deepgram", model="nova", variant="2" ) # Separate by speaker agent_text = [] customer_text = [] for segment in result.segments: if segment.get("speaker") == 0: agent_text.append(segment.text) else: customer_text.append(segment.text) print("Agent:", " ".join(agent_text)) print("Customer:", " ".join(customer_text))
Last modified on