Deepgram Core Workflow A: Pre-recorded Transcription

Overview

Implement a complete pre-recorded audio transcription workflow using Deepgram's Nova-2 model.

Prerequisites

Completed deepgram-install-auth setup
Understanding of async patterns
Audio files or URLs to transcribe

Instructions

Step 1: Set Up Transcription Service

Create a service class to handle transcription operations.

Step 2: Implement File and URL Transcription

Add methods for both local files and remote URLs.

Step 3: Add Feature Options

Configure punctuation, diarization, and formatting.

Step 4: Process Results

Extract and format transcription results.

Output

Transcription service class
Support for file and URL transcription
Configurable transcription options
Formatted transcript output

Error Handling

| Error | Cause | Solution |
|-------|-------|----------|
| Audio Too Long | Exceeds limits | Split into chunks or use async |
| Unsupported Format | Invalid audio type | Convert to WAV/MP3/FLAC |
| Empty Response | No speech detected | Check audio quality |
| Timeout | Large file processing | Use callback URL pattern |

Examples

TypeScript Transcription Service

// services/transcription.ts
import { createClient } from '@deepgram/sdk';
import { readFile } from 'fs/promises';

/**
 * Caller-facing options accepted by `TranscriptionService`.
 *
 * Every field is optional; the service substitutes its own default when a
 * field is omitted.
 */
export interface TranscriptionOptions {
  /** Deepgram model to use. */
  model?: 'nova-2' | 'nova' | 'enhanced' | 'base';
  /** Language code of the audio, e.g. `'en'`. */
  language?: string;
  /** Forwarded as the `punctuate` request flag. */
  punctuate?: boolean;
  /** Forwarded as the `diarize` request flag. */
  diarize?: boolean;
  /** Forwarded to the API under the snake_case name `smart_format`. */
  smartFormat?: boolean;
  /** Forwarded as the `utterances` request flag. */
  utterances?: boolean;
  /** Forwarded as the `paragraphs` request flag. */
  paragraphs?: boolean;
}

/**
 * Flattened transcription result returned by `TranscriptionService`.
 */
export interface TranscriptionResult {
  /** Full transcript text of the selected alternative. */
  transcript: string;
  /** Confidence reported for the selected alternative. */
  confidence: number;
  /** Per-word entries with their start/end offsets and confidence. */
  words: {
    word: string;
    start: number;
    end: number;
    confidence: number;
  }[];
  /** Speaker-attributed segments; absent when the response carries none. */
  utterances?: {
    speaker: number;
    transcript: string;
    start: number;
    end: number;
  }[];
}

/**
 * Wrapper around the Deepgram SDK client's pre-recorded transcription calls.
 *
 * Both entry points build their request from the same option mapping, so a
 * flag such as `utterances` or `paragraphs` behaves identically for URLs and
 * local files.
 */
export class TranscriptionService {
  /** Extension → MIME type table used for local files. */
  private static readonly MIME_TYPES: Readonly<Record<string, string>> = {
    wav: 'audio/wav',
    mp3: 'audio/mpeg',
    flac: 'audio/flac',
    ogg: 'audio/ogg',
    m4a: 'audio/mp4',
    webm: 'audio/webm',
  };

  private readonly client: ReturnType<typeof createClient>;

  /**
   * @param apiKey API key handed to the SDK's `createClient`.
   */
  constructor(apiKey: string) {
    this.client = createClient(apiKey);
  }

  /**
   * Transcribes audio hosted at a remote URL.
   *
   * @param url Location of the audio to transcribe.
   * @param options Feature flags; omitted fields fall back to service defaults.
   * @returns The flattened transcription result.
   * @throws Error when the SDK reports an error or the response holds no alternative.
   */
  async transcribeUrl(
    url: string,
    options: TranscriptionOptions = {}
  ): Promise<TranscriptionResult> {
    const { result, error } = await this.client.listen.prerecorded.transcribeUrl(
      { url },
      this.buildRequestOptions(options)
    );

    if (error) throw new Error(error.message);

    return this.formatResult(result);
  }

  /**
   * Reads a local audio file into memory and transcribes it.
   *
   * @param filePath Path of the audio file to read.
   * @param options Feature flags; omitted fields fall back to service defaults.
   * @returns The flattened transcription result.
   * @throws Error when the file cannot be read, the SDK reports an error, or
   *   the response holds no alternative.
   */
  async transcribeFile(
    filePath: string,
    options: TranscriptionOptions = {}
  ): Promise<TranscriptionResult> {
    const audio = await readFile(filePath);

    const { result, error } = await this.client.listen.prerecorded.transcribeFile(
      audio,
      {
        // Shared mapping: previously this call omitted `utterances` and
        // `paragraphs`, so those flags were silently ignored for files.
        ...this.buildRequestOptions(options),
        mimetype: this.getMimeType(filePath),
      }
    );

    if (error) throw new Error(error.message);

    return this.formatResult(result);
  }

  /**
   * Maps caller-facing options to the request fields, filling in defaults.
   * `smartFormat` is renamed to the snake_case `smart_format`.
   */
  private buildRequestOptions(options: TranscriptionOptions) {
    return {
      model: options.model || 'nova-2',
      language: options.language || 'en',
      punctuate: options.punctuate ?? true,
      diarize: options.diarize ?? false,
      smart_format: options.smartFormat ?? true,
      utterances: options.utterances ?? false,
      paragraphs: options.paragraphs ?? false,
    };
  }

  /**
   * Extracts the first alternative of the first channel from a raw response.
   *
   * @throws Error when the response contains no channel or alternative,
   *   instead of failing with an opaque property-access TypeError.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- raw SDK payload; only the fields read below are relied on
  private formatResult(result: any): TranscriptionResult {
    const alternative = result?.results?.channels?.[0]?.alternatives?.[0];
    if (!alternative) {
      throw new Error('Deepgram response contained no transcription alternatives');
    }

    return {
      transcript: alternative.transcript,
      confidence: alternative.confidence,
      words: alternative.words || [],
      utterances: result.results.utterances,
    };
  }

  /**
   * Derives a MIME type from the text after the last `.` in the path
   * (case-insensitive). Unknown or missing extensions yield `audio/wav`.
   */
  private getMimeType(filePath: string): string {
    const ext = filePath.split('.').pop()?.toLowerCase() ?? '';
    const table = TranscriptionService.MIME_TYPES;
    // Own-property check so names like "constructor" cannot resolve to
    // members inherited from Object.prototype.
    const known = Object.prototype.hasOwnProperty.call(table, ext) ? table[ext] : undefined;
    return known ?? 'audio/wav';
  }
}

Batch Transcription

// services/batch-transcription.ts
import { TranscriptionService, TranscriptionResult } from './transcription';

/**
 * Transcribes a list of local files in fixed-size batches.
 *
 * Files inside a batch are transcribed concurrently; the next batch starts
 * only after every file of the current one has settled. A failure for one
 * file never aborts the others: its entry in the returned map is the error.
 *
 * @param files Paths of the audio files to transcribe.
 * @param options.concurrency Maximum files per batch. Values that are
 *   missing, non-numeric, or below 1 fall back to 5.
 * @returns Map from file path to its transcription result or its error.
 * @throws Error when the `DEEPGRAM_API_KEY` environment variable is not set.
 */
export async function batchTranscribe(
  files: string[],
  options: { concurrency?: number } = {}
): Promise<Map<string, TranscriptionResult | Error>> {
  const apiKey = process.env.DEEPGRAM_API_KEY;
  if (!apiKey) {
    throw new Error('DEEPGRAM_API_KEY environment variable is not set');
  }

  const service = new TranscriptionService(apiKey);
  const results = new Map<string, TranscriptionResult | Error>();

  // A zero, negative, or NaN step would stall or reverse the loop below,
  // so anything under 1 is replaced by the default.
  const requested = Math.floor(options.concurrency ?? 5);
  const concurrency = requested >= 1 ? requested : 5;

  // Process in batches
  for (let start = 0; start < files.length; start += concurrency) {
    const batch = files.slice(start, start + concurrency);

    const settled = await Promise.allSettled(
      batch.map((file) => service.transcribeFile(file))
    );

    settled.forEach((outcome, index) => {
      const file = batch[index];
      if (file === undefined) return;

      if (outcome.status === 'fulfilled') {
        results.set(file, outcome.value);
      } else {
        // Rejection reasons are not guaranteed to be Error instances.
        const reason: unknown = outcome.reason;
        results.set(file, reason instanceof Error ? reason : new Error(String(reason)));
      }
    });
  }

  return results;
}

Speaker Diarization

// Example with speaker diarization
const result = await service.transcribeFile('./meeting.wav', {
  diarize: true,
  utterances: true,
});

// Format as conversation: one line per utterance, prefixed with its speaker.
// `utterances` is optional on the result, hence the optional chaining.
result.utterances?.forEach((utterance) => {
  console.log(`Speaker ${utterance.speaker}: ${utterance.transcript}`);
});

Python Example

# services/transcription.py

from deepgram import DeepgramClient, PrerecordedOptions, FileSource
from pathlib import Path
from typing import Optional
import mimetypes

class TranscriptionService:
    """Wrapper around the Deepgram client's pre-recorded transcription calls."""

    def __init__(self, api_key: str):
        """Create the underlying Deepgram client.

        Args:
            api_key: API key passed to ``DeepgramClient``.
        """
        self.client = DeepgramClient(api_key)

    def transcribe_url(
        self,
        url: str,
        model: str = 'nova-2',
        language: str = 'en',
        diarize: bool = False
    ) -> dict:
        """Transcribe audio hosted at a remote URL.

        Args:
            url: Location of the audio to transcribe.
            model: Deepgram model name.
            language: Language code of the audio.
            diarize: Value of the ``diarize`` request option.

        Returns:
            Dict with ``transcript``, ``confidence`` and ``words``.
        """
        options = PrerecordedOptions(
            model=model,
            language=language,
            smart_format=True,
            punctuate=True,
            diarize=diarize,
        )

        response = self.client.listen.rest.v("1").transcribe_url(
            {"url": url},
            options
        )

        return self._format_result(response)

    def transcribe_file(
        self,
        file_path: str,
        model: str = 'nova-2',
        diarize: bool = False,
        language: str = 'en'
    ) -> dict:
        """Read a local audio file into memory and transcribe it.

        Args:
            file_path: Path of the audio file to read.
            model: Deepgram model name.
            diarize: Value of the ``diarize`` request option.
            language: Language code of the audio. Placed last so existing
                positional callers are unaffected.

        Returns:
            Dict with ``transcript``, ``confidence`` and ``words``.
        """
        with open(file_path, 'rb') as f:
            audio = f.read()

        # FileSource is used as a payload type, not a constructor: the SDK's
        # documented usage passes a mapping holding the raw bytes.
        source: FileSource = {"buffer": audio}

        options = PrerecordedOptions(
            model=model,
            language=language,
            smart_format=True,
            punctuate=True,
            diarize=diarize,
        )

        response = self.client.listen.rest.v("1").transcribe_file(
            source,
            options
        )

        return self._format_result(response)

    def _format_result(self, response) -> dict:
        """Flatten the first alternative of the first channel into a dict."""
        channel = response.results.channels[0]
        alternative = channel.alternatives[0]

        return {
            'transcript': alternative.transcript,
            'confidence': alternative.confidence,
            'words': alternative.words,
        }

Resources

Deepgram Pre-recorded API
Deepgram Models
Speaker Diarization

Next Steps

Proceed to deepgram-core-workflow-b for real-time streaming transcription.

deepgram-core-workflow-a

Safety Notice

Copy this and send it to your AI assistant to learn

services/transcription.py

Source Transparency

Related Skills

backtesting-trading-strategies

svg-icon-generator

performance-lighthouse-runner

mindmap-generator