maui-speech-to-text

.NET MAUI Speech-to-Text Implementation

Add on-device speech recognition to any .NET MAUI app using CommunityToolkit.Maui.

Quick Start

Install Package

Look up the current version of CommunityToolkit.Maui on NuGet, then add the package with that version:

dotnet add package CommunityToolkit.Maui --version <current-version>

Configure MauiProgram.cs

using CommunityToolkit.Maui;
using CommunityToolkit.Maui.Media;

// Enable the Community Toolkit on the app builder.
builder.UseMauiCommunityToolkit();

// Register services.
// These statements must sit on their own lines: when they share a line with
// the comment above, they are commented out and nothing gets registered.
builder.Services.AddSingleton<ISpeechToText>(SpeechToText.Default);
builder.Services.AddSingleton<ISpeechRecognitionService, SpeechRecognitionService>();

Platform Permissions

iOS (Info.plist):

<!-- Usage description for the speech-recognition permission. -->
<key>NSSpeechRecognitionUsageDescription</key>
<string>App uses speech recognition for hands-free input.</string>
<!-- Usage description for the microphone permission. -->
<key>NSMicrophoneUsageDescription</key>
<string>App needs microphone access to hear your voice.</string>

Android (AndroidManifest.xml):

<!-- Audio-recording permission; the service requests microphone access at runtime before listening. -->
<uses-permission android:name="android.permission.RECORD_AUDIO" />

Service Interface

/// <summary>
/// UI-agnostic contract for a speech recognition session: permission handling,
/// starting/stopping listening, and state / partial-result notifications.
/// </summary>
public interface ISpeechRecognitionService
{
    /// <summary>Current state of the recognizer.</summary>
    SpeechRecognitionState State { get; }

    /// <summary>Raised with the new value when <see cref="State"/> changes.</summary>
    event EventHandler<SpeechRecognitionState>? StateChanged;

    /// <summary>Raised with interim transcript text while listening.</summary>
    event EventHandler<string>? PartialResultReceived;

    /// <summary>Reports whether speech recognition can be used.</summary>
    Task<bool> IsAvailableAsync();

    /// <summary>Checks, and if necessary requests, the permissions needed to listen.</summary>
    /// <returns><c>true</c> when all required permissions are granted.</returns>
    Task<bool> RequestPermissionsAsync();

    /// <summary>Starts a listening session and completes with its outcome.</summary>
    /// <param name="cancellationToken">Token used to cancel the session.</param>
    Task<SpeechRecognitionResultDto> StartListeningAsync(CancellationToken cancellationToken = default);

    /// <summary>Stops the current listening session.</summary>
    Task StopListeningAsync();
}

Supporting Types

/// <summary>States reported by the speech recognition service.</summary>
public enum SpeechRecognitionState
{
    /// <summary>Not listening.</summary>
    Idle = 0,

    /// <summary>A listening session is active.</summary>
    Listening = 1,

    /// <summary>A completed recognition result is being handled.</summary>
    Processing = 2,

    /// <summary>The last session ended with an error.</summary>
    Error = 3
}

/// <summary>Outcome of a single listening session.</summary>
public record SpeechRecognitionResultDto
{
    /// <summary><c>true</c> when a transcript was produced.</summary>
    public bool Success { get; init; }

    /// <summary>Recognized text; <c>null</c> when not set.</summary>
    public string? Transcript { get; init; }

    /// <summary>Confidence value attached to the transcript.</summary>
    public double Confidence { get; init; }

    /// <summary>Failure description; <c>null</c> when not set.</summary>
    public string? ErrorMessage { get; init; }
}

Service Implementation

using CommunityToolkit.Maui.Media; using Microsoft.Extensions.Logging; using System.Globalization;

/// <summary>
/// <see cref="ISpeechRecognitionService"/> implementation built on the toolkit's
/// <see cref="ISpeechToText"/>. Tracks recognizer state, forwards partial results,
/// and turns the toolkit's completion event into an awaitable result.
/// </summary>
public class SpeechRecognitionService : ISpeechRecognitionService
{
    /// <summary>Upper bound on the duration of a single listening session.</summary>
    private static readonly TimeSpan ListeningTimeout = TimeSpan.FromSeconds(60);

    private readonly ISpeechToText _speechToText;
    private readonly ILogger<SpeechRecognitionService> _logger;
    private SpeechRecognitionState _state = SpeechRecognitionState.Idle;
    private CancellationTokenSource? _currentCts;
    private TaskCompletionSource<SpeechRecognitionResultDto>? _recognitionTcs;

    /// <summary>
    /// Current recognizer state. Setting a different value raises <see cref="StateChanged"/>.
    /// </summary>
    public SpeechRecognitionState State
    {
        get => _state;
        private set
        {
            if (_state != value)
            {
                _state = value;
                StateChanged?.Invoke(this, value);
            }
        }
    }

    /// <summary>Raised with the new value whenever <see cref="State"/> changes.</summary>
    public event EventHandler<SpeechRecognitionState>? StateChanged;

    /// <summary>Raised with the interim transcript while a session is listening.</summary>
    public event EventHandler<string>? PartialResultReceived;

    /// <summary>
    /// Creates the service and subscribes to the recognizer's completion event.
    /// </summary>
    /// <param name="speechToText">Toolkit speech-to-text implementation.</param>
    /// <param name="logger">Logger for permission and recognition failures.</param>
    public SpeechRecognitionService(ISpeechToText speechToText, ILogger<SpeechRecognitionService> logger)
    {
        _speechToText = speechToText;
        _logger = logger;
        _speechToText.RecognitionResultCompleted += OnRecognitionResultCompleted;
    }

    /// <summary>Always reports the recognizer as available; no platform check is performed.</summary>
    public Task<bool> IsAvailableAsync() => Task.FromResult(true);

    /// <summary>
    /// Checks the microphone and speech permissions and requests whichever is not yet granted.
    /// </summary>
    /// <returns>
    /// <c>true</c> when both permissions are granted; <c>false</c> otherwise, including
    /// when the permission API throws (the exception is logged).
    /// </returns>
    public async Task<bool> RequestPermissionsAsync()
    {
        try
        {
            // Check current status first
            var micStatus = await Permissions.CheckStatusAsync<Permissions.Microphone>();
            var speechStatus = await Permissions.CheckStatusAsync<Permissions.Speech>();

            // If not granted, request permissions from user
            if (micStatus != PermissionStatus.Granted)
            {
                micStatus = await Permissions.RequestAsync<Permissions.Microphone>();
            }

            if (speechStatus != PermissionStatus.Granted)
            {
                speechStatus = await Permissions.RequestAsync<Permissions.Speech>();
            }

            return micStatus == PermissionStatus.Granted && speechStatus == PermissionStatus.Granted;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error requesting speech recognition permissions");
            return false;
        }
    }

    /// <summary>
    /// Starts a listening session and waits until recognition completes, the caller
    /// cancels, or the session timeout elapses.
    /// </summary>
    /// <param name="cancellationToken">Cancels the session; reported as "Cancelled".</param>
    /// <returns>
    /// The recognition outcome. Failures (already listening, cancellation, timeout,
    /// recognizer errors) are returned as an unsuccessful result rather than thrown.
    /// </returns>
    public async Task<SpeechRecognitionResultDto> StartListeningAsync(CancellationToken cancellationToken = default)
    {
        if (State == SpeechRecognitionState.Listening)
        {
            return new SpeechRecognitionResultDto { Success = false, ErrorMessage = "Already listening" };
        }

        _currentCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);

        // Keep a local reference so this call awaits the source it created, and run
        // continuations asynchronously so they do not execute inside the event handler
        // that completes the source.
        var recognitionTcs = new TaskCompletionSource<SpeechRecognitionResultDto>(
            TaskCreationOptions.RunContinuationsAsynchronously);
        _recognitionTcs = recognitionTcs;
        State = SpeechRecognitionState.Listening;

        using var timeoutCts = new CancellationTokenSource(ListeningTimeout);
        using var combinedCts = CancellationTokenSource.CreateLinkedTokenSource(_currentCts.Token, timeoutCts.Token);

        try
        {
            _speechToText.RecognitionResultUpdated += OnRecognitionResultUpdated;

            var options = new SpeechToTextOptions
            {
                Culture = CultureInfo.CurrentCulture,
                ShouldReportPartialResults = true
            };

            await _speechToText.StartListenAsync(options, combinedCts.Token);

            // Ends (as cancelled) only when the caller cancels or the timeout elapses;
            // the timeout itself is owned by timeoutCts, so no second timer is needed.
            var abortTask = Task.Delay(Timeout.InfiniteTimeSpan, combinedCts.Token);
            var completedTask = await Task.WhenAny(recognitionTcs.Task, abortTask);

            if (completedTask == recognitionTcs.Task)
            {
                return await recognitionTcs.Task;
            }

            await StopListeningAsync();

            // Caller cancellation and timeout both end abortTask; report which one it was.
            return new SpeechRecognitionResultDto
            {
                Success = false,
                ErrorMessage = cancellationToken.IsCancellationRequested ? "Cancelled" : "Listening timed out."
            };
        }
        catch (OperationCanceledException)
        {
            State = SpeechRecognitionState.Idle;
            return new SpeechRecognitionResultDto { Success = false, ErrorMessage = "Cancelled" };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error during speech recognition");
            State = SpeechRecognitionState.Error;
            return new SpeechRecognitionResultDto { Success = false, ErrorMessage = ex.Message };
        }
        finally
        {
            _speechToText.RecognitionResultUpdated -= OnRecognitionResultUpdated;
            _currentCts?.Dispose();
            _currentCts = null;
        }
    }

    /// <summary>
    /// Asks the recognizer to stop and sets the state to Idle. Errors from the
    /// recognizer are logged as warnings and not propagated.
    /// </summary>
    public async Task StopListeningAsync()
    {
        try
        {
            await _speechToText.StopListenAsync(CancellationToken.None);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error stopping speech recognition");
        }

        State = SpeechRecognitionState.Idle;
    }

    /// <summary>Forwards the recognizer's interim text to <see cref="PartialResultReceived"/>.</summary>
    private void OnRecognitionResultUpdated(object? sender, SpeechToTextRecognitionResultUpdatedEventArgs args)
    {
        PartialResultReceived?.Invoke(this, args.RecognitionResult);
    }

    /// <summary>
    /// Converts the recognizer's final result into a <see cref="SpeechRecognitionResultDto"/>
    /// and completes the pending <see cref="StartListeningAsync"/> call, if any.
    /// </summary>
    private void OnRecognitionResultCompleted(object? sender, SpeechToTextRecognitionResultCompletedEventArgs args)
    {
        State = SpeechRecognitionState.Processing;
        var result = args.RecognitionResult;

        if (result.IsSuccessful && !string.IsNullOrEmpty(result.Text))
        {
            State = SpeechRecognitionState.Idle;
            _recognitionTcs?.TrySetResult(new SpeechRecognitionResultDto
            {
                Success = true,
                Transcript = result.Text,
                // No confidence value is read from the recognition result; a fixed 1.0 is reported.
                Confidence = 1.0
            });
        }
        else
        {
            State = SpeechRecognitionState.Error;
            _recognitionTcs?.TrySetResult(new SpeechRecognitionResultDto
            {
                Success = false,
                ErrorMessage = result.Exception?.Message ?? "No speech recognized"
            });
        }
    }
}

UI Integration Patterns

The service works with any .NET MAUI UI approach. Below are patterns for each.

MVVM with XAML

ViewModel:

/// <summary>
/// View model that toggles a continuous listening session and exposes the live
/// transcript and recording state for binding.
/// </summary>
public partial class MyViewModel : ObservableObject
{
    private readonly ISpeechRecognitionService _speechService;

    // Token source of the active recording session; null when no session is active.
    private CancellationTokenSource? _voiceCts;

    [ObservableProperty] private bool _isRecording;
    [ObservableProperty] private string _voiceTranscript = "";
    [ObservableProperty] private SpeechRecognitionState _voiceState;

    /// <param name="speechService">Speech recognition service used for listening.</param>
    public MyViewModel(ISpeechRecognitionService speechService)
    {
        _speechService = speechService;
    }

    /// <summary>Starts recording when idle, stops it when recording.</summary>
    [RelayCommand]
    private async Task ToggleRecordingAsync()
    {
        if (IsRecording)
        {
            await StopRecordingAsync();
        }
        else
        {
            await StartRecordingAsync();
        }
    }

    /// <summary>
    /// Requests permissions and, when granted, begins a new session and launches the
    /// listen loop without awaiting it.
    /// </summary>
    private async Task StartRecordingAsync()
    {
        if (!await _speechService.RequestPermissionsAsync())
        {
            return;
        }

        var sessionCts = new CancellationTokenSource();
        _voiceCts = sessionCts;
        IsRecording = true;
        VoiceTranscript = "";
        VoiceState = SpeechRecognitionState.Listening;

        _speechService.PartialResultReceived += OnPartialResult;

        // The loop receives its own token source so it never observes a later session's.
        _ = ListenLoopAsync(sessionCts);
    }

    /// <summary>
    /// Repeatedly starts listening until recording stops or the session is cancelled,
    /// handing each successful transcript to <see cref="ProcessTranscriptAsync"/>.
    /// </summary>
    /// <param name="sessionCts">Token source owned by this session; disposed when the loop ends.</param>
    private async Task ListenLoopAsync(CancellationTokenSource sessionCts)
    {
        try
        {
            while (IsRecording && !sessionCts.IsCancellationRequested)
            {
                var result = await _speechService.StartListeningAsync(sessionCts.Token);
                if (result.Success && !string.IsNullOrWhiteSpace(result.Transcript))
                {
                    await ProcessTranscriptAsync(result.Transcript);
                }

                if (IsRecording)
                {
                    // Cancellable so stopping does not wait out the pause.
                    await Task.Delay(100, sessionCts.Token);
                }
            }
        }
        catch (OperationCanceledException) { }
        finally
        {
            sessionCts.Dispose();

            // Reset shared state only if this is still the active session; otherwise
            // StopRecordingAsync already cleaned up, or a newer session now owns the state.
            if (ReferenceEquals(_voiceCts, sessionCts))
            {
                _voiceCts = null;
                _speechService.PartialResultReceived -= OnPartialResult;
                IsRecording = false;
                VoiceState = SpeechRecognitionState.Idle;
            }
        }
    }

    /// <summary>Cancels the active session, stops the recognizer and resets the recording state.</summary>
    private async Task StopRecordingAsync()
    {
        // Detach before cancelling so the loop's cleanup sees it is no longer the active session.
        var sessionCts = _voiceCts;
        _voiceCts = null;
        sessionCts?.Cancel();

        await _speechService.StopListeningAsync();
        _speechService.PartialResultReceived -= OnPartialResult;
        IsRecording = false;
        VoiceState = SpeechRecognitionState.Idle;
    }

    /// <summary>Shows the latest interim transcript.</summary>
    private void OnPartialResult(object? sender, string text) => VoiceTranscript = text;

    /// <summary>Placeholder for handling a completed transcript.</summary>
    private Task ProcessTranscriptAsync(string transcript) => Task.CompletedTask; // Your logic
}

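XAML (equivalent to the C# Markup page below):

<VerticalStackLayout>
    <Button Text="Record" Command="{Binding ToggleRecordingCommand}" />
    <Label Text="{Binding VoiceTranscript}" />
</VerticalStackLayout>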

MVVM with C# Markup

/// <summary>
/// Page built with C# Markup: a record button bound to the toggle command and a
/// label bound to the live transcript.
/// </summary>
public class MyPage : ContentPage
{
    /// <param name="vm">View model used as the page's binding context.</param>
    public MyPage(MyViewModel vm)
    {
        BindingContext = vm;

        var recordButton = new Button()
            .Text("Record")
            .BindCommand(nameof(vm.ToggleRecordingCommand));

        var transcriptLabel = new Label()
            .Bind(Label.TextProperty, nameof(vm.VoiceTranscript));

        Content = new VerticalStackLayout
        {
            recordButton,
            transcriptLabel
        };
    }
}

MauiReactor

/// <summary>
/// MauiReactor component that toggles a continuous listening session and renders
/// the live transcript from component state.
/// </summary>
partial class MyPage : Component<MyPageState>
{
    [Inject] ISpeechRecognitionService _speechService;

    // Token source of the active recording session; null when no session is active.
    private CancellationTokenSource? _voiceCts;

    /// <summary>Renders the record/stop button and the transcript label.</summary>
    public override VisualNode Render() => ContentPage(
        VStack(
            Button(State.IsRecording ? "Stop" : "Record")
                .OnClicked(ToggleRecordingAsync),
            Label(State.VoiceTranscript)
        )
    );

    /// <summary>Click handler: starts recording when idle, stops it when recording.</summary>
    private async void ToggleRecordingAsync()
    {
        if (State.IsRecording)
        {
            await StopRecordingAsync();
        }
        else
        {
            await StartRecordingAsync();
        }
    }

    /// <summary>
    /// Requests permissions and, when granted, begins a new session and launches the
    /// listen loop without awaiting it.
    /// </summary>
    private async Task StartRecordingAsync()
    {
        if (!await _speechService.RequestPermissionsAsync())
        {
            return;
        }

        var sessionCts = new CancellationTokenSource();
        _voiceCts = sessionCts;
        SetState(s => { s.IsRecording = true; s.VoiceTranscript = ""; });
        _speechService.PartialResultReceived += OnPartialResult;

        // The loop receives its own token source so it never observes a later session's.
        _ = ListenLoopAsync(sessionCts);
    }

    /// <summary>
    /// Repeatedly starts listening until recording stops or the session is cancelled,
    /// handing each successful transcript to <see cref="ProcessTranscriptAsync"/>.
    /// </summary>
    /// <param name="sessionCts">Token source owned by this session; disposed when the loop ends.</param>
    private async Task ListenLoopAsync(CancellationTokenSource sessionCts)
    {
        try
        {
            while (State.IsRecording && !sessionCts.IsCancellationRequested)
            {
                var result = await _speechService.StartListeningAsync(sessionCts.Token);
                if (result.Success && !string.IsNullOrWhiteSpace(result.Transcript))
                {
                    await ProcessTranscriptAsync(result.Transcript);
                }

                if (State.IsRecording)
                {
                    // Cancellable so stopping does not wait out the pause.
                    await Task.Delay(100, sessionCts.Token);
                }
            }
        }
        catch (OperationCanceledException) { }
        finally
        {
            sessionCts.Dispose();

            // Reset shared state only if this is still the active session; otherwise
            // StopRecordingAsync already cleaned up, or a newer session now owns the state.
            if (ReferenceEquals(_voiceCts, sessionCts))
            {
                _voiceCts = null;
                _speechService.PartialResultReceived -= OnPartialResult;
                SetState(s => s.IsRecording = false);
            }
        }
    }

    /// <summary>Cancels the active session, stops the recognizer and clears the recording flag.</summary>
    private async Task StopRecordingAsync()
    {
        // Detach before cancelling so the loop's cleanup sees it is no longer the active session.
        var sessionCts = _voiceCts;
        _voiceCts = null;
        sessionCts?.Cancel();

        await _speechService.StopListeningAsync();
        _speechService.PartialResultReceived -= OnPartialResult;
        SetState(s => s.IsRecording = false);
    }

    /// <summary>Stores the latest interim transcript in component state.</summary>
    private void OnPartialResult(object? sender, string text) => SetState(s => s.VoiceTranscript = text);

    /// <summary>Placeholder for handling a completed transcript.</summary>
    private Task ProcessTranscriptAsync(string transcript) => Task.CompletedTask;
}

/// <summary>State for the MauiReactor <c>MyPage</c> component.</summary>
class MyPageState
{
    /// <summary><c>true</c> while a recording session is active.</summary>
    public bool IsRecording { get; set; }

    /// <summary>Latest transcript text shown in the page; starts empty.</summary>
    public string VoiceTranscript { get; set; } = "";
}

Key Implementation Notes

Permission Handling: Always call RequestPermissionsAsync() before starting speech recognition. It checks status first, then prompts the user if not already granted.

60-Second Timeout: Built-in safety timeout prevents indefinite listening sessions.

Partial Results: Subscribe to PartialResultReceived for live transcription feedback during speech.

Continuous Listening: Loop StartListeningAsync with small delays for continuous conversation mode.

Cancellation: Always use CancellationTokenSource for clean shutdown and proper resource cleanup.

Natural Language Output: CommunityToolkit.Maui's ISpeechToText returns normalized, punctuated text—not raw phonemes or garbled noise.

UI Agnostic: The ISpeechRecognitionService interface works identically regardless of UI framework (XAML, C# Markup, MauiReactor).

maui-speech-to-text

Safety Notice

Copy this and send it to your AI assistant to learn

Source Transparency

Related Skills

maui-performance

maui-data-binding

maui-rest-api

maui-shell-navigation