maui-speech-to-text

.NET MAUI Speech-to-Text Implementation

Safety Notice

This listing is imported from skills.sh public index metadata. Review upstream SKILL.md and repository scripts before running.

Copy this and send it to your AI assistant to learn

Install skill "maui-speech-to-text" with this command: npx skills add davidortinau/maui-skills/davidortinau-maui-skills-maui-speech-to-text

.NET MAUI Speech-to-Text Implementation

Add on-device speech recognition to any .NET MAUI app using CommunityToolkit.Maui.

Quick Start

  1. Install Package

Look up the current version of CommunityToolkit.Maui on NuGet before adding:

<PackageReference Include="CommunityToolkit.Maui" Version="[CURRENT_VERSION]" />

  2. Configure MauiProgram.cs

using CommunityToolkit.Maui;
using CommunityToolkit.Maui.Media;

builder.UseMauiCommunityToolkit();

// Register services
builder.Services.AddSingleton&lt;ISpeechToText&gt;(SpeechToText.Default);
builder.Services.AddSingleton&lt;ISpeechRecognitionService, SpeechRecognitionService&gt;();

  3. Platform Permissions

iOS (Info.plist):

<key>NSSpeechRecognitionUsageDescription</key>
<string>App uses speech recognition for hands-free input.</string>
<key>NSMicrophoneUsageDescription</key>
<string>App needs microphone access to hear your voice.</string>

Android (AndroidManifest.xml):

<uses-permission android:name="android.permission.RECORD_AUDIO" />

Service Interface

/// <summary>
/// Abstraction over on-device speech recognition so UI layers can depend on one
/// contract regardless of the underlying CommunityToolkit implementation.
/// </summary>
public interface ISpeechRecognitionService
{
    /// <summary>Current recognition state (Idle, Listening, Processing, or Error).</summary>
    SpeechRecognitionState State { get; }

    /// <summary>Raised whenever <see cref="State"/> transitions to a new value.</summary>
    event EventHandler<SpeechRecognitionState>? StateChanged;

    /// <summary>Raised with interim transcript text while a session is in progress.</summary>
    event EventHandler<string>? PartialResultReceived;

    /// <summary>Reports whether speech recognition is available on this device.</summary>
    Task<bool> IsAvailableAsync();

    /// <summary>Requests microphone and speech permissions; true when both are granted.</summary>
    Task<bool> RequestPermissionsAsync();

    /// <summary>Starts a listening session and completes with the final transcript result.</summary>
    Task<SpeechRecognitionResultDto> StartListeningAsync(CancellationToken cancellationToken = default);

    /// <summary>Stops the active listening session, if any.</summary>
    Task StopListeningAsync();
}

Supporting Types

/// <summary>Lifecycle states of a speech-recognition session.</summary>
public enum SpeechRecognitionState
{
    /// <summary>No session is active.</summary>
    Idle = 0,

    /// <summary>The microphone is open and audio is being captured.</summary>
    Listening = 1,

    /// <summary>A final result is being produced.</summary>
    Processing = 2,

    /// <summary>The last session ended with a failure.</summary>
    Error = 3
}

/// <summary>Immutable outcome of a single speech-recognition session.</summary>
public record SpeechRecognitionResultDto
{
    /// <summary>True when a transcript was produced.</summary>
    public bool Success { get; init; }

    /// <summary>Recognized text; null on failure.</summary>
    public string? Transcript { get; init; }

    /// <summary>Confidence score associated with the transcript.</summary>
    public double Confidence { get; init; }

    /// <summary>Human-readable failure reason; null on success.</summary>
    public string? ErrorMessage { get; init; }
}

Service Implementation

using CommunityToolkit.Maui.Media; using Microsoft.Extensions.Logging; using System.Globalization;

/// <summary>
/// Wraps CommunityToolkit.Maui's <see cref="ISpeechToText"/> behind
/// <see cref="ISpeechRecognitionService"/>, adding state tracking, partial-result
/// forwarding, a 60-second safety timeout, and permission handling.
/// Implements <see cref="IDisposable"/> so the toolkit event subscription taken
/// in the constructor is released (the original version leaked it).
/// </summary>
public class SpeechRecognitionService : ISpeechRecognitionService, IDisposable
{
    // Safety limit for a single listening session.
    private static readonly TimeSpan ListenTimeout = TimeSpan.FromSeconds(60);

    private readonly ISpeechToText _speechToText;
    private readonly ILogger<SpeechRecognitionService> _logger;
    private SpeechRecognitionState _state = SpeechRecognitionState.Idle;
    private CancellationTokenSource? _currentCts;
    private TaskCompletionSource<SpeechRecognitionResultDto>? _recognitionTcs;

    /// <summary>Current session state; assigning a new value raises <see cref="StateChanged"/>.</summary>
    public SpeechRecognitionState State
    {
        get => _state;
        private set
        {
            if (_state != value)
            {
                _state = value;
                StateChanged?.Invoke(this, value);
            }
        }
    }

    public event EventHandler<SpeechRecognitionState>? StateChanged;
    public event EventHandler<string>? PartialResultReceived;

    public SpeechRecognitionService(ISpeechToText speechToText, ILogger<SpeechRecognitionService> logger)
    {
        _speechToText = speechToText;
        _logger = logger;
        _speechToText.RecognitionResultCompleted += OnRecognitionResultCompleted;
    }

    /// <summary>
    /// Reports availability. NOTE(review): hard-coded to true; actual availability may
    /// vary per platform/device — confirm against the toolkit's capabilities if it matters.
    /// </summary>
    public Task<bool> IsAvailableAsync() => Task.FromResult(true);

    /// <summary>
    /// Ensures microphone and speech-recognition permissions are granted, prompting
    /// the user only for permissions that are not already granted.
    /// </summary>
    /// <returns>True when both permissions are granted; false otherwise or on error.</returns>
    public async Task<bool> RequestPermissionsAsync()
    {
        try
        {
            // Check current status first so users are not re-prompted unnecessarily.
            var micStatus = await Permissions.CheckStatusAsync<Permissions.Microphone>();
            var speechStatus = await Permissions.CheckStatusAsync<Permissions.Speech>();

            if (micStatus != PermissionStatus.Granted)
                micStatus = await Permissions.RequestAsync<Permissions.Microphone>();

            if (speechStatus != PermissionStatus.Granted)
                speechStatus = await Permissions.RequestAsync<Permissions.Speech>();

            return micStatus == PermissionStatus.Granted && speechStatus == PermissionStatus.Granted;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error requesting speech recognition permissions");
            return false;
        }
    }

    /// <summary>
    /// Starts a listening session and completes when a final result arrives, the caller
    /// cancels, or the 60-second safety timeout elapses.
    /// </summary>
    /// <param name="cancellationToken">Caller token that ends the session early.</param>
    public async Task<SpeechRecognitionResultDto> StartListeningAsync(CancellationToken cancellationToken = default)
    {
        if (State == SpeechRecognitionState.Listening)
        {
            return new SpeechRecognitionResultDto { Success = false, ErrorMessage = "Already listening" };
        }

        _currentCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);

        // RunContinuationsAsynchronously keeps the completing (toolkit) thread from
        // inlining awaiter continuations — the original TCS lacked this option.
        _recognitionTcs = new TaskCompletionSource<SpeechRecognitionResultDto>(
            TaskCreationOptions.RunContinuationsAsynchronously);
        State = SpeechRecognitionState.Listening;

        // Single timeout source. The original duplicated the 60s limit in both a CTS
        // and a Task.Delay, and misreported external cancellation as a timeout.
        using var timeoutCts = new CancellationTokenSource(ListenTimeout);
        using var combinedCts = CancellationTokenSource.CreateLinkedTokenSource(_currentCts.Token, timeoutCts.Token);

        try
        {
            _speechToText.RecognitionResultUpdated += OnRecognitionResultUpdated;

            var options = new SpeechToTextOptions
            {
                Culture = CultureInfo.CurrentCulture,
                ShouldReportPartialResults = true
            };

            await _speechToText.StartListenAsync(options, combinedCts.Token);

            // Wait for the final result, or for timeout/cancellation via the combined token.
            var cancellationTask = Task.Delay(Timeout.Infinite, combinedCts.Token);
            var completedTask = await Task.WhenAny(_recognitionTcs.Task, cancellationTask);

            if (completedTask != _recognitionTcs.Task)
            {
                await StopListeningAsync();

                // Mark the stale TCS so a late toolkit completion cannot revive it.
                _recognitionTcs.TrySetCanceled();

                var message = timeoutCts.IsCancellationRequested && !_currentCts.IsCancellationRequested
                    ? "Listening timed out."
                    : "Cancelled";
                return new SpeechRecognitionResultDto { Success = false, ErrorMessage = message };
            }

            return await _recognitionTcs.Task;
        }
        catch (OperationCanceledException)
        {
            State = SpeechRecognitionState.Idle;
            return new SpeechRecognitionResultDto { Success = false, ErrorMessage = "Cancelled" };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error during speech recognition");
            State = SpeechRecognitionState.Error;
            return new SpeechRecognitionResultDto { Success = false, ErrorMessage = ex.Message };
        }
        finally
        {
            _speechToText.RecognitionResultUpdated -= OnRecognitionResultUpdated;
            _currentCts?.Dispose();
            _currentCts = null;
        }
    }

    /// <summary>Stops the toolkit listener (best-effort) and returns the service to Idle.</summary>
    public async Task StopListeningAsync()
    {
        try { await _speechToText.StopListenAsync(CancellationToken.None); }
        catch (Exception ex) { _logger.LogWarning(ex, "Error stopping speech recognition"); }
        State = SpeechRecognitionState.Idle;
    }

    /// <summary>Forwards interim transcripts to subscribers.</summary>
    private void OnRecognitionResultUpdated(object? sender, SpeechToTextRecognitionResultUpdatedEventArgs args)
    {
        PartialResultReceived?.Invoke(this, args.RecognitionResult);
    }

    /// <summary>Translates the toolkit's final result into a DTO and completes the pending session.</summary>
    private void OnRecognitionResultCompleted(object? sender, SpeechToTextRecognitionResultCompletedEventArgs args)
    {
        State = SpeechRecognitionState.Processing;
        var result = args.RecognitionResult;

        if (result.IsSuccessful && !string.IsNullOrEmpty(result.Text))
        {
            State = SpeechRecognitionState.Idle;
            _recognitionTcs?.TrySetResult(new SpeechRecognitionResultDto
            {
                Success = true,
                Transcript = result.Text,
                // NOTE(review): the toolkit result exposes no confidence score here; 1.0 is a placeholder.
                Confidence = 1.0
            });
        }
        else
        {
            State = SpeechRecognitionState.Error;
            _recognitionTcs?.TrySetResult(new SpeechRecognitionResultDto
            {
                Success = false,
                ErrorMessage = result.Exception?.Message ?? "No speech recognized"
            });
        }
    }

    /// <summary>Releases the constructor's toolkit event subscription and any live CTS.</summary>
    public void Dispose()
    {
        _speechToText.RecognitionResultCompleted -= OnRecognitionResultCompleted;
        _currentCts?.Dispose();
        _currentCts = null;
        GC.SuppressFinalize(this);
    }
}

UI Integration Patterns

The service works with any .NET MAUI UI approach. Below are patterns for each.

MVVM with XAML

ViewModel:

/// <summary>
/// MVVM view model wiring <see cref="ISpeechRecognitionService"/> into a toggleable
/// record/stop command with live partial-transcript updates.
/// </summary>
public partial class MyViewModel : ObservableObject
{
    private readonly ISpeechRecognitionService _speechService;
    private CancellationTokenSource? _voiceCts;

    [ObservableProperty] private bool _isRecording;
    [ObservableProperty] private string _voiceTranscript = "";
    [ObservableProperty] private SpeechRecognitionState _voiceState;

    public MyViewModel(ISpeechRecognitionService speechService)
    {
        _speechService = speechService;
    }

    /// <summary>Starts recording when idle, stops it when active.</summary>
    [RelayCommand]
    private async Task ToggleRecordingAsync()
    {
        if (IsRecording)
            await StopRecordingAsync();
        else
            await StartRecordingAsync();
    }

    private async Task StartRecordingAsync()
    {
        if (!await _speechService.RequestPermissionsAsync())
            return;

        _voiceCts = new CancellationTokenSource();
        IsRecording = true;
        VoiceTranscript = "";
        VoiceState = SpeechRecognitionState.Listening;

        _speechService.PartialResultReceived += OnPartialResult;
        _ = ListenLoopAsync(); // fire-and-forget; the loop owns its cleanup in finally
    }

    /// <summary>Continuous-listening loop; exits when recording stops or the CTS is cancelled.</summary>
    private async Task ListenLoopAsync()
    {
        // Snapshot so a concurrent StopRecordingAsync cannot null the field mid-loop.
        var cts = _voiceCts;
        try
        {
            while (IsRecording && cts is { IsCancellationRequested: false })
            {
                var result = await _speechService.StartListeningAsync(cts.Token);
                if (result.Success && !string.IsNullOrWhiteSpace(result.Transcript))
                    await ProcessTranscriptAsync(result.Transcript);

                // Brief pause between sessions; honor cancellation so stop is prompt
                // (the original delay ignored the token).
                if (IsRecording) await Task.Delay(100, cts.Token);
            }
        }
        catch (OperationCanceledException) { }
        finally
        {
            _speechService.PartialResultReceived -= OnPartialResult;
            IsRecording = false;
            VoiceState = SpeechRecognitionState.Idle;
            cts?.Dispose(); // the original leaked the CTS
            if (ReferenceEquals(_voiceCts, cts)) _voiceCts = null;
        }
    }

    private async Task StopRecordingAsync()
    {
        // Guard against the loop's finally having already disposed the CTS.
        try { _voiceCts?.Cancel(); }
        catch (ObjectDisposedException) { }

        await _speechService.StopListeningAsync();
        _speechService.PartialResultReceived -= OnPartialResult;
        IsRecording = false;
        VoiceState = SpeechRecognitionState.Idle;
    }

    private void OnPartialResult(object? sender, string text) => VoiceTranscript = text;

    /// <summary>Hook for app-specific handling of each final transcript.</summary>
    private Task ProcessTranscriptAsync(string transcript) => Task.CompletedTask;
}

XAML:

<Button Text="{Binding IsRecording, Converter={StaticResource BoolToRecordText}}"
        Command="{Binding ToggleRecordingCommand}" />
<Label Text="{Binding VoiceTranscript}" />

MVVM with C# Markup

/// <summary>Code-only page demonstrating C# Markup bindings to <see cref="MyViewModel"/>.</summary>
public class MyPage : ContentPage
{
    public MyPage(MyViewModel vm)
    {
        BindingContext = vm;

        Content = new VerticalStackLayout
        {
            new Button()
                .Text("Record")
                .BindCommand(nameof(vm.ToggleRecordingCommand)),
            new Label()
                .Bind(Label.TextProperty, nameof(vm.VoiceTranscript))
        };
    }
}

MauiReactor

/// <summary>MauiReactor component driving the speech service via local component state.</summary>
partial class MyPage : Component<MyPageState>
{
    [Inject] ISpeechRecognitionService _speechService;
    private CancellationTokenSource? _voiceCts;

    public override VisualNode Render() => ContentPage(
        VStack(
            Button(State.IsRecording ? "Stop" : "Record")
                .OnClicked(ToggleRecordingAsync),
            Label(State.VoiceTranscript)
        )
    );

    // async void is tolerable here only because OnClicked is an event-style callback.
    private async void ToggleRecordingAsync()
    {
        if (State.IsRecording) await StopRecordingAsync();
        else await StartRecordingAsync();
    }

    private async Task StartRecordingAsync()
    {
        if (!await _speechService.RequestPermissionsAsync()) return;

        _voiceCts = new CancellationTokenSource();
        SetState(s => { s.IsRecording = true; s.VoiceTranscript = ""; });
        _speechService.PartialResultReceived += OnPartialResult;
        _ = ListenLoopAsync(); // fire-and-forget; the loop cleans up in finally
    }

    /// <summary>Continuous-listening loop; exits when recording stops or the CTS is cancelled.</summary>
    private async Task ListenLoopAsync()
    {
        // Snapshot so a concurrent StopRecordingAsync cannot null the field mid-loop.
        var cts = _voiceCts;
        try
        {
            while (State.IsRecording && cts is { IsCancellationRequested: false })
            {
                var result = await _speechService.StartListeningAsync(cts.Token);
                if (result.Success && !string.IsNullOrWhiteSpace(result.Transcript))
                    await ProcessTranscriptAsync(result.Transcript);

                // Honor cancellation so stop is prompt (the original delay ignored the token).
                if (State.IsRecording) await Task.Delay(100, cts.Token);
            }
        }
        catch (OperationCanceledException) { }
        finally
        {
            _speechService.PartialResultReceived -= OnPartialResult;
            SetState(s => s.IsRecording = false);
            cts?.Dispose(); // the original leaked the CTS
            if (ReferenceEquals(_voiceCts, cts)) _voiceCts = null;
        }
    }

    private async Task StopRecordingAsync()
    {
        // Guard against the loop's finally having already disposed the CTS.
        try { _voiceCts?.Cancel(); }
        catch (ObjectDisposedException) { }

        await _speechService.StopListeningAsync();
        _speechService.PartialResultReceived -= OnPartialResult;
        SetState(s => s.IsRecording = false);
    }

    private void OnPartialResult(object? sender, string text) => SetState(s => s.VoiceTranscript = text);

    /// <summary>Hook for app-specific handling of each final transcript.</summary>
    private Task ProcessTranscriptAsync(string transcript) => Task.CompletedTask;
}

/// <summary>Local component state backing the MauiReactor page.</summary>
class MyPageState
{
    // Whether a continuous-listening session is currently active.
    public bool IsRecording;

    // Latest partial or final transcript text shown in the UI.
    public string VoiceTranscript = "";
}

Key Implementation Notes

Permission Handling: Always call RequestPermissionsAsync() before starting speech recognition. It checks status first, then prompts the user if not already granted.

60-Second Timeout: Built-in safety timeout prevents indefinite listening sessions.

Partial Results: Subscribe to PartialResultReceived for live transcription feedback during speech.

Continuous Listening: Loop StartListeningAsync with small delays for continuous conversation mode.

Cancellation: Always use CancellationTokenSource for clean shutdown and proper resource cleanup.

Natural Language Output: CommunityToolkit.Maui's ISpeechToText returns normalized, punctuated text—not raw phonemes or garbled noise.

UI Agnostic: The ISpeechRecognitionService interface works identically regardless of UI framework (XAML, C# Markup, MauiReactor).

Source Transparency

This detail page is rendered from real SKILL.md content. Trust labels are metadata-based hints, not a safety guarantee.

Related Skills

Related by shared tags or category signals.

General

maui-performance

No summary provided by upstream source.

Repository Source — Needs Review
General

maui-data-binding

No summary provided by upstream source.

Repository Source — Needs Review
General

maui-rest-api

No summary provided by upstream source.

Repository Source — Needs Review
General

maui-shell-navigation

No summary provided by upstream source.

Repository Source — Needs Review