pax_global_header00006660000000000000000000000064146554770620014532gustar00rootroot0000000000000052 comment=bd57cc6039d16a55fc047c237d5bdd28dcd4898d wyoming-1.6.0/000077500000000000000000000000001465547706200132275ustar00rootroot00000000000000wyoming-1.6.0/.github/000077500000000000000000000000001465547706200145675ustar00rootroot00000000000000wyoming-1.6.0/.github/workflows/000077500000000000000000000000001465547706200166245ustar00rootroot00000000000000wyoming-1.6.0/.github/workflows/test.yml000066400000000000000000000011411465547706200203230ustar00rootroot00000000000000--- name: test # yamllint disable-line rule:truthy on: push: branches: [master] pull_request: permissions: contents: read jobs: test_linux: name: "test on linux" runs-on: ubuntu-latest strategy: matrix: python_version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v4.1.1 - uses: actions/setup-python@v5 with: python-version: "${{ matrix.python_version }}" cache: "pip" cache-dependency-path: requirements_dev.txt - run: script/setup --dev - run: script/lint - run: script/test wyoming-1.6.0/.gitignore000066400000000000000000000002041465547706200152130ustar00rootroot00000000000000.DS_Store .idea *.log tmp/ *.py[cod] *.egg /build htmlcov .projectile .venv/ venv/ .mypy_cache/ *.egg-info/ .tox/ /local/ /dist/ wyoming-1.6.0/.isort.cfg000066400000000000000000000001611465547706200151240ustar00rootroot00000000000000[settings] multi_line_output=3 include_trailing_comma=True force_grid_wrap=0 use_parentheses=True line_length=88 wyoming-1.6.0/CHANGELOG.md000066400000000000000000000014721465547706200150440ustar00rootroot00000000000000# Changelog ## 1.5.4 - Add support for voice timers - `timer-started` - `timer-updated` - `timer-cancelled` - `timer-finished` - Add `speaker` field to `detect` event - Refactor HTTP servers ## 1.5.3 - Add `phrase` to wake word model info - Add tests to CI ## 1.5.2 - Fix missing VERSION file ## 1.5.1 - Add `version` to info artifacts - Use Python package version in 
Wyoming JSON header - Add `pause-satellite` message ## 1.5.0 - Add `ping` and `pong` messages - Add `satellite-connected` and `satellite-disconnected` messages ## 1.4.2 - Add `streaming-started` and `streaming-stopped` ## 1.3.0 - Add `intent` and `satellite` to info message - Add optional `text` response to `Intent` message - Add `context` to intent/handle events - Use internal `audioop` replacement - Add zeroconf discovery wyoming-1.6.0/LICENSE.md000066400000000000000000000020571465547706200146370ustar00rootroot00000000000000MIT License Copyright (c) 2023 Michael Hansen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. wyoming-1.6.0/README.md000066400000000000000000000353621465547706200145170ustar00rootroot00000000000000# Wyoming Protocol A peer-to-peer protocol for voice assistants (basically [JSONL](https://jsonlines.org/) + PCM audio) ``` text { "type": "...", "data": { ... }, "data_length": ..., "payload_length": ... 
}\n ``` Used in [Rhasspy](https://github.com/rhasspy/rhasspy3/) and [Home Assistant](https://www.home-assistant.io/integrations/wyoming) for communication with voice services. This is an open standard of the [Open Home Foundation](https://www.openhomefoundation.org/). ## Wyoming Projects * Voice satellites * [Satellite](https://github.com/rhasspy/wyoming-satellite) for Home Assistant * Audio input/output * [mic-external](https://github.com/rhasspy/wyoming-mic-external) * [snd-external](https://github.com/rhasspy/wyoming-snd-external) * [SDL2](https://github.com/rhasspy/wyoming-sdl2) * Wake word detection * [openWakeWord](https://github.com/rhasspy/wyoming-openwakeword) * [porcupine1](https://github.com/rhasspy/wyoming-porcupine1) * [snowboy](https://github.com/rhasspy/wyoming-snowboy) * Speech-to-text * [Faster Whisper](https://github.com/rhasspy/wyoming-faster-whisper) * [Vosk](https://github.com/rhasspy/wyoming-vosk) * [Whisper.cpp](https://github.com/rhasspy/wyoming-whisper-cpp) * Text-to-speech * [Piper](https://github.com/rhasspy/wyoming-piper) * Intent handling * [handle-external](https://github.com/rhasspy/wyoming-handle-external) ## Format 1. A JSON object header as a single line with `\n` (UTF-8, required) * `type` - event type (string, required) * `data` - event data (object, optional) * `data_length` - bytes of additional data (int, optional) * `payload_length` - bytes of binary payload (int, optional) 2. Additional data (UTF-8, optional) * JSON object with additional event-specific data * Merged on top of header `data` * Exactly `data_length` bytes long * Immediately follows header `\n` 3. Payload * Typically PCM audio but can be any binary data * Exactly `payload_length` bytes long * Immediately follows additional data or header `\n` if no additional data ## Event Types Available events with `type` and fields. ### Audio Send raw audio and indicate begin/end of audio streams. 
* `audio-chunk` - chunk of raw PCM audio * `rate` - sample rate in hertz (int, required) * `width` - sample width in bytes (int, required) * `channels` - number of channels (int, required) * `timestamp` - timestamp of audio chunk in milliseconds (int, optional) * Payload is raw PCM audio samples * `audio-start` - start of an audio stream * `rate` - sample rate in hertz (int, required) * `width` - sample width in bytes (int, required) * `channels` - number of channels (int, required) * `timestamp` - timestamp in milliseconds (int, optional) * `audio-stop` - end of an audio stream * `timestamp` - timestamp in milliseconds (int, optional) ### Info Describe available services. * `describe` - request for available voice services * `info` - response describing available voice services * `asr` - list speech recognition services (optional) * `models` - list of available models (required) * `name` - unique name (required) * `languages` - supported languages by model (list of string, required) * `attribution` (required) * `name` - name of creator (required) * `url` - URL of creator (required) * `installed` - true if currently installed (bool, required) * `description` - human-readable description (string, optional) * `version` - version of the model (string, optional) * `tts` - list text to speech services (optional) * `models` - list of available models * `name` - unique name (required) * `languages` - supported languages by model (list of string, required) * `speakers` - list of speakers (optional) * `name` - unique name of speaker (required) * `attribution` (required) * `name` - name of creator (required) * `url` - URL of creator (required) * `installed` - true if currently installed (bool, required) * `description` - human-readable description (string, optional) * `version` - version of the model (string, optional) * `wake` - list wake word detection services( optional ) * `models` - list of available models (required) * `name` - unique name (required) * `languages` - 
supported languages by model (list of string, required) * `attribution` (required) * `name` - name of creator (required) * `url` - URL of creator (required) * `installed` - true if currently installed (bool, required) * `description` - human-readable description (string, optional) * `version` - version of the model (string, optional) * `handle` - list intent handling services (optional) * `models` - list of available models (required) * `name` - unique name (required) * `languages` - supported languages by model (list of string, required) * `attribution` (required) * `name` - name of creator (required) * `url` - URL of creator (required) * `installed` - true if currently installed (bool, required) * `description` - human-readable description (string, optional) * `version` - version of the model (string, optional) * `intent` - list intent recognition services (optional) * `models` - list of available models (required) * `name` - unique name (required) * `languages` - supported languages by model (list of string, required) * `attribution` (required) * `name` - name of creator (required) * `url` - URL of creator (required) * `installed` - true if currently installed (bool, required) * `description` - human-readable description (string, optional) * `version` - version of the model (string, optional) * `satellite` - information about voice satellite (optional) * `area` - name of area where satellite is located (string, optional) * `has_vad` - true if the end of voice commands will be detected locally (boolean, optional) * `active_wake_words` - list of wake words that are actively being listend for (list of string, optional) * `max_active_wake_words` - maximum number of local wake words that can be run simultaneously (number, optional) * `supports_trigger` - true if satellite supports remotely-triggered pipelines * `mic` - list of audio input services (optional) * `mic_format` - audio input format (required) * `rate` - sample rate in hertz (int, required) * `width` - 
sample width in bytes (int, required) * `channels` - number of channels (int, required) * `snd` - list of audio output services (optional) * `snd_format` - audio output format (required) * `rate` - sample rate in hertz (int, required) * `width` - sample width in bytes (int, required) * `channels` - number of channels (int, required) ### Speech Recognition Transcribe audio into text. * `transcribe` - request to transcribe an audio stream * `name` - name of model to use (string, optional) * `language` - language of spoken audio (string, optional) * `context` - context from previous interactions (object, optional) * `transcript` - response with transcription * `text` - text transcription of spoken audio (string, required) * `context` - context for next interaction (object, optional) ### Text to Speech Synthesize audio from text. * `synthesize` - request to generate audio from text * `text` - text to speak (string, required) * `voice` - use a specific voice (optional) * `name` - name of voice (string, optional) * `language` - language of voice (string, optional) * `speaker` - speaker of voice (string, optional) ### Wake Word Detect wake words in an audio stream. * `detect` - request detection of specific wake word(s) * `names` - wake word names to detect (list of string, optional) * `detection` - response when detection occurs * `name` - name of wake word that was detected (int, optional) * `timestamp` - timestamp of audio chunk in milliseconds when detection occurred (int optional) * `not-detected` - response when audio stream ends without a detection ### Voice Activity Detection Detects speech and silence in an audio stream. * `voice-started` - user has started speaking * `timestamp` - timestamp of audio chunk when speaking started in milliseconds (int, optional) * `voice-stopped` - user has stopped speaking * `timestamp` - timestamp of audio chunk when speaking stopped in milliseconds (int, optional) ### Intent Recognition Recognizes intents from text. 
* `recognize` - request to recognize an intent from text * `text` - text to recognize (string, required) * `context` - context from previous interactions (object, optional) * `intent` - response with recognized intent * `name` - name of intent (string, required) * `entities` - list of entities (optional) * `name` - name of entity (string, required) * `value` - value of entity (any, optional) * `text` - response for user (string, optional) * `context` - context for next interactions (object, optional) * `not-recognized` - response indicating no intent was recognized * `text` - response for user (string, optional) * `context` - context for next interactions (object, optional) ### Intent Handling Handle structured intents or text directly. * `handled` - response when intent was successfully handled * `text` - response for user (string, optional) * `context` - context for next interactions (object, optional) * `not-handled` - response when intent was not handled * `text` - response for user (string, optional) * `context` - context for next interactions (object, optional) ### Audio Output Play audio stream. * `played` - response when audio finishes playing ### Voice Satellite Control of one or more remote voice satellites connected to a central server. * `run-satellite` - informs satellite that server is ready to run pipelines * `pause-satellite` - informs satellite that server is not ready anymore to run pipelines * `satellite-connected` - satellite has connected to the server * `satellite-disconnected` - satellite has been disconnected from the server * `streaming-started` - satellite has started streaming audio to the server * `streaming-stopped` - satellite has stopped streaming audio to the server Pipelines are run on the server, but can be triggered remotely from the server as well. 
* `run-pipeline` - runs a pipeline on the server or asks the satellite to run it when possible * `start_stage` - pipeline stage to start at (string, required) * `end_stage` - pipeline stage to end at (string, required) * `wake_word_name` - name of detected wake word that started this pipeline (string, optional) * From client only * `wake_word_names` - names of wake words to listen for (list of string, optional) * From server only * `start_stage` must be "wake" * `announce_text` - text to speak on the satellite * From server only * `start_stage` must be "tts" * `restart_on_end` - true if the server should re-run the pipeline after it ends (boolean, default is false) * Only used for always-on streaming satellites ### Timers * `timer-started` - a new timer has started * `id` - unique id of timer (string, required) * `total_seconds` - number of seconds the timer should run for (int, required) * `name` - user-provided name for timer (string, optional) * `start_hours` - hours the timer should run for as spoken by user (int, optional) * `start_minutes` - minutes the timer should run for as spoken by user (int, optional) * `start_seconds` - seconds the timer should run for as spoken by user (int, optional) * `command` - optional command that the server will execute when the timer is finished * `text` - text of command to execute (string, required) * `language` - language of the command (string, optional) * `timer-updated` - timer has been paused/resumed or time has been added/removed * `id` - unique id of timer (string, required) * `is_active` - true if timer is running, false if paused (bool, required) * `total_seconds` - number of seconds that the timer should run for now (int, required) * `timer-cancelled` - timer was cancelled * `id` - unique id of timer (string, required) * `timer-finished` - timer finished without being cancelled * `id` - unique id of timer (string, required) ## Event Flow * → is an event from client to server * ← is an event from server to client 
### Service Description 1. → `describe` (required) 2. ← `info` (required) ### Speech to Text 1. → `transcribe` event with `name` of model to use or `language` (optional) 2. → `audio-start` (required) 3. → `audio-chunk` (required) * Send audio chunks until silence is detected 4. → `audio-stop` (required) 5. ← `transcript` * Contains text transcription of spoken audio ### Text to Speech 1. → `synthesize` event with `text` (required) 2. ← `audio-start` 3. ← `audio-chunk` * One or more audio chunks 4. ← `audio-stop` ### Wake Word Detection 1. → `detect` event with `names` of wake words to detect (optional) 2. → `audio-start` (required) 3. → `audio-chunk` (required) * Keep sending audio chunks until a `detection` is received 4. ← `detection` * Sent for each wake word detection 5. → `audio-stop` (optional) * Manually end audio stream 6. ← `not-detected` * Sent after `audio-stop` if no detections occurred ### Voice Activity Detection 1. → `audio-chunk` (required) * Send audio chunks until silence is detected 2. ← `voice-started` * When speech starts 3. ← `voice-stopped` * When speech stops ### Intent Recognition 1. → `recognize` (required) 2. ← `intent` if successful 3. ← `not-recognized` if not successful ### Intent Handling For structured intents: 1. → `intent` (required) 2. ← `handled` if successful 3. ← `not-handled` if not successful For text only: 1. → `transcript` with `text` to handle (required) 2. ← `handled` if successful 3. ← `not-handled` if not successful ### Audio Output 1. → `audio-start` (required) 2. → `audio-chunk` (required) * One or more audio chunks 3. → `audio-stop` (required) 4. 
← `played` wyoming-1.6.0/mypy.ini000066400000000000000000000001301465547706200147200ustar00rootroot00000000000000[mypy] ignore_missing_imports = true [mypy-setuptools.*] ignore_missing_imports = True wyoming-1.6.0/pylintrc000066400000000000000000000014151465547706200150170ustar00rootroot00000000000000[MESSAGES CONTROL] disable= format, abstract-method, cyclic-import, duplicate-code, global-statement, import-outside-toplevel, inconsistent-return-statements, locally-disabled, not-context-manager, too-few-public-methods, too-many-arguments, too-many-branches, too-many-instance-attributes, too-many-lines, too-many-locals, too-many-public-methods, too-many-return-statements, too-many-statements, too-many-boolean-expressions, unnecessary-pass, unused-argument, broad-except, too-many-nested-blocks, invalid-name, unused-import, fixme, useless-super-delegation, missing-module-docstring, missing-class-docstring, missing-function-docstring, import-error, consider-using-with [FORMAT] expected-line-ending-format=LF wyoming-1.6.0/requirements_dev.txt000066400000000000000000000001671465547706200173550ustar00rootroot00000000000000black==22.12.0 flake8==6.0.0 isort==5.11.3 mypy==0.991 pylint==2.15.9 pytest==7.4.3 pytest-asyncio==0.23.3 tox==4.13.0 wyoming-1.6.0/script/000077500000000000000000000000001465547706200145335ustar00rootroot00000000000000wyoming-1.6.0/script/format000077500000000000000000000007621465547706200157560ustar00rootroot00000000000000#!/usr/bin/env python3 import subprocess import venv from pathlib import Path _DIR = Path(__file__).parent _PROGRAM_DIR = _DIR.parent _VENV_DIR = _PROGRAM_DIR / ".venv" _MODULE_DIR = _PROGRAM_DIR / "wyoming" _TESTS_DIR = _PROGRAM_DIR / "tests" context = venv.EnvBuilder().ensure_directories(_VENV_DIR) subprocess.check_call( [context.env_exe, "-m", "black", str(_MODULE_DIR), str(_TESTS_DIR)] ) subprocess.check_call( [context.env_exe, "-m", "isort", str(_MODULE_DIR), str(_TESTS_DIR)] ) 
wyoming-1.6.0/script/lint000077500000000000000000000014541465547706200154330ustar00rootroot00000000000000#!/usr/bin/env python3 import subprocess import venv from pathlib import Path _DIR = Path(__file__).parent _PROGRAM_DIR = _DIR.parent _VENV_DIR = _PROGRAM_DIR / ".venv" _MODULE_DIR = _PROGRAM_DIR / "wyoming" _TESTS_DIR = _PROGRAM_DIR / "tests" context = venv.EnvBuilder().ensure_directories(_VENV_DIR) subprocess.check_call( [context.env_exe, "-m", "black", str(_MODULE_DIR), str(_TESTS_DIR), "--check"] ) subprocess.check_call( [context.env_exe, "-m", "isort", str(_MODULE_DIR), str(_TESTS_DIR), "--check"] ) subprocess.check_call( [context.env_exe, "-m", "flake8", str(_MODULE_DIR), str(_TESTS_DIR)] ) subprocess.check_call( [context.env_exe, "-m", "pylint", str(_MODULE_DIR), str(_TESTS_DIR)] ) subprocess.check_call( [context.env_exe, "-m", "mypy", str(_MODULE_DIR), str(_TESTS_DIR)] ) wyoming-1.6.0/script/package000077500000000000000000000005071465547706200160560ustar00rootroot00000000000000#!/usr/bin/env python3 import subprocess import venv from pathlib import Path _DIR = Path(__file__).parent _PROGRAM_DIR = _DIR.parent _VENV_DIR = _PROGRAM_DIR / ".venv" context = venv.EnvBuilder().ensure_directories(_VENV_DIR) subprocess.check_call( [context.env_exe, _PROGRAM_DIR / "setup.py", "bdist_wheel", "sdist"] ) wyoming-1.6.0/script/setup000077500000000000000000000011661465547706200156250ustar00rootroot00000000000000#!/usr/bin/env python3 import subprocess import venv from pathlib import Path _DIR = Path(__file__).parent _PROGRAM_DIR = _DIR.parent _VENV_DIR = _PROGRAM_DIR / ".venv" # Create virtual environment builder = venv.EnvBuilder(with_pip=True) context = builder.ensure_directories(_VENV_DIR) builder.create(_VENV_DIR) # Upgrade dependencies pip = [context.env_exe, "-m", "pip"] subprocess.check_call(pip + ["install", "--upgrade", "pip"]) subprocess.check_call(pip + ["install", "--upgrade", "setuptools", "wheel"]) # Install requirements subprocess.check_call(pip + 
["install", "-r", str(_PROGRAM_DIR / "requirements_dev.txt")]) wyoming-1.6.0/script/test000077500000000000000000000005461465547706200154450ustar00rootroot00000000000000#!/usr/bin/env python3 import subprocess import sys import venv from pathlib import Path _DIR = Path(__file__).parent _PROGRAM_DIR = _DIR.parent _VENV_DIR = _PROGRAM_DIR / ".venv" _TEST_DIR = _PROGRAM_DIR / "tests" context = venv.EnvBuilder().ensure_directories(_VENV_DIR) subprocess.check_call([context.env_exe, "-m", "pytest", _TEST_DIR] + sys.argv[1:]) wyoming-1.6.0/setup.cfg000066400000000000000000000010431465547706200150460ustar00rootroot00000000000000[flake8] # To work with Black max-line-length = 88 # E501: line too long # W503: Line break occurred before a binary operator # E203: Whitespace before ':' # D202 No blank lines allowed after function docstring # W504 line break after binary operator ignore = E501, W503, E203, D202, W504 # F401 import unused per-file-ignores = programs/asr/faster-whisper/src/faster_whisper/__init__.py:F401 [isort] multi_line_output = 3 include_trailing_comma=True force_grid_wrap=0 use_parentheses=True line_length=88 indent = " " wyoming-1.6.0/setup.py000066400000000000000000000024511465547706200147430ustar00rootroot00000000000000#!/usr/bin/env python3 from pathlib import Path from setuptools import setup this_dir = Path(__file__).parent module_dir = this_dir / "wyoming" version_path = module_dir / "VERSION" version = version_path.read_text(encoding="utf-8").strip() # ----------------------------------------------------------------------------- setup( name="wyoming", version=version, description="Protocol for Rhasspy Voice Assistant", url="http://github.com/rhasspy/wyoming", author="Michael Hansen", author_email="mike@rhasspy.org", license="MIT", packages=["wyoming", "wyoming.util"], package_data={"wyoming": [str(p.relative_to(module_dir)) for p in (version_path,)]}, classifiers=[ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Topic :: Text 
Processing :: Linguistic", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ], keywords="voice assistant rhasspy", extras_require={ "zeroconf": ["zeroconf==0.88.0"], "http": ["Flask==3.0.2", "swagger-ui-py==23.9.23"], }, ) wyoming-1.6.0/tests/000077500000000000000000000000001465547706200143715ustar00rootroot00000000000000wyoming-1.6.0/tests/test_audio.py000066400000000000000000000030101465547706200170750ustar00rootroot00000000000000"""Test audio utilities.""" import io import wave from wyoming.audio import AudioChunk, AudioChunkConverter, wav_to_chunks def test_chunk_converter() -> None: """Test audio chunk converter.""" converter = AudioChunkConverter(rate=16000, width=2, channels=1) input_chunk = AudioChunk( rate=48000, width=4, channels=2, audio=bytes(1 * 48000 * 4 * 2), # 1 sec ) output_chunk = converter.convert(input_chunk) assert output_chunk.rate == 16000 assert output_chunk.width == 2 assert output_chunk.channels == 1 assert len(output_chunk.audio) == 1 * 16000 * 2 * 1 # 1 sec def test_wav_to_chunks() -> None: """Test WAV file to audio chunks.""" with io.BytesIO() as wav_io: wav_write: wave.Wave_write = wave.open(wav_io, "wb") with wav_write: wav_write.setframerate(16000) wav_write.setsampwidth(2) wav_write.setnchannels(1) wav_write.writeframes(bytes(1 * 16000 * 2 * 1)) # 1 sec wav_io.seek(0) wav_bytes = wav_io.getvalue() with io.BytesIO(wav_bytes) as wav_io: wav_read: wave.Wave_read = wave.open(wav_io, "rb") chunks = list(wav_to_chunks(wav_read, samples_per_chunk=1000)) assert len(chunks) == 16 for chunk in chunks: assert isinstance(chunk, AudioChunk) assert chunk.rate == 16000 assert chunk.width == 2 assert chunk.channels == 1 assert len(chunk.audio) == 1000 * 2 # 1000 samples 
wyoming-1.6.0/tests/test_client.py000066400000000000000000000020061465547706200172560ustar00rootroot00000000000000"""Client tests.""" from pathlib import Path import pytest from wyoming.client import ( AsyncClient, AsyncStdioClient, AsyncTcpClient, AsyncUnixClient, ) def test_from_uri() -> None: """Test AsyncClient.from_uri""" # Bad scheme with pytest.raises(ValueError): AsyncClient.from_uri("ftp://127.0.0.1:5000") # Missing hostname with pytest.raises(ValueError): AsyncClient.from_uri("tcp://:5000") # Missing port with pytest.raises(ValueError): AsyncClient.from_uri("tcp://127.0.0.1") stdio_client = AsyncClient.from_uri("stdio://") assert isinstance(stdio_client, AsyncStdioClient) tcp_client = AsyncClient.from_uri("tcp://127.0.0.1:5000") assert isinstance(tcp_client, AsyncTcpClient) assert tcp_client.host == "127.0.0.1" assert tcp_client.port == 5000 unix_client = AsyncClient.from_uri("unix:///path/to/socket") assert isinstance(unix_client, AsyncUnixClient) assert unix_client.socket_path == Path("/path/to/socket") wyoming-1.6.0/tests/test_event.py000066400000000000000000000155241465547706200171320ustar00rootroot00000000000000"""Test event reading/writing.""" import io import json from typing import Iterable import pytest from wyoming import __version__ as wyoming_version from wyoming.event import ( Event, async_read_event, async_write_event, read_event, write_event, ) PAYLOAD = b"test\npayload" DATA = {"test": "data"} DATA_BYTES = json.dumps(DATA, ensure_ascii=False).encode("utf-8") class FakeStreamWriter: def __init__(self) -> None: self._undrained_data = bytes() self._value = bytes() def write(self, data: bytes) -> None: self._undrained_data += data def writelines(self, data: Iterable[bytes]) -> None: for line in data: self.write(line) async def drain(self) -> None: self._value += self._undrained_data self._undrained_data = bytes() def getvalue(self) -> bytes: return self._value class FakeStreamReader: def __init__(self, value: bytes) -> None: self._value_io = 
io.BytesIO(value) async def readline(self) -> bytes: return self._value_io.readline() async def readexactly(self, n: int) -> bytes: data = self._value_io.read(n) assert len(data) == n return data # ----------------------------------------------------------------------------- def test_write_event() -> None: """Test synchronous event writing.""" event = Event(type="test-event", data=DATA, payload=PAYLOAD) with io.BytesIO() as buf: write_event(event, buf) buf.seek(0) event_bytes = buf.getvalue() with io.BytesIO(event_bytes) as reader: # First line is JSON with event type and data/payload lengths. # # A "data" field may also be present here, but it should be short to # avoid overflowing a line buffer. assert json.loads(reader.readline()) == { "type": event.type, "version": wyoming_version, "data_length": len(DATA_BYTES), "payload_length": len(PAYLOAD), } # Data dict comes next, encoded as UTF-8 JSON. # It should be merged on top of the "data" field, if present. assert reader.read(len(DATA_BYTES)) == DATA_BYTES # Payload comes last assert reader.read(len(PAYLOAD)) == PAYLOAD # No more data assert reader.read(1) == b"" @pytest.mark.asyncio async def test_async_write_event() -> None: """Test asynchronous event writing.""" event = Event(type="test-event", data=DATA, payload=PAYLOAD) writer = FakeStreamWriter() await async_write_event(event, writer) # type: ignore event_bytes = writer.getvalue() with io.BytesIO(event_bytes) as reader: # First line is JSON with event type and data/payload lengths. # # A "data" field may also be present here, but it should be short to # avoid overflowing a line buffer. assert json.loads(reader.readline()) == { "type": event.type, "version": wyoming_version, "data_length": len(DATA_BYTES), "payload_length": len(PAYLOAD), } # Data dict comes next, encoded as UTF-8 JSON. # It should be merged on top of the "data" field, if present. 
assert reader.read(len(DATA_BYTES)) == DATA_BYTES # Payload comes last assert reader.read(len(PAYLOAD)) == PAYLOAD # No more data assert reader.read(1) == b"" def test_write_event_no_payload() -> None: """Test synchronous event writing without a payload.""" event = Event(type="test-event", data=DATA, payload=None) with io.BytesIO() as buf: write_event(event, buf) buf.seek(0) event_bytes = buf.getvalue() with io.BytesIO(event_bytes) as reader: assert json.loads(reader.readline()) == { "type": event.type, "version": wyoming_version, "data_length": len(DATA_BYTES), } assert reader.read(len(DATA_BYTES)) == DATA_BYTES # No payload assert reader.read(1) == b"" @pytest.mark.asyncio async def test_async_write_event_no_payload() -> None: """Test asynchronous event writing without a payload.""" event = Event(type="test-event", data=DATA, payload=None) writer = FakeStreamWriter() await async_write_event(event, writer) # type: ignore event_bytes = writer.getvalue() with io.BytesIO(event_bytes) as reader: assert json.loads(reader.readline()) == { "type": event.type, "version": wyoming_version, "data_length": len(DATA_BYTES), } assert reader.read(len(DATA_BYTES)) == DATA_BYTES # No payload assert reader.read(1) == b"" def test_read_event() -> None: """Test synchronous event reading.""" header = { "type": "test-event", "version": wyoming_version, "data_length": len(DATA_BYTES), "payload_length": len(PAYLOAD), # inline data "data": { "test": "this will be overwritten by DATA", "test2": "this will not", }, } with io.BytesIO() as buf: # First line is JSON with event type and data/payload lengths. header_bytes = json.dumps(header, ensure_ascii=False).encode("utf-8") buf.write(header_bytes) buf.write(b"\n") # Data dict comes next, encoded as UTF-8 JSON. 
buf.write(DATA_BYTES) # Payload comes last buf.write(PAYLOAD) buf.seek(0) event_bytes = buf.getvalue() with io.BytesIO(event_bytes) as reader: event = read_event(reader) assert event == Event( type="test-event", # inline data was overwritten data={"test": "data", "test2": "this will not"}, payload=PAYLOAD, ) @pytest.mark.asyncio async def test_async_read_event() -> None: """Test asynchronous event reading.""" header = { "type": "test-event", "version": wyoming_version, "data_length": len(DATA_BYTES), "payload_length": len(PAYLOAD), # inline data "data": { "test": "this will be overwritten by DATA", "test2": "this will not", }, } with io.BytesIO() as buf: # First line is JSON with event type and data/payload lengths. header_bytes = json.dumps(header, ensure_ascii=False).encode("utf-8") buf.write(header_bytes) buf.write(b"\n") # Data dict comes next, encoded as UTF-8 JSON. buf.write(DATA_BYTES) # Payload comes last buf.write(PAYLOAD) buf.seek(0) event_bytes = buf.getvalue() reader = FakeStreamReader(event_bytes) event = await async_read_event(reader) # type: ignore assert event == Event( type="test-event", # inline data was overwritten data={"test": "data", "test2": "this will not"}, payload=PAYLOAD, ) wyoming-1.6.0/tests/test_pyaudioop.py000066400000000000000000000122231465547706200200130ustar00rootroot00000000000000import sys from wyoming import pyaudioop def pack(width, data): return b"".join(v.to_bytes(width, sys.byteorder, signed=True) for v in data) def unpack(width, data): return [ int.from_bytes(data[i : i + width], sys.byteorder, signed=True) for i in range(0, len(data), width) ] packs = {w: (lambda *data, width=w: pack(width, data)) for w in (1, 2, 3, 4)} maxvalues = {w: (1 << (8 * w - 1)) - 1 for w in (1, 2, 3, 4)} minvalues = {w: -1 << (8 * w - 1) for w in (1, 2, 3, 4)} datas = { 1: b"\x00\x12\x45\xbb\x7f\x80\xff", 2: packs[2](0, 0x1234, 0x4567, -0x4567, 0x7FFF, -0x8000, -1), 3: packs[3](0, 0x123456, 0x456789, -0x456789, 0x7FFFFF, -0x800000, -1), 4: 
def test_tomono() -> None:
    """Check the stereo-to-mono mixdown for each tested sample width."""
    for width in (1, 2, 4):
        mono = datas[width]
        frame_size = 2 * width
        stereo = bytearray(2 * len(mono))

        # Copy the mono samples into the left channel only; right stays zero.
        for offset in range(width):
            stereo[offset::frame_size] = mono[offset::width]

        assert pyaudioop.tomono(stereo, width, 1, 0) == mono
        assert pyaudioop.tomono(stereo, width, 0, 1) == b"\0" * len(mono)

        # Now copy the same samples into the right channel as well.
        for offset in range(width):
            stereo[offset + width :: frame_size] = mono[offset::width]

        # Equal half weights must give back the mono data for every buffer type.
        for buffer in (stereo, bytearray(stereo), memoryview(stereo)):
            assert pyaudioop.tomono(buffer, width, 0.5, 0.5) == mono
def test_ratecv() -> None:
    """Test sample rate conversion.

    Exercises `pyaudioop.ratecv` with empty input, with equal input/output
    rates, and with 8000 -> 16000 Hz conversion, where converting piecewise
    (carrying the returned state forward) is compared against converting the
    same data in a single call.
    """
    for w in 1, 2, 4:
        # Empty input yields empty output for bytes, bytearray and memoryview.
        assert pyaudioop.ratecv(b"", w, 1, 8000, 8000, None) == (b"", (-1, ((0, 0),)))
        assert pyaudioop.ratecv(bytearray(), w, 1, 8000, 8000, None) == (
            b"",
            (-1, ((0, 0),)),
        )
        assert pyaudioop.ratecv(memoryview(b""), w, 1, 8000, 8000, None) == (
            b"",
            (-1, ((0, 0),)),
        )
        # With 5 channels the returned state holds one (0, 0) pair per channel.
        assert pyaudioop.ratecv(b"", w, 5, 8000, 8000, None) == (
            b"",
            (-1, ((0, 0),) * 5),
        )
        assert pyaudioop.ratecv(b"", w, 1, 8000, 16000, None) == (b"", (-2, ((0, 0),)))
        # Equal input and output rates return the data unchanged.
        assert pyaudioop.ratecv(datas[w], w, 1, 8000, 8000, None)[0] == datas[w]
        assert pyaudioop.ratecv(datas[w], w, 1, 8000, 8000, None, 1, 0)[0] == datas[w]

    # Two consecutive calls that thread the returned state through.
    state = None
    d1, state = pyaudioop.ratecv(b"\x00\x01\x02", 1, 1, 8000, 16000, state)
    d2, state = pyaudioop.ratecv(b"\x00\x01\x02", 1, 1, 8000, 16000, state)
    assert d1 + d2 == b"\000\000\001\001\002\001\000\000\001\001\002"

    for w in 1, 2, 4:
        # Reference result: convert all of the data in one call.
        d0, state0 = pyaudioop.ratecv(datas[w], w, 1, 8000, 16000, None)
        d, state = b"", None
        # Convert one sample at a time, carrying the state between calls.
        for i in range(0, len(datas[w]), w):
            d1, state = pyaudioop.ratecv(datas[w][i : i + w], w, 1, 8000, 16000, state)
            d += d1
        # Piecewise conversion must match the single-call output and state.
        assert d == d0
        assert state == state0

    # Not sure why this is still failing, but the crackling is gone!
class PingHandler(AsyncEventHandler):
    """Event handler that answers a ping with a pong carrying the same text."""

    async def handle_event(self, event: Event) -> bool:
        """Reply to a ping and return False; return True for any other event."""
        if not Ping.is_type(event.type):
            return True

        request = Ping.from_event(event)
        reply = Pong(text=request.text)
        await self.write_event(reply.event())
        return False
@pytest.mark.asyncio
async def test_tcp_server() -> None:
    """Test sending events to and from a TCP server.

    A free port is picked by binding a temporary socket to port 0. The server
    is then started on that port, a ping is sent through an `AsyncClient`,
    and the pong that comes back must carry the same text.
    """
    # Find a free port. SO_REUSEADDR has to be set before bind() to have any
    # effect, and the context manager guarantees the socket is released.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as port_sock:
        port_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        port_sock.bind(("127.0.0.1", 0))
        port = port_sock.getsockname()[1]

    uri = f"tcp://127.0.0.1:{port}"
    tcp_server = AsyncServer.from_uri(uri)
    await tcp_server.start(PingHandler)

    try:
        # Wait for socket to open. Every probe socket is closed again so that
        # neither refused attempts nor the successful one leak a descriptor.
        for _ in range(10):
            try:
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                    probe.connect(("127.0.0.1", port))
                break
            except ConnectionRefusedError:
                await asyncio.sleep(0.1)

        client = AsyncClient.from_uri(uri)
        await client.connect()
        try:
            await client.write_event(Ping(text="test").event())
            event = await asyncio.wait_for(client.read_event(), timeout=1)
            assert event is not None
            assert Pong.is_type(event.type)
            assert Pong.from_event(event).text == "test"
        finally:
            # Disconnect even when an assertion above fails.
            await client.disconnect()
    finally:
        # Always stop the server so a failing test does not leave it running.
        await tcp_server.stop()
@dataclass
class Transcript(Eventable):
    """Transcription response from ASR system.

    The optional `context` is included in the event data only when it is set,
    and is restored by `from_event`, so it survives a round trip.
    """

    text: str
    """Text transcription of spoken audio"""

    context: Optional[Dict[str, Any]] = None
    """Context for next interaction."""

    @staticmethod
    def is_type(event_type: str) -> bool:
        """Return True if `event_type` is the transcript event type."""
        return event_type == _TRANSCRIPT_TYPE

    def event(self) -> Event:
        """Build the transcript event from this object's fields."""
        data: Dict[str, Any] = {"text": self.text}
        if self.context is not None:
            # Previously dropped, which made the declared field useless.
            data["context"] = self.context

        return Event(type=_TRANSCRIPT_TYPE, data=data)

    @staticmethod
    def from_event(event: Event) -> "Transcript":
        """Create a Transcript from an event; the data must contain "text"."""
        assert event.data is not None
        return Transcript(
            text=event.data["text"], context=event.data.get("context")
        )


@dataclass
class Transcribe(Eventable):
    """Transcription request to ASR system.

    Followed by AudioStart, AudioChunk+, AudioStop
    """

    name: Optional[str] = None
    """Name of ASR model to use"""

    language: Optional[str] = None
    """Language of spoken audio to follow"""

    context: Optional[Dict[str, Any]] = None
    """Context from previous interactions."""

    @staticmethod
    def is_type(event_type: str) -> bool:
        """Return True if `event_type` is the transcribe event type."""
        return event_type == _TRANSCRIBE_TYPE

    def event(self) -> Event:
        """Build the transcribe event; fields that are None are left out."""
        data: Dict[str, Any] = {}
        if self.name is not None:
            data["name"] = self.name

        if self.language is not None:
            data["language"] = self.language

        if self.context is not None:
            # Previously dropped, so context never reached the ASR service.
            data["context"] = self.context

        return Event(type=_TRANSCRIBE_TYPE, data=data)

    @staticmethod
    def from_event(event: Event) -> "Transcribe":
        """Create a Transcribe request from an event; all fields are optional."""
        data = event.data or {}
        return Transcribe(
            name=data.get("name"),
            language=data.get("language"),
            context=data.get("context"),
        )
@dataclass
class AudioStop(Eventable):
    """Audio stream has stopped."""

    timestamp: Optional[int] = None
    """Milliseconds"""

    @staticmethod
    def is_type(event_type: str) -> bool:
        """Return True if `event_type` is the audio-stop event type."""
        return event_type == _STOP_TYPE

    def event(self) -> Event:
        """Build the audio-stop event carrying the timestamp."""
        return Event(
            type=_STOP_TYPE,
            data={"timestamp": self.timestamp},
        )

    @staticmethod
    def from_event(event: Event) -> "AudioStop":
        """Create an AudioStop from an event.

        The event readers pass `data=None` when the header has no data
        section, so a missing data dictionary is treated as empty here
        instead of failing on `None.get`.
        """
        data = event.data or {}
        return AudioStop(timestamp=data.get("timestamp"))
def main() -> None:
    """Convert a WAV file read from stdin and write the converted WAV to stdout.

    The target rate, width and channel count come from the command line; any
    that is not given keeps the value of the input file.
    """
    parser = argparse.ArgumentParser()
    for option in ("--rate", "--width", "--channels"):
        parser.add_argument(option, type=int)
    parser.add_argument("--samples-per-chunk", type=int, default=1024)
    args = parser.parse_args()

    def choose(requested: Optional[int], detected: int) -> int:
        """Prefer the requested value, falling back to the input file's value."""
        return detected if requested is None else requested

    converter = AudioChunkConverter(
        rate=args.rate, width=args.width, channels=args.channels
    )
    frames_per_read = args.samples_per_chunk

    with io.BytesIO(sys.stdin.buffer.read()) as input_wav_io:
        with io.BytesIO() as output_wav_io:
            input_wav_file: wave.Wave_read = wave.open(input_wav_io, "rb")
            output_wav_file: wave.Wave_write = wave.open(output_wav_io, "wb")

            with input_wav_file, output_wav_file:
                # Format of the incoming audio
                rate = input_wav_file.getframerate()
                width = input_wav_file.getsampwidth()
                channels = input_wav_file.getnchannels()

                # Format of the outgoing audio
                output_wav_file.setframerate(choose(args.rate, rate))
                output_wav_file.setsampwidth(choose(args.width, width))
                output_wav_file.setnchannels(choose(args.channels, channels))

                # readframes() returns empty bytes once the input is exhausted.
                read_frames = lambda: input_wav_file.readframes(frames_per_read)
                for audio_bytes in iter(read_frames, b""):
                    chunk = converter.convert(
                        AudioChunk(rate, width, channels, audio_bytes)
                    )
                    output_wav_file.writeframes(chunk.audio)

            # The writer is closed at this point, so the buffer is complete.
            sys.stdout.buffer.write(output_wav_io.getvalue())
class AsyncClient(ABC):
    """Base class for Wyoming async client.

    Subclasses set `_reader` and `_writer` in `connect()`. The class can be
    used as an async context manager, which connects on entry and disconnects
    on exit.
    """

    def __init__(self) -> None:
        self._reader: Optional[asyncio.StreamReader] = None
        self._writer: Optional[asyncio.StreamWriter] = None

    async def read_event(self) -> Optional[Event]:
        """Read the next event from the connection."""
        assert self._reader is not None
        return await async_read_event(self._reader)

    async def write_event(self, event: Event) -> None:
        """Write an event to the connection."""
        assert self._writer is not None
        await async_write_event(event, self._writer)

    async def connect(self) -> None:
        """Open the connection (no-op in the base class)."""

    async def __aenter__(self) -> "AsyncClient":
        await self.connect()
        return self

    async def disconnect(self) -> None:
        """Close the connection (no-op in the base class)."""

    async def __aexit__(self, exc_type, exc_value, traceback) -> None:
        await self.disconnect()

    @staticmethod
    def from_uri(uri: str) -> "AsyncClient":
        """Create the client that matches the scheme of `uri`.

        Supported schemes are 'unix://', 'tcp://' and 'stdio://'.

        Raises:
            ValueError: if the scheme is unsupported, or a 'tcp://' URI lacks
                a host or a port.
        """
        result = urlparse(uri)
        if result.scheme == "unix":
            return AsyncUnixClient(result.path)

        if result.scheme == "tcp":
            if (result.hostname is None) or (result.port is None):
                # The check covers both parts, so the message names both.
                raise ValueError(
                    "A host and port must be specified when using a 'tcp://' URI"
                )

            return AsyncTcpClient(result.hostname, result.port)

        if result.scheme == "stdio":
            return AsyncStdioClient()

        raise ValueError("Only 'stdio://', 'unix://', or 'tcp://' are supported")
class AsyncStdioClient(AsyncClient):
    """Wyoming client that reads events from stdin and writes them to stdout.

    The stdin reader and stdout writer are created on first use rather than
    in `connect()`.
    """

    def __init__(self) -> None:
        super().__init__()
        self._reader: Optional[asyncio.StreamReader] = None
        self._writer: Optional[asyncio.StreamWriter] = None

    async def read_event(self) -> Optional[Event]:
        """Read the next event from stdin, opening the reader if needed."""
        reader = self._reader
        if reader is None:
            reader = await async_get_stdin()
            self._reader = reader

        assert reader is not None
        return await async_read_event(reader)

    async def write_event(self, event: Event) -> None:
        """Write an event to stdout, opening the writer if needed."""
        writer = self._writer
        if writer is None:
            writer = await async_get_stdout()
            self._writer = writer

        assert writer is not None
        await async_write_event(event, writer)
@dataclass
class Event:
    """A typed message with a data dictionary and an optional binary payload."""

    type: str
    data: Dict[str, Any] = field(default_factory=dict)
    payload: Optional[bytes] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the type and data as a dictionary; the payload is left out."""
        result: Dict[str, Any] = {}
        result[_TYPE] = self.type
        result[_DATA] = self.data
        return result

    @staticmethod
    def from_dict(event_dict: Dict[str, Any]) -> "Event":
        """Build an event from a dictionary with "type" and optional "data"."""
        event_type = event_dict["type"]
        event_data = event_dict.get("data", {})
        return Event(type=event_type, data=event_data)
def _read_exactly(reader: BinaryIO, num_bytes: int) -> Optional[bytes]:
    """Read exactly `num_bytes` from `reader`, or return None if it ends early."""
    chunks = []
    remaining = num_bytes
    while remaining > 0:
        chunk = reader.read(remaining)
        if not chunk:
            # End of stream: read() would keep returning b"", so retrying
            # would never finish.
            return None

        chunks.append(chunk)
        remaining -= len(chunk)

    return b"".join(chunks)


def read_event(reader: Optional[BinaryIO] = None) -> Optional[Event]:
    """Read one event synchronously.

    The wire format is a JSON header line, followed by `data_length` bytes of
    JSON that are merged over the header's inline data, followed by
    `payload_length` bytes of raw payload.

    Args:
        reader: Binary stream to read from. Defaults to `sys.stdin.buffer`.

    Returns:
        The event, or None when the stream is at end-of-file, ends in the
        middle of an event, or the header/data is not valid JSON.
    """
    if reader is None:
        reader = sys.stdin.buffer

    try:
        json_line = reader.readline()
        if not json_line:
            return None

        event_dict = json.loads(json_line)
        data_length = event_dict.get(_DATA_LENGTH)
        if (data_length is not None) and (data_length > 0):
            # Merge data
            data_bytes = _read_exactly(reader, data_length)
            if data_bytes is None:
                # Stream ended inside the data section
                return None

            data_dict = event_dict.get(_DATA, {})
            data_dict.update(json.loads(data_bytes))
            event_dict[_DATA] = data_dict

        payload_length = event_dict.get(_PAYLOAD_LENGTH)
        payload: Optional[bytes] = None
        if payload_length is not None:
            payload = _read_exactly(reader, payload_length)
            if payload is None:
                # Stream ended inside the payload
                return None

        return Event(
            type=event_dict[_TYPE], data=event_dict.get(_DATA), payload=payload
        )
    except (KeyboardInterrupt, ValueError):
        # Deliberate best effort: an interrupt or malformed JSON (a ValueError
        # subclass) is reported to the caller as "no event".
        pass

    return None
def main() -> None:
    """Run the HTTP server that forwards WAV audio to a Wyoming ASR service.

    `POST /api/speech-to-text` takes WAV data in the request body, streams it
    to the service given by the `uri` query parameter (or `--uri`) and returns
    the transcript as JSON. The `model` and `language` query parameters
    default to `--model` and `--language`.
    """
    parser = get_argument_parser()
    parser.add_argument("--model", help="Default model name for transcription")
    parser.add_argument("--language", help="Default language for transcription")
    parser.add_argument("--samples-per-chunk", type=int, default=1024)
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    app = get_app("asr", CONF_PATH, args)

    @app.route("/api/speech-to-text", methods=["POST"])
    async def api_stt() -> Response:
        uri = request.args.get("uri", args.uri)
        if not uri:
            raise ValueError("URI is required")

        model_name = request.args.get("model", args.model)
        # Fall back to --language; this previously used --model by mistake,
        # so the model name was sent as the language.
        language = request.args.get("language", args.language)

        async with AsyncClient.from_uri(uri) as client:
            await client.write_event(
                Transcribe(name=model_name, language=language).event()
            )

            # Stream the request body as audio-start, audio-chunk+, audio-stop
            with io.BytesIO(request.data) as wav_io:
                with wave.open(wav_io, "rb") as wav_file:
                    chunks = wav_to_chunks(
                        wav_file,
                        samples_per_chunk=args.samples_per_chunk,
                        start_event=True,
                        stop_event=True,
                    )
                    for chunk in chunks:
                        await client.write_event(chunk.event())

            # Wait for the transcript; other event types are ignored
            while True:
                event = await client.read_event()
                if event is None:
                    raise RuntimeError("Client disconnected")

                if Transcript.is_type(event.type):
                    transcript = Transcript.from_event(event)
                    return jsonify(transcript.to_dict())

                if Error.is_type(event.type):
                    error = Error.from_event(event)
                    raise RuntimeError(
                        f"Unexpected error from client: code={error.code}, text={error.text}"
                    )

    app.run(args.host, args.port)
type: string - in: query name: language description: 'Language to use for transcription' schema: type: string responses: '200': description: OK content: application/json: schema: type: object wyoming-1.6.0/wyoming/http/conf/tts.yaml000066400000000000000000000037471465547706200203350ustar00rootroot00000000000000--- openapi: "3.0.0" info: title: 'Wyoming TTS' version: '1.0.0' description: 'API for Text to Speech' paths: /api/info: get: summary: 'Get service information' responses: '200': description: OK content: application/json: schema: /api/text-to-speech: post: summary: 'Synthesize speech from text' requestBody: description: 'Text to speak' required: true content: text/plain: schema: type: string parameters: - in: query name: uri description: 'URI of Wyoming TTS service' schema: type: string - in: query name: voice description: 'Name of voice to use for synthesis' schema: type: string - in: query name: speaker description: 'Name of voice speaker to use for synthesis' schema: type: string responses: '200': description: OK content: audio/wav: schema: type: string format: binary get: summary: 'Synthesize speech from text' parameters: - in: query name: text description: 'Text to speak' required: true schema: type: string - in: query name: uri description: 'URI of Wyoming TTS service' schema: type: string - in: query name: voice description: 'Name of voice to use for synthesis' schema: type: string - in: query name: speaker description: 'Name of voice speaker to use for synthesis' schema: type: string responses: '200': description: OK content: audio/wav: schema: type: string format: binary wyoming-1.6.0/wyoming/http/conf/wake.yaml000066400000000000000000000016331465547706200204420ustar00rootroot00000000000000--- openapi: "3.0.0" info: title: 'Wyoming Wake' version: '1.0.0' description: 'API for Wake Word Detection' paths: /api/info: get: summary: 'Get service information' responses: '200': description: OK content: application/json: schema: /api/detect-wake-word: post: 
def get_argument_parser() -> argparse.ArgumentParser:
    """Build the parser holding the options shared by all HTTP servers.

    The options are --host, --port, --uri and --debug; each server adds its
    own options to the returned parser.
    """
    shared_options = (
        ("--host", {"default": "0.0.0.0"}),
        ("--port", {"type": int, "default": 5000}),
        ("--uri", {"help": "URI of Wyoming service"}),
        (
            "--debug",
            {"action": "store_true", "help": "Print DEBUG logs to console"},
        ),
    )

    parser = argparse.ArgumentParser()
    for flag, settings in shared_options:
        parser.add_argument(flag, **settings)

    return parser
def main():
    """Run the text-to-speech HTTP server.

    Parses the shared command-line options plus ``--voice`` and ``--speaker``,
    builds the Flask app from the TTS OpenAPI config, registers the
    ``/api/text-to-speech`` endpoint, and starts serving on the configured
    host and port.
    """
    parser = get_argument_parser()
    parser.add_argument("--voice", help="Default voice for synthesis")
    parser.add_argument("--speaker", help="Default voice speaker for synthesis")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    app = get_app("tts", CONF_PATH, args)

    # NOTE: the view is named api_stt although it implements text-to-speech.
    @app.route("/api/text-to-speech", methods=["POST", "GET"])
    async def api_stt() -> Response:
        """Synthesize the requested text and return it as a WAV response.

        The text comes from the request body (POST) or the ``text`` query
        parameter (GET). The ``uri``, ``voice`` and ``speaker`` query
        parameters override the corresponding command-line defaults.

        Raises:
            ValueError: if no service URI or no text is available.
            RuntimeError: if the Wyoming service disconnects or sends an
                error event before the audio is complete.
        """
        # Per-request URI wins over the --uri command-line default.
        uri = request.args.get("uri", args.uri)
        if not uri:
            raise ValueError("URI is required")

        if request.method == "POST":
            # Raw request body is the text to speak.
            text = request.data.decode()
        else:
            text = request.args.get("text", "")

        if not text:
            raise ValueError("Text is required")

        # A speaker is only forwarded when a voice name is present; a speaker
        # given without a voice is ignored.
        voice: Optional[SynthesizeVoice] = None
        voice_name = request.args.get("voice", args.voice)
        if voice_name:
            voice = SynthesizeVoice(
                name=voice_name, speaker=request.args.get("speaker", args.speaker)
            )

        async with AsyncClient.from_uri(uri) as client:
            # The WAV file is assembled in memory.
            # NOTE: wav_file is only closed on the audio-stop path below; the
            # raising paths leave it to be garbage collected.
            wav_io = io.BytesIO()
            wav_file = wave.open(wav_io, "wb")

            await client.write_event(Synthesize(text=text, voice=voice).event())
            while True:
                event = await client.read_event()
                if event is None:
                    raise RuntimeError("Client disconnected")

                if AudioStart.is_type(event.type):
                    # The wave writer needs its format before any frames are
                    # written.
                    # NOTE(review): assumes the service sends audio-start
                    # before the first audio-chunk -- confirm.
                    audio_start = AudioStart.from_event(event)
                    wav_file.setframerate(audio_start.rate)
                    wav_file.setsampwidth(audio_start.width)
                    wav_file.setnchannels(audio_start.channels)
                elif AudioChunk.is_type(event.type):
                    audio_chunk = AudioChunk.from_event(event)
                    wav_file.writeframes(audio_chunk.audio)
                elif AudioStop.is_type(event.type):
                    # Close the writer first so the WAV header is finalized
                    # before the buffer is read; getvalue() returns the whole
                    # buffer regardless of the stream position.
                    wav_file.close()
                    wav_io.seek(0)
                    return Response(
                        wav_io.getvalue(), headers={"Content-Type": "audio/wav"}
                    )
                elif Error.is_type(event.type):
                    error = Error.from_event(event)
                    raise RuntimeError(
                        f"Unexpected error from client: code={error.code}, text={error.text}"
                    )
                # Any other event type is ignored and the loop keeps reading.

    app.run(args.host, args.port)
jsonify(event.to_dict()) if Error.is_type(event.type): error = Error.from_event(event) raise RuntimeError( f"Unexpected error from client: code={error.code}, text={error.text}" ) app.run(args.host, args.port) if __name__ == "__main__": main() wyoming-1.6.0/wyoming/info.py000066400000000000000000000161011465547706200162240ustar00rootroot00000000000000"""Information about available services, models, etc..""" from dataclasses import dataclass, field from typing import Any, Dict, List, Optional from .audio import AudioFormat from .event import Event, Eventable from .util.dataclasses_json import DataClassJsonMixin DOMAIN = "info" _DESCRIBE_TYPE = "describe" _INFO_TYPE = "info" @dataclass class Describe(Eventable): """Request info message.""" @staticmethod def is_type(event_type: str) -> bool: return event_type == _DESCRIBE_TYPE def event(self) -> Event: return Event(type=_DESCRIBE_TYPE) @staticmethod def from_event(event: Event) -> "Describe": return Describe() @dataclass class Attribution(DataClassJsonMixin): """Attribution for an artifact.""" name: str """Who made it.""" url: str """Where it's from.""" @dataclass class Artifact(DataClassJsonMixin): """Information about a service, model, etc..""" name: str """Name/id of artifact.""" attribution: Attribution """Who made the artifact and where it's from.""" installed: bool """True if the artifact is currently installed.""" description: Optional[str] """Human-readable description of the artifact.""" version: Optional[str] """Version of the artifact.""" # ----------------------------------------------------------------------------- @dataclass class AsrModel(Artifact): """Speech-to-text model.""" languages: List[str] """List of supported model languages.""" @dataclass class AsrProgram(Artifact): """Speech-to-text service.""" models: List[AsrModel] """List of available models.""" # ----------------------------------------------------------------------------- @dataclass class TtsVoiceSpeaker(DataClassJsonMixin): """Individual 
speaker in a multi-speaker voice.""" name: str """Name/id of speaker.""" @dataclass class TtsVoice(Artifact): """Text-to-speech voice.""" languages: List[str] """List of languages available in the voice.""" speakers: Optional[List[TtsVoiceSpeaker]] = None """List of individual speakers in the voice.""" @dataclass class TtsProgram(Artifact): """Text-to-speech service.""" voices: List[TtsVoice] """List of available voices.""" # ----------------------------------------------------------------------------- @dataclass class HandleModel(Artifact): """Intent handling model.""" languages: List[str] """List of supported languages in the model.""" @dataclass class HandleProgram(Artifact): """Intent handling service.""" models: List[HandleModel] """List of available models.""" # ----------------------------------------------------------------------------- @dataclass class WakeModel(Artifact): """Wake word detection model.""" languages: List[str] """List of languages supported by the model.""" phrase: Optional[str] """Wake up phrase used by the model.""" @dataclass class WakeProgram(Artifact): """Wake word detection service.""" models: List[WakeModel] """List of available models.""" # ----------------------------------------------------------------------------- @dataclass class IntentModel(Artifact): """Intent recognition model.""" languages: List[str] """List of languages supported by the model.""" @dataclass class IntentProgram(Artifact): """Intent recognition service.""" models: List[IntentModel] """List of available models.""" # ----------------------------------------------------------------------------- @dataclass class Satellite(Artifact): """Satellite information.""" area: Optional[str] = None """Name of the area the satellite is in.""" has_vad: Optional[bool] = None """True if a local VAD will be used to detect the end of voice commands.""" active_wake_words: Optional[List[str]] = None """Wake words that are currently being listened for.""" max_active_wake_words: 
Optional[int] = None """Maximum number of local wake words that can be run simultaneously.""" supports_trigger: Optional[bool] = None """Satellite supports remotely triggering pipeline runs.""" # ----------------------------------------------------------------------------- @dataclass class MicProgram(Artifact): """Microphone information.""" mic_format: AudioFormat """Input audio format.""" # ----------------------------------------------------------------------------- @dataclass class SndProgram(Artifact): """Sound output information.""" snd_format: AudioFormat """Output audio format.""" # ----------------------------------------------------------------------------- @dataclass class Info(Eventable): """Response to describe message with information about available services, models, etc.""" asr: List[AsrProgram] = field(default_factory=list) """Speech-to-text services.""" tts: List[TtsProgram] = field(default_factory=list) """Text-to-speech services.""" handle: List[HandleProgram] = field(default_factory=list) """Intent handling services.""" intent: List[IntentProgram] = field(default_factory=list) """Intent recognition services.""" wake: List[WakeProgram] = field(default_factory=list) """Wake word detection services.""" mic: List[MicProgram] = field(default_factory=list) """Audio input services.""" snd: List[SndProgram] = field(default_factory=list) """Audio output services.""" satellite: Optional[Satellite] = None """Satellite information.""" @staticmethod def is_type(event_type: str) -> bool: return event_type == _INFO_TYPE def event(self) -> Event: data: Dict[str, Any] = { "asr": [p.to_dict() for p in self.asr], "tts": [p.to_dict() for p in self.tts], "handle": [p.to_dict() for p in self.handle], "intent": [p.to_dict() for p in self.intent], "wake": [p.to_dict() for p in self.wake], "mic": [p.to_dict() for p in self.mic], "snd": [p.to_dict() for p in self.snd], } if self.satellite is not None: data["satellite"] = self.satellite.to_dict() return 
@dataclass
class Intent(Eventable):
    """A successfully recognized intent, with its entities and optional reply."""

    name: str
    """Intent name."""

    entities: List[Entity] = field(default_factory=list)
    """Entities (name/value pairs) extracted alongside the intent."""

    text: Optional[str] = None
    """Optional human-readable response."""

    context: Optional[Dict[str, Any]] = None
    """Optional context to carry into the next interaction."""

    @staticmethod
    def is_type(event_type: str) -> bool:
        """Return True if ``event_type`` is the intent event type."""
        return event_type == _INTENT_TYPE

    def event(self) -> Event:
        """Serialize to an event, omitting empty entities and unset fields."""
        payload: Dict[str, Any] = {"name": self.name}

        if self.entities:
            payload["entities"] = list(map(asdict, self.entities))

        # Optional fields are only included when they are set.
        for key, value in (("text", self.text), ("context", self.context)):
            if value is not None:
                payload[key] = value

        return Event(type=_INTENT_TYPE, data=payload)

    @staticmethod
    def from_dict(data: Dict[str, Any]) -> "Intent":
        """Build an intent from its dictionary form (``name`` is required)."""
        # A missing, None or empty "entities" value all yield an empty list.
        parsed_entities: List[Entity] = [
            Entity(**raw_entity) for raw_entity in (data.get("entities") or [])
        ]

        return Intent(
            name=data["name"],
            entities=parsed_entities,
            text=data.get("text"),
            context=data.get("context"),
        )

    @staticmethod
    def from_event(event: Event) -> "Intent":
        """Build an intent from an event's data dictionary."""
        assert event.data is not None
        return Intent.from_dict(event.data)

    def to_rhasspy(self) -> Dict[str, Any]:
        """Return a dictionary with ``intent``, ``entities`` and ``slots`` keys."""
        rhasspy_entities: List[Dict[str, Any]] = []
        slots: Dict[str, Any] = {}

        for entity in self.entities:
            rhasspy_entities.append({"entity": entity.name, "value": entity.value})
            # A repeated entity name keeps the last value.
            slots[entity.name] = entity.value

        return {
            "intent": {
                "name": self.name,
            },
            "entities": rhasspy_entities,
            "slots": slots,
        }
class MicProcessAsyncClient(AsyncClient, contextlib.AbstractAsyncContextManager):
    """Read-only client that turns a subprocess's stdout into audio chunks.

    Fixed-size blocks read from the program's standard output are wrapped,
    unmodified, in ``AudioChunk`` events tagged with the configured rate,
    sample width and channel count.
    """

    def __init__(
        self,
        rate: int,
        width: int,
        channels: int,
        samples_per_chunk: int,
        program: str,
        program_args: List[str],
    ) -> None:
        super().__init__()
        self.rate, self.width, self.channels = rate, width, channels
        self.samples_per_chunk = samples_per_chunk
        # One chunk = samples * bytes per sample * channels.
        self.bytes_per_chunk = samples_per_chunk * width * channels
        self.program, self.program_args = program, program_args
        self._proc: Optional[Process] = None

    async def connect(self) -> None:
        """Start the external program with its stdout captured."""
        self._proc = await asyncio.create_subprocess_exec(
            self.program, *self.program_args, stdout=asyncio.subprocess.PIPE
        )

    async def disconnect(self) -> None:
        """Terminate the external program if it is still running."""
        proc = self._proc
        assert proc is not None

        try:
            still_running = proc.returncode is None
            if still_running:
                # Ask the process to exit, then wait for it.
                proc.terminate()
                await proc.wait()
        except ProcessLookupError:
            # The process was already gone; nothing to do.
            pass
        except Exception:
            _LOGGER.exception("Unexpected error stopping process: %s", self.program)
        finally:
            self._proc = None

    async def __aenter__(self) -> "MicProcessAsyncClient":
        await self.connect()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self.disconnect()

    async def read_event(self) -> Optional[Event]:
        """Return the next audio chunk event, or None once stdout runs dry."""
        proc = self._proc
        assert proc is not None
        stdout = proc.stdout
        assert stdout is not None

        try:
            audio_bytes = await stdout.readexactly(self.bytes_per_chunk)
        except asyncio.IncompleteReadError:
            # Fewer bytes than a full chunk were available before EOF.
            return None

        chunk = AudioChunk(
            rate=self.rate,
            width=self.width,
            channels=self.channels,
            audio=audio_bytes,
            timestamp=time.monotonic_ns(),
        )
        return chunk.event()

    async def write_event(self, event: Event) -> None:
        """Ignore outgoing events; this client is read-only."""
@dataclass
class RunPipeline(Eventable):
    """Request to run a pipeline between two stages."""

    start_stage: PipelineStage
    """First stage of the pipeline."""

    end_stage: PipelineStage
    """Last stage of the pipeline (must not come before the start stage)."""

    wake_word_name: Optional[str] = None
    """Wake word that triggered the pipeline."""

    restart_on_end: bool = False
    """Whether the pipeline restarts automatically once it ends."""

    wake_word_names: Optional[List[str]] = None
    """Wake words to listen for (start_stage = wake)."""

    announce_text: Optional[str] = None
    """Text to speak via text-to-speech (start_stage = tts)."""

    def __post_init__(self) -> None:
        """Validate that the end stage is reachable from the start stage."""
        # Stages in execution order: a pipeline may end on its start stage or
        # on any stage that follows it.
        stage_order = (
            PipelineStage.WAKE,
            PipelineStage.ASR,
            PipelineStage.INTENT,
            PipelineStage.HANDLE,
            PipelineStage.TTS,
        )

        for position, stage in enumerate(stage_order):
            if self.start_stage == stage:
                allowed_end_stages = stage_order[position:]
                break
        else:
            raise ValueError(f"Invalid start stage: {self.start_stage}")

        if self.end_stage not in allowed_end_stages:
            raise ValueError(f"Invalid end stage: {self.end_stage}")

    @staticmethod
    def is_type(event_type: str) -> bool:
        """Return True if ``event_type`` is the run-pipeline event type."""
        return event_type == _RUN_PIPELINE_TYPE

    def event(self) -> Event:
        """Serialize to an event, leaving out optional fields that are unset."""
        payload: Dict[str, Any] = {
            "start_stage": self.start_stage.value,
            "end_stage": self.end_stage.value,
            "restart_on_end": self.restart_on_end,
        }

        # (key, value, include?) -- an empty wake word list is omitted too.
        optional_fields = (
            ("wake_word_name", self.wake_word_name, self.wake_word_name is not None),
            ("wake_word_names", self.wake_word_names, bool(self.wake_word_names)),
            ("announce_text", self.announce_text, self.announce_text is not None),
        )
        for key, value, include in optional_fields:
            if include:
                payload[key] = value

        return Event(type=_RUN_PIPELINE_TYPE, data=payload)

    @staticmethod
    def from_event(event: Event) -> "RunPipeline":
        """Rebuild the request from an event; both stages are required."""
        assert event.data is not None
        payload = event.data

        first_stage = PipelineStage(payload["start_stage"])
        last_stage = PipelineStage(payload["end_stage"])

        return RunPipeline(
            start_stage=first_stage,
            end_stage=last_stage,
            wake_word_name=payload.get("wake_word_name"),
            restart_on_end=payload.get("restart_on_end", False),
            wake_word_names=payload.get("wake_word_names"),
            announce_text=payload.get("announce_text"),
        )
def tomono(
    fragment: BufferType, width: int, lfactor: float, rfactor: float
) -> BufferType:
    """Convert an interleaved stereo fragment to mono.

    Each output sample is ``left * lfactor + right * rfactor``, clamped to
    the signed range of ``width`` bytes. Samples are read and written with
    the struct format for ``width`` (no byte-order prefix, so native order).

    Args:
        fragment: Interleaved left/right samples.
        width: Sample width in bytes (1, 2 or 4).
        lfactor: Weight applied to each left sample.
        rfactor: Weight applied to each right sample.

    Returns:
        A bytearray half the length of ``fragment``.

    Raises:
        ValueError: If ``width`` is unsupported, the fragment length is not a
            multiple of ``width``, or the fragment does not hold an even
            number of samples (i.e. an incomplete stereo frame).
    """
    fragment_length = len(fragment)
    check_parameters(fragment_length, width)

    # A stereo frame is two samples. Without this check an odd sample count
    # fails later with an opaque struct.error when the right channel is read.
    if ((fragment_length // width) % 2) != 0:
        raise ValueError(
            "Not a whole number of stereo frames: "
            f"fragment_length={fragment_length}, width={width}"
        )

    max_val = _MAX_VALS[width]
    min_val = _MIN_VALS[width]
    struct_format = _SIGNED_FORMATS[width]

    result = bytearray(fragment_length // 2)

    # i walks the input one stereo frame at a time; the matching mono sample
    # lives at half that offset in the output.
    for i in range(0, fragment_length, width * 2):
        val_left = struct.unpack_from(struct_format, fragment, i)[0]
        val_right = struct.unpack_from(struct_format, fragment, i + width)[0]
        val_mono = (val_left * lfactor) + (val_right * rfactor)
        sample_mono = fbound(val_mono, min_val, max_val)
        struct.pack_into(struct_format, result, i // 2, sample_mono)

    return result
(fragment[index + 2] << 8) + fragment[index + 3] ) raise ValueError(f"Invalid width: {width}") def _set_sample32(fragment: bytearray, width: int, index: int, sample: int) -> None: if width == 1: fragment[index] = sample & 0x000000FF elif width == 2: fragment[index] = (sample >> 8) & 0x000000FF fragment[index + 1] = sample & 0x000000FF elif width == 4: fragment[index] = sample >> 24 fragment[index + 1] = (sample >> 16) & 0x000000FF fragment[index + 2] = (sample >> 8) & 0x000000FF fragment[index + 3] = sample & 0x000000FF else: raise ValueError(f"Invalid width: {width}") def lin2lin(fragment: BufferType, width: int, new_width: int) -> BufferType: if width == new_width: return fragment fragment_length = len(fragment) check_parameters(fragment_length, width) check_size(new_width) result = bytearray(int((fragment_length / width) * new_width)) j = 0 for i in range(0, fragment_length, width): sample = _get_sample32(fragment, width, i) _set_sample32(result, new_width, j, sample) j += new_width return result def ratecv( fragment: BufferType, width: int, nchannels: int, inrate: int, outrate: int, state: Optional[State], weightA: int = 1, weightB: int = 0, ) -> Tuple[bytearray, Optional[State]]: fragment_length = len(fragment) check_size(width) if nchannels < 1: raise ValueError(f"Number of channels should be >= 1, got {nchannels}") bytes_per_frame = width * nchannels if (weightA < 1) or (weightB) < 0: raise ValueError( "weightA should be >= 1, weightB should be >= 0, " f"got weightA={weightA}, weightB={weightB}" ) if (fragment_length % bytes_per_frame) != 0: raise ValueError("Not a whole number of frames") if (inrate <= 0) or (outrate <= 0): raise ValueError("Sampling rate not > 0") d = math.gcd(inrate, outrate) inrate //= d outrate //= d d = math.gcd(weightA, weightB) weightA //= d weightB //= d prev_i: List[int] = [0] * nchannels cur_i: List[int] = [0] * nchannels if state is None: d = -outrate # prev_i and cur_i are already zeroed else: d, samps = state if len(samps) != 
nchannels: raise ValueError("Illegal state argument") for chan_index, channel in enumerate(samps): prev_i[chan_index], cur_i[chan_index] = channel input_frames = fragment_length // bytes_per_frame output_frames = int(math.ceil(input_frames * (outrate / inrate))) # Approximate version used in C code to avoid overflow: # q = 1 + ((input_frames - 1) // inrate) # output_frames = q * outrate * bytes_per_frame result = bytearray(output_frames * bytes_per_frame) struct_format = _SIGNED_FORMATS[width] input_index = 0 output_index = 0 while True: while d < 0: if input_frames == 0: samps = tuple( (prev_i[chan], cur_i[chan]) for chan in range(0, nchannels) ) # NOTE: It's critical that result is clipped here return result[:output_index], (d, samps) for chan in range(0, nchannels): prev_i[chan] = cur_i[chan] cur_i[chan] = struct.unpack_from(struct_format, fragment, input_index)[ 0 ] input_index += width cur_i[chan] = ((weightA * cur_i[chan]) + (weightB * prev_i[chan])) // ( weightA + weightB ) input_frames -= 1 d += outrate while d >= 0: for chan in range(0, nchannels): sample = int( ( (float(prev_i[chan]) * float(d)) + (float(cur_i[chan]) * (float(outrate) - float(d))) ) / float(outrate) ) struct.pack_into(struct_format, result, output_index, sample) output_index += width d -= inrate return result, None wyoming-1.6.0/wyoming/satellite.py000066400000000000000000000054631465547706200172700ustar00rootroot00000000000000"""Satellite events.""" from dataclasses import dataclass from .event import Event, Eventable _RUN_SATELLITE_TYPE = "run-satellite" _PAUSE_SATELLITE_TYPE = "pause-satellite" _STREAMING_STARTED_TYPE = "streaming-started" _STREAMING_STOPPED_TYPE = "streaming-stopped" _SATELLITE_CONNECTED_TYPE = "satellite-connected" _SATELLITE_DISCONNECTED_TYPE = "satellite-disconnected" @dataclass class RunSatellite(Eventable): """Informs the satellite that the server is ready to run a pipeline.""" @staticmethod def is_type(event_type: str) -> bool: return event_type == 
_RUN_SATELLITE_TYPE def event(self) -> Event: return Event(type=_RUN_SATELLITE_TYPE) @staticmethod def from_event(event: Event) -> "RunSatellite": return RunSatellite() @dataclass class PauseSatellite(Eventable): """Informs the satellite that the server is not ready to run a pipeline.""" @staticmethod def is_type(event_type: str) -> bool: return event_type == _PAUSE_SATELLITE_TYPE def event(self) -> Event: return Event(type=_PAUSE_SATELLITE_TYPE) @staticmethod def from_event(event: Event) -> "PauseSatellite": return PauseSatellite() @dataclass class StreamingStarted(Eventable): """Satellite has started streaming audio to server.""" @staticmethod def is_type(event_type: str) -> bool: return event_type == _STREAMING_STARTED_TYPE def event(self) -> Event: return Event(type=_STREAMING_STARTED_TYPE) @staticmethod def from_event(event: Event) -> "StreamingStarted": return StreamingStarted() @dataclass class StreamingStopped(Eventable): """Satellite has stopped streaming audio to server.""" @staticmethod def is_type(event_type: str) -> bool: return event_type == _STREAMING_STOPPED_TYPE def event(self) -> Event: return Event(type=_STREAMING_STOPPED_TYPE) @staticmethod def from_event(event: Event) -> "StreamingStopped": return StreamingStopped() @dataclass class SatelliteConnected(Eventable): """Satellite has connected to server.""" @staticmethod def is_type(event_type: str) -> bool: return event_type == _SATELLITE_CONNECTED_TYPE def event(self) -> Event: return Event(type=_SATELLITE_CONNECTED_TYPE) @staticmethod def from_event(event: Event) -> "SatelliteConnected": return SatelliteConnected() @dataclass class SatelliteDisconnected(Eventable): """Satellite has disconnected from server.""" @staticmethod def is_type(event_type: str) -> bool: return event_type == _SATELLITE_DISCONNECTED_TYPE def event(self) -> Event: return Event(type=_SATELLITE_DISCONNECTED_TYPE) @staticmethod def from_event(event: Event) -> "SatelliteDisconnected": return SatelliteDisconnected() 
class AsyncServer(ABC):
    """Base class for async Wyoming server.

    Keeps a registry of running handler tasks so that ``stop()`` can ask
    every connected handler to shut down.
    """

    def __init__(self) -> None:
        # Running handler task -> handler; entries remove themselves when the
        # task finishes (see _handler_callback).
        self._handlers: Dict[asyncio.Task, AsyncEventHandler] = {}

    @abstractmethod
    async def run(self, handler_factory: HandlerFactory) -> None:
        """Start server and block while running."""

    @staticmethod
    def from_uri(uri: str) -> "AsyncServer":
        """Create server from URI.

        Supported schemes are ``unix://<path>``, ``tcp://<host>:<port>`` and
        ``stdio://``.

        Raises:
            ValueError: If the scheme is unsupported, or a ``tcp://`` URI is
                missing its host or its port.
        """
        result = urlparse(uri)

        if result.scheme == "unix":
            return AsyncUnixServer(result.path)

        if result.scheme == "tcp":
            # Report which part is missing: "tcp://:10200" has a port but no
            # host, and used to be rejected with a message about the port.
            if result.hostname is None:
                raise ValueError("A host must be specified when using a 'tcp://' URI")

            if result.port is None:
                raise ValueError("A port must be specified when using a 'tcp://' URI")

            return AsyncTcpServer(result.hostname, result.port)

        if result.scheme == "stdio":
            return AsyncStdioServer()

        raise ValueError("Only 'stdio://', 'unix://', or 'tcp://' are supported")

    async def _handler_callback(
        self,
        handler_factory: HandlerFactory,
        reader: asyncio.StreamReader,
        writer: asyncio.StreamWriter,
    ):
        """Create a handler for a new connection and run it as a task."""
        handler = handler_factory(reader, writer)
        task = asyncio.create_task(handler.run(), name="wyoming event handler")
        self._handlers[task] = handler
        # Drop the registry entry as soon as the handler task completes.
        task.add_done_callback(lambda t: self._handlers.pop(t, None))

    async def start(self, handler_factory: HandlerFactory) -> None:
        """Start server without blocking."""

    async def stop(self) -> None:
        """Try to stop all event handlers."""
        await asyncio.gather(*(h.stop() for h in self._handlers.values()))
loop = asyncio.get_running_loop() writer_transport, writer_protocol = await loop.connect_write_pipe( asyncio.streams.FlowControlMixin, sys.stdout ) writer = asyncio.StreamWriter(writer_transport, writer_protocol, None, loop) handler = handler_factory(reader, writer) while True: event = await async_read_event(reader) if event is None: break if not (await handler.handle_event(event)): break class AsyncTcpServer(AsyncServer): """Wyoming server over TCP.""" def __init__(self, host: str, port: int) -> None: super().__init__() self.host = host self.port = port self._server: Optional[asyncio.AbstractServer] = None async def run(self, handler_factory: HandlerFactory) -> None: handler_callback = partial(self._handler_callback, handler_factory) self._server = await asyncio.start_server( handler_callback, host=self.host, port=self.port ) await self._server.serve_forever() async def start(self, handler_factory: HandlerFactory) -> None: """Start server without blocking.""" handler_callback = partial(self._handler_callback, handler_factory) self._server = await asyncio.start_server( handler_callback, host=self.host, port=self.port ) await self._server.start_serving() async def stop(self) -> None: """Try to stop all event handlers.""" await super().stop() if self._server is not None: self._server.close() class AsyncUnixServer(AsyncServer): """Wyoming server over a Unix domain socket.""" def __init__(self, socket_path: Union[str, Path]) -> None: super().__init__() self.socket_path = Path(socket_path) self._server: Optional[asyncio.AbstractServer] = None async def run(self, handler_factory: HandlerFactory) -> None: """Start server and block while running.""" # Need to unlink socket file if it exists self.socket_path.unlink(missing_ok=True) handler_callback = partial(self._handler_callback, handler_factory) self._server = await asyncio.start_unix_server( handler_callback, path=self.socket_path ) try: await self._server.serve_forever() finally: # Unlink when we're done 
@dataclass
class Played(Eventable):
    """Marker event: audio has finished playing."""

    @staticmethod
    def is_type(event_type: str) -> bool:
        """Return True when ``event_type`` is the "played" event type."""
        return _PLAYED_TYPE == event_type

    def event(self) -> Event:
        """Build the event; only the type is set."""
        played_event = Event(type=_PLAYED_TYPE)
        return played_event

    @staticmethod
    def from_event(event: Event) -> "Played":
        """Create a ``Played``; nothing is read from ``event``."""
        return Played()
@dataclass
class TimerStarted(Eventable):
    """New timer was started."""

    id: str
    """Unique id of timer."""

    total_seconds: int
    """Total number of seconds the timer will run for."""

    name: Optional[str] = None
    """Optional name provided by user."""

    start_hours: Optional[int] = None
    """Number of hours the user requested the timer to run for."""

    start_minutes: Optional[int] = None
    """Number of minutes the user requested the timer to run for."""

    start_seconds: Optional[int] = None
    """Number of seconds the user requested the timer to run for."""

    @staticmethod
    def is_type(event_type: str) -> bool:
        """Return True when ``event_type`` is the timer-started type."""
        return event_type == _STARTED_TYPE

    def event(self) -> Event:
        """Build the event.

        ``id`` and ``total_seconds`` are always included; each optional
        field is included only when it is not None.
        """
        data = {"id": self.id, "total_seconds": self.total_seconds}

        # Same insertion order as the field declarations above.
        for key in ("name", "start_hours", "start_minutes", "start_seconds"):
            value = getattr(self, key)
            if value is not None:
                data[key] = value

        return Event(
            type=_STARTED_TYPE,
            data=data,
        )

    @staticmethod
    def from_event(event: Event) -> "TimerStarted":
        """Create a ``TimerStarted`` from an event's data.

        ``id`` and ``total_seconds`` are read by subscript, so they must be
        present (presumably a KeyError otherwise -- depends on the type of
        ``event.data``); the remaining fields fall back to None.
        """
        return TimerStarted(
            id=event.data["id"],
            total_seconds=event.data["total_seconds"],
            name=event.data.get("name"),
            start_hours=event.data.get("start_hours"),
            start_minutes=event.data.get("start_minutes"),
            start_seconds=event.data.get("start_seconds"),
        )
@dataclass
class SynthesizeVoice:
    """Information about the desired voice for synthesis."""

    name: Optional[str] = None
    """Voice name from tts info (overrides language)."""

    language: Optional[str] = None
    """Voice language from tts info."""

    speaker: Optional[str] = None
    """Voice speaker from tts info."""

    def to_dict(self) -> Dict[str, str]:
        """Serialize to a dict.

        ``name`` wins over ``language``; ``speaker`` is only emitted
        together with ``name``. With neither name nor language set the
        result is empty.
        """
        if self.name is not None:
            voice = {"name": self.name}
            if self.speaker is not None:
                voice["speaker"] = self.speaker
        elif self.language is not None:
            voice = {"language": self.language}
        else:
            voice = {}

        return voice

    @staticmethod
    def from_dict(voice: Dict[str, Any]) -> "Optional[SynthesizeVoice]":
        """Parse the dict produced by ``to_dict``.

        Returns None when the dict has neither "name" nor "language".
        """
        if "name" in voice:
            return SynthesizeVoice(
                name=voice["name"],
                speaker=voice.get("speaker"),
            )

        if "language" in voice:
            # The language belongs in `language`, not `name`; otherwise a
            # to_dict()/from_dict() round trip turns a language into a
            # voice name.
            return SynthesizeVoice(language=voice["language"])

        return None
class DataClassJsonMixin:
    """Adds from_dict/to_dict to a dataclass."""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> Any:
        """Parse dataclasses recursively.

        Keys in ``data`` that are not fields of ``cls`` are ignored. Fields
        missing from ``data`` whose type is Optional are set to None; other
        missing fields are left to the dataclass constructor (its defaults,
        or a TypeError if there is none).
        """
        kwargs: Dict[str, Any] = {}

        cls_fields = {field.name: field for field in fields(cls)}
        for key, value in data.items():
            field = cls_fields.get(key)
            if field is None:
                # Skip unknown fields
                continue

            # _decode handles nested dataclasses too, including an explicit
            # None for a nested dataclass field.
            kwargs[key] = _decode(value, field.type)

        # Fill in optional fields with None
        for field in cls_fields.values():
            if (field.name not in kwargs) and _is_optional(field.type):
                kwargs[field.name] = None

        return cls(**kwargs)

    def to_dict(self) -> Dict[str, Any]:
        """Alias for asdict."""
        return asdict(self)


def _decode(value: Any, target_type: Type) -> Any:
    """Decode value using (possibly generic) type.

    Handles nested dataclasses, Optional[T], List[T] and Dict[str, T];
    anything else is returned unchanged.
    """
    if is_dataclass(target_type):
        assert issubclass(target_type, DataClassJsonMixin), target_type
        return target_type.from_dict(value) if value is not None else None

    type_args = getattr(target_type, "__args__", None)
    if type_args:
        none_type = type(None)

        # Optional[T]: decode with the first non-None member, wherever
        # NoneType sits in the argument list.
        if none_type in type_args:
            inner_types = [arg for arg in type_args if arg is not none_type]
            if not inner_types:
                return value

            return _decode(value, inner_types[0])

        # List[T]
        # str/bytes are Sequences as well, but they are scalar values here
        # and must not be split into single characters.
        if isinstance(value, Sequence) and not isinstance(
            value, (str, bytes, bytearray)
        ):
            list_type = type_args[0]
            return [_decode(item, list_type) for item in value]

        # Dict[str, T]
        if isinstance(value, Mapping):
            value_type = type_args[1]
            return {
                map_key: _decode(map_value, value_type)
                for map_key, map_value in value.items()
            }

    return value


def _is_optional(target_type: Type) -> bool:
    """True if type is Optional"""
    return hasattr(target_type, "__args__") and (type(None) in target_type.__args__)
@dataclass
class Detection(Eventable):
    """Event reporting that a wake word was detected."""

    name: Optional[str] = None
    """Model name."""

    timestamp: Optional[int] = None
    """Timestamp of the audio chunk containing the detection."""

    speaker: Optional[str] = None
    """Speaker name."""

    @staticmethod
    def is_type(event_type: str) -> bool:
        """Return True when ``event_type`` is the detection event type."""
        return _DETECTION_TYPE == event_type

    def event(self) -> Event:
        """Build the event; all three fields are always included."""
        payload = {
            "name": self.name,
            "timestamp": self.timestamp,
            "speaker": self.speaker,
        }
        return Event(type=_DETECTION_TYPE, data=payload)

    @staticmethod
    def from_event(event: Event) -> "Detection":
        """Create a ``Detection``; missing or empty data yields all-None fields."""
        source = event.data if event.data else {}
        model_name = source.get("name")
        chunk_timestamp = source.get("timestamp")
        speaker_name = source.get("speaker")
        return Detection(
            name=model_name,
            timestamp=chunk_timestamp,
            speaker=speaker_name,
        )
@dataclass
class NotDetected(Eventable):
    """Marker event: the audio stream ended before a wake word was detected."""

    @staticmethod
    def is_type(event_type: str) -> bool:
        """Return True when ``event_type`` is the not-detected event type."""
        return _NOT_DETECTED_TYPE == event_type

    def event(self) -> Event:
        """Build the event; only the type is set."""
        not_detected_event = Event(type=_NOT_DETECTED_TYPE)
        return not_detected_event

    @staticmethod
    def from_event(event: Event) -> "NotDetected":
        """Create a ``NotDetected``; nothing is read from ``event``."""
        return NotDetected()
async def register_server(name: str, port: int, host: Optional[str] = None) -> None:
    """Register a ``_wyoming._tcp.local.`` service via zeroconf.

    Args:
        name: Instance name; registered as ``{name}._wyoming._tcp.local.``.
        port: Port advertised for the service.
        host: IPv4 address to advertise. When empty or None, the address of
            the local interface used to reach the mDNS multicast group is
            detected and advertised instead.

    Raises:
        OSError: if the detection socket cannot be connected, or if ``host``
            is not a valid IPv4 address string (from ``socket.inet_aton``).
    """
    if not host:
        # Connecting a datagram socket selects the local address used to
        # reach the target; that address is read back below. The socket is
        # only a probe, so close it as soon as the address is known.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as test_sock:
            test_sock.setblocking(False)
            test_sock.connect((MDNS_TARGET_IP, 1))
            host = test_sock.getsockname()[0]

        _LOGGER.debug("Detected IP: %s", host)

    assert host
    service_info = AsyncServiceInfo(
        "_wyoming._tcp.local.",
        f"{name}._wyoming._tcp.local.",
        addresses=[socket.inet_aton(host)],
        port=port,
    )

    # NOTE(review): the AsyncZeroconf instance is neither returned nor closed,
    # so callers have no handle to unregister the service -- confirm this is
    # intended.
    aiozc = AsyncZeroconf()
    await aiozc.async_register_service(service_info)