hassil-3.0.1/.github/dependabot.yml
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: daily
    open-pull-requests-limit: 10
  - package-ecosystem: "pip"
    directory: "/" # Location of package manifests
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 10

hassil-3.0.1/.github/release-drafter.yml
change-template: "- #$NUMBER - $TITLE (@$AUTHOR)"
categories:
  - title: "⚠ Breaking Changes"
    labels:
      - "breaking-change"
  - title: "⬆️ Dependencies"
    collapse-after: 1
    labels:
      - "dependencies"
template: |
  ## What’s Changed

  $CHANGES

hassil-3.0.1/.github/workflows/ci.yaml
---
name: CI

# yamllint disable-line rule:truthy
on:
  push:
    branches: [main]
  pull_request:

permissions:
  contents: read

concurrency:
  # yamllint disable-line rule:line-length
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python_version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
    steps:
      - uses: actions/checkout@v4.2.2
      - uses: actions/setup-python@v5
        with:
          python-version: "${{ matrix.python_version }}"
          cache: "pip"
      - run: script/setup --dev
      - run: script/lint
      - run: script/test

hassil-3.0.1/.github/workflows/release-drafter.yml
name: Release Drafter

on:
  push:
    branches:
      - main

jobs:
  update_release_draft:
    runs-on: ubuntu-latest
    steps:
      # Drafts your next Release notes as Pull Requests are merged into "main"
      - uses: release-drafter/release-drafter@v6.1.0
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

hassil-3.0.1/.gitignore
.DS_Store
.idea
*.log
tmp/
*.py[cod]
*.egg
build
htmlcov
.projectile
.venv/
venv/
.tox/
.mypy_cache/
*.egg-info/
dist/

hassil-3.0.1/.isort.cfg
[settings]
multi_line_output=3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=88

hassil-3.0.1/CHANGELOG.md
# Changelog

## 3.0.1

- Use faster permutation iteration

## 3.0.0

- Rename `Sequence` to `Group`
- Remove `SequenceType`; there is now a separate `Group` subclass for each type:
  - `Sequence` for `(a b c)`
  - `Alternative` for `(a|b|c)`
  - `Permutation` for `(a;b;c)`
- `Sentence` now contains an `expression` field with the parsed expression
- `Permutation` is represented explicitly instead of being expanded internally into an alternative
- Everything used by downstream projects has been moved into a top-level import (`from hassil import Intents`). The internal structure of hassil (e.g., `hassil.intents` or `hassil.util`) should be expected to change.
- Drop support for Python 3.8 (EOL October 7, 2024)

## 2.2.3

- Fix behavior with wildcards inside and outside words

## 2.2.2

- Allow "," as a decimal separator for fractional ranges

## 2.2.1

- Allow list values with "in" but no "out"

## 2.2.0

- Add "fractions" to number ranges with halves and tenths
- Don't remove punctuation within words in `text_clean` (e.g., "2.5")

## 2.1.1

- Allow number ranges to have the same start/stop (single number)

## 2.1.0

- Upgrade to `unicode-rbnf` 2.2
- Transition to pyproject.toml

## 2.0.4

- Trie values are accumulated on `insert` instead of being overwritten

## 2.0.3

- Make trie more restrictive (`two` will not match `t|wo`)

## 2.0.2

- Require `unicode-rbnf>=2.1` which includes important bugfixes

## 2.0.1

- Count stripped text in `text_chunks_matched`

## 2.0.0

- Allow wildcards to be followed by expansion rules and lists
- Use regular expressions to filter sentence templates
- Add `filter_with_regex` to intent settings and intent data (`false` disables regex filtering)
- Filter text slot list values by required/excluded context during matching
- Use a trie to filter range slot list values based on remaining text to be matched
- Add `required_keywords` section to intent data to skip sentences without specific keywords
- Preserve case during matching
- Strip punctuation before text processing
- Remove extraneous whitespace from the end of wildcards
- Refactor string matching code into `string_matcher.py`

## 1.8.0

- Bump `unicode-rbnf` to 2.0.0
- Use multiple texts for numbers, e.g. for German 1 `ein`, `eins`, etc.
- Remove `words_ruleset` for ranges

## 1.7.4

- Loosen `unicode-rbnf` version

## 1.7.3

- Cache number words

## 1.7.2

- Add apostrophe to punctuation list

## 1.7.1

- Fix `is_wildcard` in match entities
- Fix `wildcard_text` initialization
- Bump pylint to 3.1.0

## 1.7.0

- Add multiplier to range

## 1.6.1

- Allow context values to be dicts

## 1.6.0

- Add metadata to sentences
- Add metadata to list items

## 1.5.3

- Restrict unmatched entities to contiguous blocks of non-literal text
- Automatically use intents language for number words if supported

## 1.5.2

- Add local slots (under data sentences)
- Add literal text chunk count and matched sentence template to results

## 1.5.1

- Expand `requires_context` to allow copying value to a slot

## 1.5.0

- Add fuzzy matching using edit distance

## 1.4.0

- Sort wildcard sentences so those with the most literal text chunks are processed first

## 1.3.0

- Add number-to-word generation using [unicode-rbnf](https://github.com/rhasspy/unicode-rbnf) for range lists

## 1.2.5

- Fix degenerate wildcard case

hassil-3.0.1/CODEOWNERS
* @synesthesiam

hassil-3.0.1/HassILGrammar.g4
grammar HassILGrammar;

document
    : (sentence)+
    ;

sentence
    : expression EOL
    ;

expression
    : (WS? group | WS? optional | WS? list | WS? rule | text_chunk) (alt? expression)*
    ;

// One or more text chunks in a sequence
group
    : '(' WS? expression WS? ')'
    ;

optional
    : '[' WS? expression WS? ']'
    ;

alt
    : WS? '|' WS?
    ;

text_chunk
    : WS? STRING WS?
    ;

list
    : '{' WS? list_name WS? '}'
    ;

list_name
    : STRING
    ;
rule
    : '<' WS? rule_name WS? '>'
    ;

rule_name
    : STRING
    ;

STRING
    : (ESC | CHARACTER)+
    ;

ESC
    : '\\' [<>()[\]{}|]
    ;

CHARACTER
    : ~ [<>()[\]{} \t\n\r|]
    ;

EOL
    : [\n\r] +
    ;

WS
    : [ \t] +
    ;

hassil-3.0.1/LICENSE.md
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.

"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.

2. Grant of Copyright License.
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. 
Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

hassil-3.0.1/README.md
# HassIL

The Home Assistant Intent Language (HassIL) parser for [intents](https://github.com/home-assistant/intents).

## Dependencies

* PyYAML

## Installation

Run the `script/setup` script to automatically create a virtual environment and install the requirements.

# Running

``` sh
python3 -m hassil <yaml_file_or_directory> [<yaml_file_or_directory> ...]
```

Once loaded, you may type in a sentence and see what intent it matches. For example:

``` sh
python3 -m hassil examples/en.yaml --areas 'living room'
what is the temperature in the living room
{'intent': 'HassClimateGetTemperature', 'area': 'living room', 'domain': 'climate'}
```

Make sure to provide area names with `--areas`. Device or entity names can be provided with `--names`.

``` sh
python3 -m hassil examples/en.yaml --areas office --names trapdoor
open the trapdoor in the office
{'intent': 'HassOpenCover', 'name': 'trapdoor', 'area': 'office'}
```

### Sampling Sentences

Sentences for each intent can be sampled from the intent YAML files:

``` sh
python3 -m hassil.sample examples/en.yaml -n 1
{"intent": "HassTurnOn", "text": "turn on the entity"}
{"intent": "HassTurnOff", "text": "turn off the entity"}
{"intent": "HassOpenCover", "text": "open the entity in the area"}
{"intent": "HassCloseCover", "text": "close the entity in the area"}
{"intent": "HassLightsSet", "text": "set the entity color to red"}
{"intent": "HassClimateSetTemperature", "text": "set temperature to 0 degrees in the area"}
{"intent": "HassClimateGetTemperature", "text": "what is the temperature in the area"}
```

The `--areas` and `--names` arguments are the same from `python3 -m hassil`, but default to generic "area" and "entity" terms.

Exclude the `-n` argument to sample all possible sentences.

## Sentence Templates

Uses a custom parser written in Python.

* Alternative words or phrases
  * `(red|green|blue)`
  * `turn(s|ed|ing)`
* Optional words or phrases
  * `[the]`
  * `[this|that]`
  * `light[s]`
* Permutations of words or phrases
  * `(patience; you must have) my young Padawan`
  * `is [the] light (on; in <area>)`
* Slot Lists
  * `{list_name}`
  * `{list_name:slot_name}`
  * Refers to a pre-defined list of values in YAML (`lists`), either global or local (particular to the intent to which the sentence refers)
* Expansion Rules
  * `<rule_name>`
  * Refers to a pre-defined expansion rule in YAML (`expansion_rules`), either global or local (particular to the intent to which the sentence refers)
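These templates can also be exercised directly from Python. Below is a minimal sketch mirroring what the `hassil` CLI above does; the file path and slot values are placeholders:

``` python
from hassil import Intents, TextSlotList, recognize

intents = Intents.from_files(["examples/en.yaml"])
slot_lists = {
    "area": TextSlotList.from_strings(["office"]),
    "name": TextSlotList.from_strings(["trapdoor"]),
}

result = recognize("open the trapdoor in the office", intents, slot_lists=slot_lists)
if result is not None:
    print(result.intent.name)  # e.g., HassOpenCover
    print({e.name: e.value for e in result.entities_list})
```

`recognize` returns the first matching result (or `None`); `recognize_all` yields every match instead.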
## YAML Format

``` yaml
language: "<language code>"
intents:
  <intent name>:
    data:
      # List of sentences/slots/etc.
      - sentences:
          - "<sentence template>"
          - "<sentence template>"
        # Optional
        slots:
          # Fixed slots for the recognized intent
          <slot name>: <slot value>
        requires_context:
          # Must be present in match context
          <context key>:  # Any provided value is good
        excludes_context:
          # Must NOT be present in match context
          <context key>: <context value>
        expansion_rules:
          # Expansion rules which only apply to the intent, referenced as <rule name>
          <rule name>: "<sentence template>"
        lists:
          # Lists which apply only to the current set of sentences, referenced as {list_name} or {list_name:slot_name}
          <list name>:
            values:
              # See below for other possible types
              - "items"
              - "in list"
        metadata:
          # Arbitrary key/value pairs that will be available in the result
          <metadata key>: <metadata value>

# Optional lists of items that become alternatives in sentence templates
lists:
  # Referenced as {list_name} or {list_name:slot_name}
  <list name>:
    values:
      - "items"
      - "in list"
      - in: "text in"
        out: <value for slot>
        # Optional
        context:
          <context key>: <context value>
        metadata:
          # Arbitrary key/value pairs that will be available in the result
          <metadata key>: <metadata value>
  <range list name>:
    range:
      type: "number"
      from: 0
      to: 100  # inclusive
      multiplier: 1.0  # multiply to get final value
  <wildcard list name>:
    wildcard: true

# Optional rules that are expanded in sentence templates
expansion_rules:
  # Referenced as <rule_name>
  <rule_name>: "<sentence template>"

# Optional words that the intent recognizer can skip during recognition
skip_words:
  - "<word>"
```

hassil-3.0.1/examples/en.yaml
language: "en"
intents:
  HassTurnOn:
    data:
      - sentences:
          - "turn on <name>"
      - sentences:
          - "turn on light[s] in <area>"
        slots:
          domain: "light"
  HassTurnOff:
    data:
      - sentences:
          - "turn off <name>"
      - sentences:
          - "turn off light[s] in <area>"
        slots:
          domain: "light"
  HassOpenCover:
    data:
      - sentences:
          - "open <name> [in <area>]"
        slots:
          domain: "cover"
      - sentences:
          - "open [the] garage door"
        slots:
          domain: "cover"
          device_class: "garage_door"
  HassCloseCover:
    data:
      - sentences:
          - "close <name> [in <area>]"
        slots:
          domain: "cover"
      - sentences:
          - "close [the] garage door"
        slots:
          domain: "cover"
          device_class: "garage_door"
  HassLightsSet:
    data:
      - sentences:
          - "set (<name> | <area>) color to {color}"
        slots:
          domain: "light"
  HassClimateSetTemperature:
    data:
      - sentences:
          - "set temperature to <temperature> [in <area>]"
        slots:
          domain: "climate"
  HassClimateGetTemperature:
    data:
      - sentences:
          - "(what is | what's | whats) the temperature [in <area>]"
        slots:
          domain: "climate"
lists:
  color:
    values:
      - "red"
      - "green"
      - "blue"
  temperature:
    range:
      type: "temperature"
      from: 0
      to: 100
expansion_rules:
  name: "[the] {name}"
  area: "[the] {area}"
  temperature: "{temperature} [degree[s]]"
skip_words:
  - "please"

hassil-3.0.1/hassil/__init__.py
"""Home Assistant Intent Language parser""" from .expression import ( Alternative, Expression, Group, ListReference, Permutation, RuleReference, Sentence, Sequence, TextChunk, ) from .intents import ( IntentData, IntentDataSettings, Intents, IntentsSettings, RangeFractionType, RangeSlotList, RangeType, SlotList, TextSlotList, TextSlotValue, ) from .parse_expression import parse_sentence from .recognize import ( RecognizeResult, is_match, recognize, recognize_all, recognize_best, ) from .sample import sample_expression, sample_intents from .trie import Trie, TrieNode from .util import ( check_excluded_context, check_required_context, is_template, merge_dict, normalize_text, normalize_whitespace, ) __all__ = [ "Alternative", "check_excluded_context", "check_required_context", "Expression", "Group", "IntentData", "IntentDataSettings", "Intents",
"IntentsSettings", "is_match", "is_template", "ListReference", "merge_dict", "normalize_text", "normalize_whitespace", "parse_sentence", "Permutation", "RangeFractionType", "RangeSlotList", "RangeType", "recognize", "recognize_all", "recognize_best", "RecognizeResult", "RuleReference", "sample_expression", "sample_intents", "Sentence", "Sequence", "SlotList", "TextChunk", "TextSlotList", "TextSlotValue", "Trie", "TrieNode", ] hassil-3.0.1/hassil/__main__.py000066400000000000000000000047711477332007300163720ustar00rootroot00000000000000"""Command-line interface to hassil.""" import argparse import logging import os import sys from pathlib import Path import yaml from .intents import Intents, TextSlotList from .recognize import recognize from .util import merge_dict _LOGGER = logging.getLogger("hassil") def main(): """Main entry point""" parser = argparse.ArgumentParser() parser.add_argument("yaml", nargs="+", help="YAML files or directories") parser.add_argument( "--areas", nargs="+", help="Area names", default=[], ) parser.add_argument("--names", nargs="+", default=[], help="Device/entity names") parser.add_argument( "--debug", action="store_true", help="Print DEBUG messages to the console" ) args = parser.parse_args() level = logging.DEBUG if args.debug else logging.INFO logging.basicConfig(level=level) _LOGGER.debug(args) slot_lists = { "area": TextSlotList.from_strings(args.areas), "name": TextSlotList.from_strings(args.names), } input_dict = {"intents": {}} for yaml_path_str in args.yaml: yaml_path = Path(yaml_path_str) if yaml_path.is_dir(): yaml_file_paths = yaml_path.glob("*.yaml") else: yaml_file_paths = [yaml_path] for yaml_file_path in yaml_file_paths: _LOGGER.debug("Loading file: %s", yaml_file_path) with open(yaml_file_path, "r", encoding="utf-8") as yaml_file: merge_dict(input_dict, yaml.safe_load(yaml_file)) assert input_dict, "No intent YAML files loaded" intents = Intents.from_dict(input_dict) _LOGGER.info("Area names: %s", args.areas) _LOGGER.info("Device/Entity names: %s", args.names) if os.isatty(sys.stdout.fileno()): print("Reading sentences from stdin...", file=sys.stderr) try: for line in sys.stdin: line = line.strip() if not line: continue try: result = recognize(line, intents, slot_lists=slot_lists) if result is not None: print( { "intent": result.intent.name, **{e.name: e.value for e in result.entities_list}, } ) else: print("") except Exception: _LOGGER.exception(line) except KeyboardInterrupt: pass if __name__ == "__main__": main() hassil-3.0.1/hassil/_resources.py000066400000000000000000000001701477332007300170100ustar00rootroot00000000000000"""Shared access to package resources""" import importlib.metadata __version__ = importlib.metadata.version("hassil") hassil-3.0.1/hassil/errors.py000066400000000000000000000004221477332007300161530ustar00rootroot00000000000000"""Errors for hassil.""" class HassilError(Exception): """Base class for hassil errors""" class MissingListError(HassilError): """Error when a {slot_list} is missing.""" class MissingRuleError(HassilError): """Error when an is missing.""" hassil-3.0.1/hassil/expression.py000066400000000000000000000155461477332007300170530ustar00rootroot00000000000000"""Classes for representing sentence templates.""" from __future__ import annotations import re from abc import ABC from dataclasses import dataclass, field from typing import Dict, Iterable, Iterator, List, Optional, Tuple @dataclass class Expression(ABC): """Base class for expressions.""" @dataclass class TextChunk(Expression): """Contiguous chunk of text 
(with whitespace).""" # Text with casing/whitespace normalized text: str = "" # Set in __post_init__ original_text: str = None # type: ignore parent: "Optional[Group]" = None def __post_init__(self): if self.original_text is None: self.original_text = self.text @property def is_empty(self) -> bool: """True if the chunk is empty""" return self.text == "" @staticmethod def empty() -> TextChunk: """Returns an empty text chunk""" return TextChunk() @dataclass class Group(Expression): """Ordered group of expressions. Supports sequences, optionals, and alternatives.""" # Items in the group items: List[Expression] = field(default_factory=list) def text_chunk_count(self) -> int: """Return the number of TextChunk expressions in this group (recursive).""" num_text_chunks = 0 for item in self.items: if isinstance(item, TextChunk): num_text_chunks += 1 elif isinstance(item, Group): grp: Group = item num_text_chunks += grp.text_chunk_count() return num_text_chunks def list_names( self, expansion_rules: Optional[Dict[str, Sentence]] = None, ) -> Iterator[str]: """Return names of list references (recursive).""" for item in self.items: yield from self._list_names(item, expansion_rules) def _list_names( self, item: Expression, expansion_rules: Optional[Dict[str, Sentence]] = None, ) -> Iterator[str]: """Return names of list references (recursive).""" if isinstance(item, ListReference): list_ref: ListReference = item yield list_ref.list_name elif isinstance(item, Group): grp: Group = item yield from grp.list_names(expansion_rules) elif isinstance(item, RuleReference): rule_ref: RuleReference = item if expansion_rules and (rule_ref.rule_name in expansion_rules): rule_body = expansion_rules[rule_ref.rule_name].expression yield from self._list_names(rule_body, expansion_rules) @dataclass class Sequence(Group): """Sequence of expressions.""" @dataclass class Alternative(Group): """Expressions where only one will be recognized.""" is_optional: bool = False @dataclass class Permutation(Group): """Permutations of a set of expressions.""" def iterate_permutations(self) -> Iterable[Tuple[Expression, Permutation]]: """Iterate over all permutations.""" for i, item in enumerate(self.items): items = self.items.copy() del items[i] rest = Permutation(items=items) yield (item, rest) @dataclass class RuleReference(Expression): """Reference to an expansion rule by .""" # Name of referenced rule rule_name: str = "" @dataclass class ListReference(Expression): """Reference to a list by {name}.""" list_name: str = "" prefix: Optional[str] = None suffix: Optional[str] = None is_end_of_word: bool = True _slot_name: Optional[str] = None def __post_init__(self): if ":" in self.list_name: # list_name:slot_name self.list_name, self._slot_name = self.list_name.split(":", maxsplit=1) else: self._slot_name = self.list_name @property def slot_name(self) -> str: """Name of slot to put list value into.""" assert self._slot_name is not None return self._slot_name @dataclass class Sentence: """A complete sentence template.""" expression: Expression text: Optional[str] = None pattern: Optional[re.Pattern] = None def text_chunk_count(self) -> int: """Return the number of TextChunk expressions in this sentence.""" assert isinstance(self.expression, Group) return self.expression.text_chunk_count() # pylint: disable=no-member def list_names( self, expansion_rules: Optional[Dict[str, Sentence]] = None, ) -> Iterator[str]: """Return names of list references in this sentence.""" assert isinstance(self.expression, Group) return 
self.expression.list_names(expansion_rules) # pylint: disable=no-member def compile(self, expansion_rules: Dict[str, Sentence]) -> None: if self.pattern is not None: # Already compiled return pattern_chunks: List[str] = [] self._compile_expression(self.expression, pattern_chunks, expansion_rules) pattern_str = "".join(pattern_chunks).replace(r"\ ", r"[ ]*") self.pattern = re.compile(f"^{pattern_str}$", re.IGNORECASE) def _compile_expression( self, exp: Expression, pattern_chunks: List[str], rules: Dict[str, Sentence] ) -> None: if isinstance(exp, TextChunk): # Literal text chunk: TextChunk = exp if chunk.text: escaped_text = re.escape(chunk.text) pattern_chunks.append(escaped_text) elif isinstance(exp, Group): grp: Group = exp if isinstance(grp, Sequence): for item in grp.items: self._compile_expression(item, pattern_chunks, rules) elif isinstance(grp, Alternative): if grp.items: pattern_chunks.append("(?:") for item in grp.items: self._compile_expression(item, pattern_chunks, rules) pattern_chunks.append("|") pattern_chunks[-1] = ")" elif isinstance(grp, Permutation): if grp.items: pattern_chunks.append("(?:") for item in grp.items: self._compile_expression(item, pattern_chunks, rules) pattern_chunks.append("|") pattern_chunks[-1] = f"){{{len(grp.items)}}}" else: raise ValueError(grp) elif isinstance(exp, ListReference): # Slot list pattern_chunks.append("(?:.+)") elif isinstance(exp, RuleReference): # Expansion rule rule_ref: RuleReference = exp if rule_ref.rule_name not in rules: raise ValueError(rule_ref) e_rule = rules[rule_ref.rule_name] self._compile_expression(e_rule.expression, pattern_chunks, rules) else: raise ValueError(exp) hassil-3.0.1/hassil/intents.py000066400000000000000000000376401477332007300163370ustar00rootroot00000000000000"""Classes/methods for loading YAML intent files.""" from abc import ABC from dataclasses import dataclass, field from enum import Enum from functools import cached_property from pathlib import Path from typing import IO, Any, Dict, Iterable, List, Optional, Set, Tuple, Union, cast from yaml import safe_load from .expression import Expression, Sentence, TextChunk from .parse_expression import parse_sentence from .util import is_template, merge_dict, normalize_text @dataclass class SlotList(ABC): """Base class for slot lists.""" name: Optional[str] """Name of the slot list.""" class RangeType(str, Enum): """Number range type.""" NUMBER = "number" PERCENTAGE = "percentage" TEMPERATURE = "temperature" class RangeFractionType(str, Enum): """Number range fraction type.""" HALVES = "halves" TENTHS = "tenths" @staticmethod def get_fractions(fraction_type: "Optional[RangeFractionType]") -> List[float]: if fraction_type == RangeFractionType.HALVES: return [0.5] if fraction_type == RangeFractionType.TENTHS: return [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] return [] @dataclass class RangeSlotList(SlotList): """Slot list for a range of numbers.""" start: int stop: int step: int = 1 type: RangeType = RangeType.NUMBER fraction_type: Optional[RangeFractionType] = None multiplier: Optional[float] = None digits: bool = True words: bool = True words_language: Optional[str] = None def __post_init__(self): """Validate number range""" assert self.start <= self.stop, "start cannot be greater than stop" assert self.step > 0, "step must be positive" assert self.digits or self.words, "must have digits, words, or both" def get_numbers(self) -> Iterable[Union[float, int]]: integers = range(self.start, self.stop + 1, self.step) if self.fraction_type is None: yield from 
integers else: fractions = RangeFractionType.get_fractions(self.fraction_type) for integer in integers: yield integer for fraction in fractions: yield integer + fraction @dataclass class TextSlotValue: """Single value in a text slot list.""" text_in: Expression """Input text for this value""" value_out: Any """Output value put into slot""" context: Optional[Dict[str, Any]] = None """Items added to context if value is matched""" metadata: Optional[Dict[str, Any]] = None """Additional metadata to be returned if value is matched""" @staticmethod def from_tuple( value_tuple: Union[ Tuple[str, Any], Tuple[str, Any, Dict[str, Any]], Tuple[str, Any, Dict[str, Any], Dict[str, Any]], ], allow_template: bool = True, ) -> "TextSlotValue": """Construct text slot value from a tuple.""" text_in, value_out, context, metadata = ( value_tuple[0], value_tuple[1], None, None, ) if len(value_tuple) > 2: context = cast(Tuple[str, Any, Dict[str, Any]], value_tuple)[2] if len(value_tuple) > 3: metadata = cast( Tuple[str, Any, Dict[str, Any], Dict[str, Any]], value_tuple )[3] return TextSlotValue( text_in=_maybe_parse_template(text_in, allow_template), value_out=value_out, context=context, metadata=metadata, ) @dataclass class TextSlotList(SlotList): """Slot list with pre-defined text values.""" values: List[TextSlotValue] @staticmethod def from_strings( strings: Iterable[str], allow_template: bool = True, name: Optional[str] = None, ) -> "TextSlotList": """ Construct a text slot list from strings. Input and output values are the same text. """ return TextSlotList( name=name, values=[ TextSlotValue( text_in=_maybe_parse_template(text, allow_template), value_out=text, ) for text in strings ], ) @staticmethod def from_tuples( tuples: Iterable[ Union[ Tuple[str, Any], Tuple[str, Any, Dict[str, Any]], Tuple[str, Any, Dict[str, Any], Dict[str, Any]], ] ], allow_template: bool = True, name: Optional[str] = None, ) -> "TextSlotList": """ Construct a text slot list from text/value pairs. Input values are the left (text), output values are the right (any). 
""" return TextSlotList( name=name, values=[ TextSlotValue.from_tuple(value_tuple, allow_template) for value_tuple in tuples ], ) @dataclass class WildcardSlotList(SlotList): """Matches as much text as possible.""" @dataclass class IntentDataSettings: """Settings for intent data.""" filter_with_regex: bool = True """Use regular expressions compiled from sentence patterns to filter possible matches.""" @dataclass(frozen=True) class IntentData: """Block of sentences and known slots for an intent.""" sentence_texts: List[str] """Sentence templates that match this intent.""" slots: Dict[str, Any] = field(default_factory=dict) """Slot values that are assumed if intent is matched.""" response: Optional[str] = None """Key for response to intent.""" requires_context: Dict[str, Any] = field(default_factory=dict) """Context items required before match is successful.""" excludes_context: Dict[str, Any] = field(default_factory=dict) """Context items that must not be present for match to be successful.""" expansion_rules: Dict[str, Sentence] = field(default_factory=dict) """Local expansion rules in the context of a single intent.""" slot_lists: Dict[str, SlotList] = field(default_factory=dict) """Local slot lists in the context of a single intent.""" wildcard_list_names: Set[str] = field(default_factory=set) """List names that are wildcards.""" metadata: Optional[Dict[str, Any]] = None """Metadata that will be passed into the result if matched.""" required_keywords: Optional[Set[str]] = None """Keywords that must be present for any sentence to match.""" settings: IntentDataSettings = field(default_factory=IntentDataSettings) """Settings for block of sentences.""" @cached_property def sentences(self) -> List[Sentence]: """Sentence templates that match this intent.""" sentences = [ parse_sentence(text, keep_text=True) for text in self.sentence_texts ] # Sort sentences so that wildcards with more literal text chunks are processed first. 
# This will reorder certain wildcards, for example: # # - "play {album} by {artist}" # - "play {album} by {artist} in {room}" # # will be reordered to: # # - "play {album} by {artist} in {room}" # - "play {album} by {artist}" sentences = sorted(sentences, key=self._sentence_order) return sentences def _sentence_order(self, sentence: Sentence) -> int: has_wildcards = False if self.wildcard_list_names: # Look for wildcard list references for list_name in sentence.list_names(): if list_name in self.wildcard_list_names: has_wildcards = True break if has_wildcards: # Sentences with more text chunks should be processed sooner return -sentence.text_chunk_count() return 0 @dataclass class Intent: """A named intent with sentences + slots.""" name: str data: List[IntentData] = field(default_factory=list) @dataclass class IntentsSettings: """Settings for intents.""" ignore_whitespace: bool = False """True if whitespace should be ignored during matching.""" filter_with_regex: bool = True """Use regular expressions compiled from sentence patterns to filter possible matches.""" @dataclass class Intents: """Collection of intents, rules, and lists for a language.""" language: str """Language code (e.g., en).""" intents: Dict[str, Intent] """Intents mapped by name.""" slot_lists: Dict[str, SlotList] = field(default_factory=dict) """Slot lists mapped by name.""" expansion_rules: Dict[str, Sentence] = field(default_factory=dict) """Expansion rules mapped by name.""" skip_words: List[str] = field(default_factory=list) """Words that can be skipped during recognition.""" settings: IntentsSettings = field(default_factory=IntentsSettings) """Settings that may change recognition.""" @staticmethod def from_files(file_paths: Iterable[Union[str, Path]]) -> "Intents": """Load intents from YAML file paths.""" intents_dict: Dict[str, Any] = {} for file_path in file_paths: with open(file_path, "r", encoding="utf-8") as yaml_file: merge_dict(intents_dict, safe_load(yaml_file)) return Intents.from_dict(intents_dict) @staticmethod def from_yaml(yaml_file: IO[str]) -> "Intents": """Load intents from a YAML file.""" return Intents.from_dict(safe_load(yaml_file)) @staticmethod def from_dict(input_dict: Dict[str, Any]) -> "Intents": """Parse intents from a dict.""" # language: "" # settings: # ignore_whitespace: false # filter_with_regex: false # intents: # IntentName: # data: # - sentences: # - "" # slots: # : # : # - # metadata: # key: value # expansion_rules: # : "" # lists: # : # values: # - "" # wildcard_list_names: Set[str] = { list_name for list_name, list_dict in input_dict.get("lists", {}).items() if list_dict.get("wildcard", False) } return Intents( language=input_dict["language"], intents={ intent_name: Intent( name=intent_name, data=[ IntentData( sentence_texts=data_dict["sentences"], slots=data_dict.get("slots", {}), requires_context=data_dict.get("requires_context", {}), excludes_context=data_dict.get("excludes_context", {}), expansion_rules={ rule_name: parse_sentence(rule_body, keep_text=True) for rule_name, rule_body in data_dict.get( "expansion_rules", {} ).items() }, slot_lists={ list_name: _parse_list(list_name, list_dict) for list_name, list_dict in data_dict.get( "lists", {} ).items() }, response=data_dict.get("response"), wildcard_list_names=wildcard_list_names, metadata=data_dict.get("metadata"), required_keywords=( set(data_dict["required_keywords"]) if "required_keywords" in data_dict else None ), settings=_parse_data_settings( data_dict.get("settings", {}) ), ) for data_dict in intent_dict["data"] ], 
) for intent_name, intent_dict in input_dict["intents"].items() }, slot_lists={ list_name: _parse_list(list_name, list_dict) for list_name, list_dict in input_dict.get("lists", {}).items() }, expansion_rules={ rule_name: parse_sentence(rule_body, keep_text=True) for rule_name, rule_body in input_dict.get( "expansion_rules", {} ).items() }, skip_words=input_dict.get("skip_words", []), settings=_parse_settings(input_dict.get("settings", {})), ) def _parse_list( list_name: str, list_dict: Dict[str, Any], allow_template: bool = True, ) -> SlotList: """Parses a slot list from a dict.""" if "values" in list_dict: # Text values text_values: List[TextSlotValue] = [] for value in list_dict["values"]: if isinstance(value, str) and allow_template and is_template(value): # Wrap template value = {"in": value} if isinstance(value, str): # String value text_values.append( TextSlotValue( text_in=_maybe_parse_template(value, allow_template=False), value_out=value, ) ) else: # Object with "in" and "out" text_values.append( TextSlotValue( text_in=_maybe_parse_template(value["in"], allow_template), value_out=value.get("out"), context=value.get("context"), metadata=value.get("metadata"), ) ) return TextSlotList(name=list_name, values=text_values) if "range" in list_dict: # Number range range_dict = list_dict["range"] range_multiplier = range_dict.get("multiplier") fractions_type = range_dict.get("fractions") return RangeSlotList( name=list_name, type=RangeType(range_dict.get("type", "number")), start=int(range_dict["from"]), stop=int(range_dict["to"]), step=int(range_dict.get("step", 1)), multiplier=( float(range_multiplier) if range_multiplier is not None else None ), digits=bool(range_dict.get("digits", True)), words=bool(range_dict.get("words", True)), words_language=range_dict.get("words_language"), fraction_type=( RangeFractionType(fractions_type) if fractions_type is not None else None ), ) if list_dict.get("wildcard", False): # Wildcard return WildcardSlotList(name=list_name) raise ValueError(f"Unknown slot list type: {list_dict}") def _parse_settings(settings_dict: Dict[str, Any]) -> IntentsSettings: """Parse intent settings.""" return IntentsSettings( ignore_whitespace=settings_dict.get("ignore_whitespace", False), filter_with_regex=settings_dict.get("filter_with_regex", True), ) def _parse_data_settings(settings_dict: Dict[str, Any]) -> IntentDataSettings: """Parse intent data settings.""" return IntentDataSettings( filter_with_regex=settings_dict.get("filter_with_regex", True), ) def _maybe_parse_template(text: str, allow_template: bool = True) -> Expression: """Parse string as a sentence template if it has template syntax.""" if allow_template and is_template(text): return parse_sentence(text).expression return TextChunk(normalize_text(text)) hassil-3.0.1/hassil/models.py000066400000000000000000000026761477332007300161370ustar00rootroot00000000000000"""Shared models.""" from abc import ABC from dataclasses import dataclass from typing import Any, Dict, Optional, Union from .util import remove_punctuation @dataclass class MatchEntity: """Named entity that has been matched from a {slot_list}""" name: str """Name of the entity.""" value: Any """Value of the entity.""" text: str """Original value text.""" metadata: Optional[Dict[str, Any]] = None """Entity metadata.""" is_wildcard: bool = False """True if entity is a wildcard.""" is_wildcard_open: bool = True """While True, wildcard can continue matching.""" is_wildcard_end_of_word: bool = True """True if wildcard {list} is followed by whitespace.""" 
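    # (Added note) A MatchEntity also carries in-progress wildcard state: the
    # matcher may keep absorbing input text into a wildcard entity while
    # is_wildcard_open is True, and is_wildcard_end_of_word records whether
    # the wildcard {list} reference was followed by whitespace in the template.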
@property def text_clean(self) -> str: """Trimmed text with punctuation removed.""" return remove_punctuation(self.text).strip() @dataclass class UnmatchedEntity(ABC): """Base class for unmatched entities.""" name: str """Name of entity that should have matched.""" @dataclass class UnmatchedTextEntity(UnmatchedEntity): """Text entity that should have matched.""" text: str """Text that failed to match slot values.""" is_open: bool = True """While True, entity can continue matching.""" @dataclass class UnmatchedRangeEntity(UnmatchedEntity): """Range entity that should have matched.""" value: Union[int, float] """Value of entity that was out of range.""" hassil-3.0.1/hassil/parse_expression.py000066400000000000000000000267151477332007300202450ustar00rootroot00000000000000import re from dataclasses import dataclass from typing import Optional from .expression import ( Alternative, Expression, Group, ListReference, Permutation, RuleReference, Sentence, Sequence, TextChunk, ) from .parser import ( GROUP_END, GROUP_START, LIST_END, LIST_START, OPT_END, OPT_START, RULE_END, RULE_START, ParseChunk, ParseError, ParseType, next_chunk, ) from .util import normalize_text @dataclass class ParseMetadata: """Debug metadata for more helpful parsing errors.""" file_name: str line_number: int intent_name: Optional[str] = None class ParseExpressionError(ParseError): def __init__(self, chunk: ParseChunk, metadata: Optional[ParseMetadata] = None): super().__init__() self.chunk = chunk self.metadata = metadata def __str__(self) -> str: return f"Error in chunk {self.chunk} at {self.metadata}" def _ensure_alternative(grp: Group) -> Alternative: if isinstance(grp, Alternative): return grp # Collapse items into a single group return Alternative(items=[grp]) def _ensure_permutation(grp: Group) -> Permutation: if isinstance(grp, Permutation): return grp # Collapse items into a single group return Permutation(items=[grp]) def parse_group( grp_chunk: ParseChunk, metadata: Optional[ParseMetadata] = None ) -> Group: grp: Group = Sequence() if grp_chunk.parse_type == ParseType.GROUP: grp_text = _remove_delimiters(grp_chunk.text, GROUP_START, GROUP_END) elif grp_chunk.parse_type == ParseType.OPT: grp_text = _remove_delimiters(grp_chunk.text, OPT_START, OPT_END) else: raise ParseExpressionError(grp_chunk, metadata=metadata) item_chunk = next_chunk(grp_text) last_grp_text = grp_text while item_chunk is not None: if item_chunk.parse_type in ( ParseType.WORD, ParseType.GROUP, ParseType.OPT, ParseType.LIST, ParseType.RULE, ): # Chunk text ends with explicit whitespace is_end_of_word = (item_chunk.end_index < len(grp_text)) and grp_text[ item_chunk.end_index ].isspace() item = parse_expression( item_chunk, metadata=metadata, is_end_of_word=is_end_of_word ) if isinstance(grp, (Alternative, Permutation)): # Add to the most recent sequence last_item = grp.items[-1] if not isinstance(last_item, Sequence): raise ParseExpressionError(grp_chunk, metadata=metadata) last_item.items.append(item) else: # Add to parent group grp.items.append(item) if isinstance(item, TextChunk): item_tc: TextChunk = item item_tc.parent = grp elif item_chunk.parse_type == ParseType.ALT: grp = _ensure_alternative(grp) # Begin new sequence grp.items.append(Sequence()) elif item_chunk.parse_type == ParseType.PERM: grp = _ensure_permutation(grp) # Begin new sequence grp.items.append(Sequence()) else: raise ParseExpressionError(grp_chunk, metadata=metadata) # Next chunk grp_text = grp_text[item_chunk.end_index :] if grp_text == last_grp_text: # No change, unable 
to proceed raise ParseExpressionError(grp_chunk, metadata=metadata) item_chunk = next_chunk(grp_text) last_grp_text = grp_text if isinstance(grp, Permutation): _add_spaces_between_items(grp) return grp def parse_expression( chunk: ParseChunk, metadata: Optional[ParseMetadata] = None, is_end_of_word: bool = True, ) -> Expression: if chunk.parse_type == ParseType.WORD: original_text = _remove_escapes(chunk.text) text = normalize_text(original_text) return TextChunk(text=text, original_text=original_text) if chunk.parse_type == ParseType.GROUP: return parse_group(chunk, metadata=metadata) if chunk.parse_type == ParseType.OPT: grp = parse_group(chunk, metadata=metadata) alt = _ensure_alternative(grp) alt.is_optional = True alt.items.append(TextChunk(text="", parent=grp)) grp = alt return grp if chunk.parse_type == ParseType.LIST: text = _remove_escapes(chunk.text) list_name = _remove_delimiters(text, LIST_START, LIST_END) return ListReference(list_name=list_name, is_end_of_word=is_end_of_word) if chunk.parse_type == ParseType.RULE: text = _remove_escapes(chunk.text) rule_name = _remove_delimiters(text, RULE_START, RULE_END) return RuleReference(rule_name=rule_name) raise ParseExpressionError(chunk, metadata=metadata) def parse_sentence( text: str, keep_text=False, metadata: Optional[ParseMetadata] = None ) -> Sentence: """Parse a single sentence.""" original_text = text text = text.strip() # text = fix_pattern_whitespace(text.strip()) # Wrap in a group because sentences need to always be groups. text = f"({text})" chunk = next_chunk(text) if chunk is None: raise ParseError(f"Unexpected empty chunk in: {text}") if chunk.parse_type != ParseType.GROUP: raise ParseError(f"Expected (group) in: {text}") if chunk.start_index != 0: raise ParseError(f"Expected (group) to start at index 0 in: {text}") if chunk.end_index != len(text): raise ParseError(f"Expected chunk to end at index {chunk.end_index} in: {text}") grp = parse_expression(chunk, metadata=metadata) if not isinstance(grp, Group): raise ParseError(f"Expected Group, got: {grp}") # Unpack redundant group if len(grp.items) == 1: first_item = grp.items[0] if isinstance(first_item, Group): grp = first_item return Sentence( expression=grp, text=original_text if keep_text else None, ) # def fix_pattern_whitespace(text: str) -> str: # if PERM_SEP in text: # # Fix within permutations # text = PERM_SEP.join( # GROUP_START + fix_pattern_whitespace(perm_chunk) + GROUP_END # for perm_chunk in text.split(PERM_SEP) # ) # # Recursively process (group) # group_start_index = text.find(GROUP_START) # while group_start_index > 0: # # TODO: Can't cross OPT boundary # group_end_index = find_end_delimiter( # text, group_start_index + 1, GROUP_START, GROUP_END # ) # if group_end_index is None: # return text # will fail parsing # before_group, text_without_group, after_group = ( # text[:group_start_index], # text[group_start_index + 1 : group_end_index - 1], # text[group_end_index:], # ) # text = ( # fix_pattern_whitespace(before_group) # + GROUP_START # + fix_pattern_whitespace(text_without_group) # + GROUP_END # + fix_pattern_whitespace(after_group) # ) # group_start_index = text.find(GROUP_START, group_end_index) # # Fix whitespace after optional (beginning of sentence) # left_text, right_text = "", text # while right_text.startswith(OPT_START): # opt_end_index = find_end_delimiter(right_text, 1, OPT_START, OPT_END) # if (opt_end_index is None) or (opt_end_index >= len(right_text)): # break # if not right_text[opt_end_index].isspace(): # # No adjustment needed # 
break # # Move whitespace into optional and group # left_text += ( # OPT_START # + GROUP_START # + right_text[1 : opt_end_index - 1] # + GROUP_END # + " " # + OPT_END # ) # right_text = right_text[opt_end_index:].lstrip() # text = left_text + right_text # # Fix whitespace before optional (end of sentence) # left_text, right_text = text, "" # while left_text.endswith(OPT_END): # opt_end_index = len(left_text) # opt_start_index = left_text.rfind(OPT_START) # maybe_opt_end_index: Optional[int] = None # # Keep looking back for the "[" that starts this optional # while opt_start_index > 0: # maybe_opt_end_index = find_end_delimiter( # left_text, opt_start_index + 1, OPT_START, OPT_END # ) # if maybe_opt_end_index == opt_end_index: # break # found the matching "[" # # Look farther back # opt_start_index = left_text.rfind(OPT_START, 0, opt_start_index) # if (maybe_opt_end_index != opt_end_index) or (opt_start_index <= 0): # break # if not left_text[opt_start_index - 1].isspace(): # # No adjustment needed # break # # Move whitespace into optional and group # right_text = ( # (OPT_START + " " + GROUP_START + left_text[opt_start_index + 1 : -1]) # + GROUP_END # + OPT_END # + right_text # ) # left_text = left_text[:opt_start_index].rstrip() # text = left_text + right_text # # Fix whitespace around optional (middle of a sentence) # left_text, right_text = "", text # match = re.search(rf"\s({re.escape(OPT_START)})", right_text) # while match is not None: # opt_start_index = match.start(1) # opt_end_index = find_end_delimiter( # right_text, opt_start_index + 1, OPT_START, OPT_END # ) # if (opt_end_index is None) or (opt_end_index >= len(text)): # break # if right_text[opt_end_index].isspace(): # # Move whitespace inside optional, add group # left_text += ( # right_text[: opt_start_index - 1] # + OPT_START # + " " # + GROUP_START # + right_text[opt_start_index + 1 : opt_end_index - 1].lstrip() # + GROUP_END # + OPT_END # ) # else: # left_text += right_text[:opt_end_index] # right_text = right_text[opt_end_index:] # if not right_text: # break # match = re.search(rf"\s({re.escape(OPT_START)})", right_text) # text = left_text + right_text # return text def _remove_delimiters( text: str, start_char: str, end_char: Optional[str] = None ) -> str: """Removes the surrounding delimiters in text.""" if end_char is None: assert len(text) > 1, "Text is too short" assert text[0] == start_char, "Wrong start char" return text[1:] assert len(text) > 2, "Text is too short" assert text[0] == start_char, "Wrong start char" assert text[-1] == end_char, "Wrong end char" return text[1:-1] def _remove_escapes(text: str) -> str: """Remove backslash escape sequences""" return re.sub(r"\\(.)", r"\1", text) def _escape_text(text: str) -> str: """Escape parentheses, etc.""" return re.sub(r"([()\[\]{}<>])", r"\\\1", text) def _add_spaces_between_items(perm: Permutation) -> None: """Add spaces between each 2 items of a permutation""" for seq in perm.items: assert isinstance(seq, Sequence), "Item is not a sequence" seq.items.insert(0, TextChunk(text=" ")) seq.items.append(TextChunk(text=" ")) hassil-3.0.1/hassil/parser.py000066400000000000000000000111531477332007300161360ustar00rootroot00000000000000from dataclasses import dataclass from enum import Enum, auto from typing import Optional GROUP_START = "(" GROUP_END = ")" OPT_START = "[" OPT_END = "]" LIST_START = "{" LIST_END = "}" RULE_START = "<" RULE_END = ">" DELIM = { GROUP_START: GROUP_END, OPT_START: OPT_END, LIST_START: LIST_END, RULE_START: RULE_END, } DELIM_START = 
tuple(DELIM.keys()) DELIM_END = tuple(DELIM.values()) WORD_SEP = " " ALT_SEP = "|" PERM_SEP = ";" ESCAPE_CHAR = "\\" class ParseType(Enum): """Parse chunk types.""" WORD = auto() GROUP = auto() OPT = auto() LIST = auto() RULE = auto() ALT = auto() PERM = auto() END = auto() @dataclass class ParseChunk: """Block of text that means something to the parser.""" text: str start_index: int end_index: int parse_type: ParseType class ParseError(Exception): """Base class for parse errors""" def _find_end_delimiter( text: str, start_index: int, start_char: str, end_char: str ) -> Optional[int]: """Finds the index of an ending delimiter.""" if start_index > 0: text = text[start_index:] stack = 1 is_escaped = False for i, c in enumerate(text): if is_escaped: is_escaped = False continue if c == ESCAPE_CHAR: is_escaped = True continue if c == end_char: stack -= 1 if stack < 0: return None if stack == 0: return start_index + i + 1 if c == start_char: stack += 1 return None def _find_end_word(text: str, start_index: int) -> Optional[int]: """Finds the end index of a word.""" if start_index > 0: text = text[start_index:] is_escaped = False separator_found = False for i, c in enumerate(text): if is_escaped: is_escaped = False continue if c == ESCAPE_CHAR: is_escaped = True continue if (i > 0) and (c == WORD_SEP): separator_found = True continue if separator_found and (c != WORD_SEP): # Start of next word return start_index + i if (c == ALT_SEP) or (c == PERM_SEP) or (c in DELIM_START) or (c in DELIM_END): return start_index + i if text: # Entire text is a word return start_index + len(text) return None def _peek_type(text, start_index: int) -> ParseType: """Gets the parse chunk type based on the next character.""" if start_index >= len(text): return ParseType.END c = text[start_index] if c == GROUP_START: return ParseType.GROUP if c == OPT_START: return ParseType.OPT if c == LIST_START: return ParseType.LIST if c == RULE_START: return ParseType.RULE if c == ALT_SEP: return ParseType.ALT if c == PERM_SEP: return ParseType.PERM return ParseType.WORD def next_chunk(text: str, start_index: int = 0) -> Optional[ParseChunk]: """Gets the next parsable chunk from text.""" next_type = _peek_type(text, start_index) if next_type == ParseType.END: return None if next_type == ParseType.WORD: # Single word end_index = _find_end_word(text, start_index) if end_index is None: raise ParseError( f"Unable to find end of word from index {start_index} in: {text}" ) elif next_type in (ParseType.GROUP, ParseType.OPT, ParseType.LIST, ParseType.RULE): if next_type == ParseType.GROUP: start_char = GROUP_START end_char = GROUP_END error_str = "group ')'" elif next_type == ParseType.OPT: start_char = OPT_START end_char = OPT_END error_str = "optional ']'" elif next_type == ParseType.LIST: start_char = LIST_START end_char = LIST_END error_str = "list '}'" else: # next_type == ParseType.RULE start_char = RULE_START end_char = RULE_END error_str = "rule '>'" end_index = _find_end_delimiter(text, start_index + 1, start_char, end_char) if end_index is None: raise ParseError( f"Unable to find end of {error_str} from index {start_index} in: {text}" ) else: # next_type in (ParseType.ALT, ParseType.PERM): end_index = start_index + 1 chunk_text = text[start_index:end_index] return ParseChunk( text=chunk_text, start_index=start_index, end_index=end_index, parse_type=next_type, ) 
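# Example (added sketch, not part of the library API): stepping through a
# sentence template with next_chunk(). Each call returns a typed ParseChunk;
# feeding chunk.end_index back in as start_index walks the whole template.
if __name__ == "__main__":
    template = "turn on [the] {name}"
    index = 0
    chunk = next_chunk(template, index)
    while chunk is not None:
        # Prints e.g.: ParseType.WORD 'turn ' ... ParseType.OPT '[the]' ...
        print(chunk.parse_type, repr(chunk.text))
        index = chunk.end_index
        chunk = next_chunk(template, index)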
hassil-3.0.1/hassil/py.typed000066400000000000000000000000001477332007300157540ustar00rootroot00000000000000hassil-3.0.1/hassil/recognize.py000066400000000000000000000550551477332007300166400ustar00rootroot00000000000000"""Methods for recognizing intents from text.""" import collections.abc import itertools import logging from dataclasses import dataclass, field from typing import Any, Dict, Iterable, List, MutableSequence, Optional, Tuple from .expression import Sentence from .intents import Intent, IntentData, Intents, SlotList from .models import MatchEntity, UnmatchedEntity, UnmatchedTextEntity from .string_matcher import MatchContext, MatchSettings, match_expression from .util import ( WHITESPACE, check_excluded_context, check_required_context, normalize_text, remove_punctuation, remove_skip_words, ) MISSING_ENTITY = "" _LOGGER = logging.getLogger() @dataclass class RecognizeResult: """Result of recognition.""" intent: Intent """Matched intent""" intent_data: IntentData """Matched intent data""" entities: Dict[str, MatchEntity] = field(default_factory=dict) """Matched entities mapped by name.""" entities_list: List[MatchEntity] = field(default_factory=list) """Matched entities as a list (duplicates allowed).""" response: Optional[str] = None """Key for intent response.""" context: Dict[str, Any] = field(default_factory=dict) """Context values acquired during matching.""" unmatched_entities: Dict[str, UnmatchedEntity] = field(default_factory=dict) """Unmatched entities mapped by name.""" unmatched_entities_list: List[UnmatchedEntity] = field(default_factory=list) """Unmatched entities as a list (duplicates allowed).""" text_chunks_matched: int = 0 """Number of literal text chunks that were successfully matched.""" intent_sentence: Optional[Sentence] = None """Sentence template that was matched.""" intent_metadata: Optional[Dict[str, Any]] = None """Metadata from the intent sentence that was matched.""" def recognize( text: str, intents: Intents, slot_lists: Optional[Dict[str, SlotList]] = None, expansion_rules: Optional[Dict[str, Sentence]] = None, skip_words: Optional[List[str]] = None, intent_context: Optional[Dict[str, Any]] = None, default_response: Optional[str] = "default", allow_unmatched_entities: bool = False, language: Optional[str] = None, ) -> Optional[RecognizeResult]: """Return the first match of input text/words against a collection of intents. text: Text to recognize intents: Compiled intents slot_lists: Pre-defined text lists, ranges, or wildcards expansion_rules: Named template snippets skip_words: Strings to ignore in text intent_context: Slot values to use when not found in text default_response: Response key to use if not set in intent allow_unmatched_entities: True if entity values outside slot lists are allowed (slower) language: Optional language to use when converting digits to words Returns the first result. If allow_unmatched_entities is True, you should check for unmatched entities. 
""" for result in recognize_all( text, intents, slot_lists=slot_lists, expansion_rules=expansion_rules, skip_words=skip_words, intent_context=intent_context, default_response=default_response, allow_unmatched_entities=allow_unmatched_entities, language=language, ): return result return None def recognize_all( text: str, intents: Intents, slot_lists: Optional[Dict[str, SlotList]] = None, expansion_rules: Optional[Dict[str, Sentence]] = None, skip_words: Optional[Iterable[str]] = None, intent_context: Optional[Dict[str, Any]] = None, default_response: Optional[str] = "default", allow_unmatched_entities: bool = False, language: Optional[str] = None, ) -> Iterable[RecognizeResult]: """Return all matches for input text/words against a collection of intents. text: Text to recognize intents: Compiled intents slot_lists: Pre-defined text lists, ranges, or wildcards expansion_rules: Named template snippets skip_words: Strings to ignore in text intent_context: Slot values to use when not found in text default_response: Response key to use if not set in intent allow_unmatched_entities: True if entity values outside slot lists are allowed (slower) language: Optional language to use when converting digits to words Yields results as they're matched. If allow_unmatched_entities is True, you should check for unmatched entities. """ text = normalize_text(remove_punctuation(text)).strip() if skip_words is None: skip_words = intents.skip_words else: # Combine skip words skip_words = list(itertools.chain(skip_words, intents.skip_words)) if skip_words: text = remove_skip_words(text, skip_words, intents.settings.ignore_whitespace) text_keywords = text.split() if slot_lists is None: slot_lists = intents.slot_lists else: # Combine with intents slot_lists = {**intents.slot_lists, **slot_lists} if slot_lists is None: slot_lists = {} if expansion_rules is None: expansion_rules = intents.expansion_rules else: # Combine rules expansion_rules = {**intents.expansion_rules, **expansion_rules} if intent_context is None: intent_context = {} # Filter intents based on context and keywords available_intents: MutableSequence[ Tuple[Intent, IntentData, MatchSettings, Optional[List[Sentence]]] ] = [] for intent in intents.intents.values(): for intent_data in intent.data: if ( intent_data.required_keywords and intent_data.required_keywords.isdisjoint(text_keywords) ): # No keyword overlap continue if intent_context: # Skip sentence templates that can't possibly be matched due to # requires/excludes context. # # Additional context can be added during matching, so we can # only be sure about keys that exist right now. 
if intent_data.requires_context and ( not check_required_context( intent_data.requires_context, intent_context, allow_missing_keys=True, ) ): continue if intent_data.excludes_context and ( not check_excluded_context( intent_data.excludes_context, intent_context ) ): continue match_settings = MatchSettings( slot_lists={ **slot_lists, **intent_data.slot_lists, }, expansion_rules={ **expansion_rules, **intent_data.expansion_rules, }, ignore_whitespace=intents.settings.ignore_whitespace, allow_unmatched_entities=allow_unmatched_entities, language=language or intents.language, ) available_intents.append((intent, intent_data, match_settings, None)) # Filter with regex if intents.settings.filter_with_regex and (not allow_unmatched_entities): matching_intents: MutableSequence[ Tuple[Intent, IntentData, MatchSettings, Optional[List[Sentence]]] ] = [] for intent, intent_data, match_settings, _intent_sentences in available_intents: if not intent_data.settings.filter_with_regex: # All sentences matching_intents.append((intent, intent_data, match_settings, None)) continue matching_intent_sentences = [] for intent_sentence in intent_data.sentences: # Compile to regex once intent_sentence.compile(match_settings.expansion_rules) assert intent_sentence.pattern is not None regex_match = intent_sentence.pattern.match(text) if regex_match is not None: matching_intent_sentences.append(intent_sentence) if matching_intent_sentences: matching_intents.append( (intent, intent_data, match_settings, matching_intent_sentences) ) if matching_intents: available_intents = matching_intents # Fall back to string matcher if intents.settings.ignore_whitespace: text = WHITESPACE.sub("", text) else: # Artifical word boundary text += " " for intent, intent_data, match_settings, intent_sentences in available_intents: if not intent_sentences: intent_sentences = intent_data.sentences # Check each sentence template for intent_sentence in intent_sentences: # Create initial context match_context = MatchContext( text=text, intent_context=intent_context, intent_sentence=intent_sentence, intent_data=intent_data, ) maybe_match_contexts = match_expression( match_settings, match_context, intent_sentence.expression ) yield from _process_match_contexts( maybe_match_contexts, intent, intent_data, default_response=default_response, allow_unmatched_entities=allow_unmatched_entities, ) def _merge_match_contexts( match_contexts: Iterable[MatchContext], merged_context: MatchContext ) -> MatchContext: for match_context in match_contexts: if match_context.text: # Needed for open wildcards merged_context.text = match_context.text merged_context.entities.extend(match_context.entities) merged_context.intent_context.update(match_context.intent_context) return merged_context def _process_match_contexts( match_contexts: Iterable[MatchContext], intent: Intent, intent_data: IntentData, default_response: Optional[str] = None, allow_unmatched_entities: bool = False, ) -> Iterable[RecognizeResult]: for maybe_match_context in match_contexts: # Close any open wildcards or unmatched entities final_text = maybe_match_context.text.strip() if final_text: if unmatched_entity := maybe_match_context.get_open_entity(): # Consume the rest of the text (unmatched entity) unmatched_entity.text += final_text unmatched_entity.is_open = False maybe_match_context.text = "" elif wildcard := maybe_match_context.get_open_wildcard(): # Consume the rest of the text (wildcard) wildcard.text += final_text wildcard.value = wildcard.text wildcard.is_wildcard_open = False 
maybe_match_context.text = "" if not maybe_match_context.is_match: # Incomplete match with text still left at the end continue # Verify excluded context if intent_data.excludes_context and ( not check_excluded_context( intent_data.excludes_context, maybe_match_context.intent_context, ) ): continue # Verify required context slots_from_context: List[MatchEntity] = [] if intent_data.requires_context and ( not _copy_and_check_required_context( intent_data.requires_context, maybe_match_context, slots_from_context, allow_unmatched_entities=allow_unmatched_entities, ) ): continue # Clean up wildcard entities for entity in maybe_match_context.entities: if not entity.is_wildcard: continue entity.text = entity.text.strip() if isinstance(entity.value, str): entity.value = entity.value.strip() # Add fixed entities entity_names = set(entity.name for entity in maybe_match_context.entities) for slot_name, slot_value in intent_data.slots.items(): if slot_name not in entity_names: maybe_match_context.entities.append( MatchEntity(name=slot_name, value=slot_value, text="") ) # Add context slots for slot_entity in slots_from_context: if slot_entity.name not in entity_names: maybe_match_context.entities.append(slot_entity) # Return each match response = default_response if intent_data.response is not None: response = intent_data.response intent_metadata: Optional[Dict[str, Any]] = None if maybe_match_context.intent_data is not None: intent_metadata = maybe_match_context.intent_data.metadata yield RecognizeResult( intent=intent, intent_data=intent_data, entities={entity.name: entity for entity in maybe_match_context.entities}, entities_list=maybe_match_context.entities, response=response, context=maybe_match_context.intent_context, unmatched_entities={ entity.name: entity for entity in maybe_match_context.unmatched_entities }, unmatched_entities_list=maybe_match_context.unmatched_entities, text_chunks_matched=maybe_match_context.text_chunks_matched, intent_sentence=maybe_match_context.intent_sentence, intent_metadata=intent_metadata, ) def is_match( text: str, sentence: Sentence, slot_lists: Optional[Dict[str, SlotList]] = None, expansion_rules: Optional[Dict[str, Sentence]] = None, skip_words: Optional[Iterable[str]] = None, entities: Optional[Dict[str, Any]] = None, intent_context: Optional[Dict[str, Any]] = None, ignore_whitespace: bool = False, allow_unmatched_entities: bool = False, language: Optional[str] = None, ) -> Optional[MatchContext]: """Return the first match of input text/words against a sentence expression.""" text = normalize_text(remove_punctuation(text)).strip() if skip_words: text = remove_skip_words(text, skip_words, ignore_whitespace) if ignore_whitespace: text = WHITESPACE.sub("", text) else: # Artifical word boundary text += " " if slot_lists is None: slot_lists = {} if expansion_rules is None: expansion_rules = {} if intent_context is None: intent_context = {} settings = MatchSettings( slot_lists=slot_lists, expansion_rules=expansion_rules, ignore_whitespace=ignore_whitespace, allow_unmatched_entities=allow_unmatched_entities, language=language, ) match_context = MatchContext( text=text, intent_context=intent_context, intent_sentence=sentence, ) for maybe_match_context in match_expression( settings, match_context, sentence.expression ): if maybe_match_context.is_match: return maybe_match_context return None def _copy_and_check_required_context( required_context: Dict[str, Any], maybe_match_context: MatchContext, slots_from_context: List[MatchEntity], allow_unmatched_entities: bool = 
False, ) -> bool: """Check required context and copy slots into new entities.""" for ( context_key, context_value, ) in required_context.items(): copy_to_slot: Optional[str] = None if isinstance(context_value, collections.abc.Mapping): # Unpack dict # : # value: ... # slot: true/false or "name" maybe_copy_to_slot = context_value.get("slot") if isinstance(maybe_copy_to_slot, str): # Slot name provided copy_to_slot = maybe_copy_to_slot elif maybe_copy_to_slot: # True copy_to_slot = context_key context_value = context_value.get("value") actual_value = maybe_match_context.intent_context.get(context_key) actual_text = "" actual_metadata: Optional[Dict[str, Any]] = None if isinstance(actual_value, collections.abc.Mapping): # Unpack dict actual_text = actual_value.get("text", "") actual_metadata = actual_value.get("metadata") actual_value = actual_value.get("value") if allow_unmatched_entities and (actual_value is None): # Look in unmatched entities for unmatched_context_entity in maybe_match_context.unmatched_entities: if (unmatched_context_entity.name == context_key) and isinstance( unmatched_context_entity, UnmatchedTextEntity ): actual_value = unmatched_context_entity.text break if actual_value == context_value and context_value is not None: # Exact match to context value, except when context value is required and not provided if copy_to_slot: slots_from_context.append( MatchEntity( name=copy_to_slot, value=actual_value, text=actual_text, metadata=actual_metadata, ) ) continue if (context_value is None) and (actual_value is not None): # Any value matches, as long as it's set if copy_to_slot: slots_from_context.append( MatchEntity( name=copy_to_slot, value=actual_value, text=actual_text, metadata=actual_metadata, ) ) continue if ( isinstance(context_value, collections.abc.Collection) and not isinstance(context_value, str) and (actual_value in context_value) ): # Actual value was in context value list if copy_to_slot: slots_from_context.append( MatchEntity( name=copy_to_slot, value=actual_value, text=actual_text, metadata=actual_metadata, ) ) continue if allow_unmatched_entities: # Create missing entity as unmatched has_unmatched_entity = False for unmatched_context_entity in maybe_match_context.unmatched_entities: if unmatched_context_entity.name == context_key: has_unmatched_entity = True break if not has_unmatched_entity: maybe_match_context.unmatched_entities.append( UnmatchedTextEntity( name=context_key, text=MISSING_ENTITY, is_open=False, ) ) else: # Did not match required context return False return True def recognize_best( text: str, intents: Intents, slot_lists: Optional[Dict[str, SlotList]] = None, expansion_rules: Optional[Dict[str, Sentence]] = None, skip_words: Optional[Iterable[str]] = None, intent_context: Optional[Dict[str, Any]] = None, default_response: Optional[str] = "default", allow_unmatched_entities: bool = False, language: Optional[str] = None, best_metadata_key: Optional[str] = None, best_slot_name: Optional[str] = None, ) -> Optional[RecognizeResult]: """Find the best result with the following priorities: 1. The result that has "best_metadata_key" in its metadata 2. The result that has an entity for "best_slot_name" and longest text 3. The result that matches the most literal text See "recognize_all" for other parameters. 
""" metadata_found = False slot_found = False best_results: List[RecognizeResult] = [] best_slot_quality: Optional[int] = None for result in recognize_all( text, intents, slot_lists=slot_lists, expansion_rules=expansion_rules, skip_words=skip_words, intent_context=intent_context, default_response=default_response, allow_unmatched_entities=allow_unmatched_entities, language=language, ): # Prioritize intents with a specific metadata key if best_metadata_key is not None: is_metadata = ( result.intent_metadata is not None and result.intent_metadata.get(best_metadata_key) ) if metadata_found and not is_metadata: continue if (not metadata_found) and is_metadata: metadata_found = True # Clear builtin results slot_found = False best_results = [] best_slot_quality = None # Prioritize results with a specific slot if best_slot_name: entity = result.entities.get(best_slot_name) is_slot = (entity is not None) and not entity.is_wildcard if slot_found and not is_slot: continue if (not slot_found) and is_slot: slot_found = True # Clear non-slot results best_results = [] if is_slot and (entity is not None) and isinstance(entity.value, str): # Prioritize results with a better slot value slot_quality = len(entity.text) if (best_slot_quality is None) or (slot_quality > best_slot_quality): best_slot_quality = slot_quality # Clear worse slot results best_results = [] elif slot_quality < best_slot_quality: continue # Accumulate results. We will resolve the ambiguity below. best_results.append(result) if best_results: # Prioritize matches with fewer wildcards and more literal text matched. return sorted(best_results, key=_get_result_score)[0] return None def _get_result_score(result: RecognizeResult) -> Tuple[int, int]: """Get sort score for a result with (wildcards, -text_matched). text_matched is negated since we are sorting with lowest first. 
""" num_wildcards = sum(1 for e in result.entities_list if e.is_wildcard) return (num_wildcards, -result.text_chunks_matched) hassil-3.0.1/hassil/sample.py000066400000000000000000000273101477332007300161250ustar00rootroot00000000000000"""CLI tool for sampling sentences from intents.""" import argparse import itertools import json import logging import sys from functools import partial from pathlib import Path from typing import Dict, Iterable, Optional, Set, Tuple import yaml from unicode_rbnf import RbnfEngine from .errors import MissingListError, MissingRuleError from .expression import ( Alternative, Expression, Group, ListReference, Permutation, RuleReference, Sentence, Sequence, TextChunk, ) from .intents import Intents, RangeSlotList, SlotList, TextSlotList, WildcardSlotList from .util import merge_dict, normalize_whitespace _LOGGER = logging.getLogger("hassil.sample") # lang -> engine _ENGINE_CACHE: Dict[str, RbnfEngine] = {} def sample_intents( intents: Intents, slot_lists: Optional[Dict[str, SlotList]] = None, expansion_rules: Optional[Dict[str, Sentence]] = None, max_sentences_per_intent: Optional[int] = None, intent_names: Optional[Set[str]] = None, language: Optional[str] = None, exclude_sentences_with_wildcards: bool = True, expand_ranges: bool = True, ) -> Iterable[Tuple[str, str]]: """Sample text strings for sentences from intents.""" if slot_lists is None: slot_lists = intents.slot_lists else: # Combine with intents slot_lists = {**intents.slot_lists, **slot_lists} if slot_lists is None: slot_lists = {} if expansion_rules is None: expansion_rules = intents.expansion_rules else: # Combine rules expansion_rules = {**intents.expansion_rules, **expansion_rules} for intent_name, intent in intents.intents.items(): if intent_names and (intent_name not in intent_names): # Skip intent continue num_intent_sentences = 0 skip_intent = False for intent_data in intent.data: if intent_data.expansion_rules: local_expansion_rules = { **expansion_rules, **intent_data.expansion_rules, } else: local_expansion_rules = expansion_rules for intent_sentence in intent_data.sentences: if exclude_sentences_with_wildcards and any( list_name in intent_data.wildcard_list_names for list_name in intent_sentence.list_names(local_expansion_rules) ): continue sentence_texts = sample_sentence( intent_sentence, slot_lists, local_expansion_rules, language=language, expand_ranges=expand_ranges, ) for sentence_text in sentence_texts: yield (intent_name, sentence_text) num_intent_sentences += 1 if (max_sentences_per_intent is not None) and ( 0 < max_sentences_per_intent <= num_intent_sentences ): skip_intent = True break if skip_intent: break if skip_intent: break def sample_sentence( sentence: Sentence, slot_lists: Optional[Dict[str, SlotList]] = None, expansion_rules: Optional[Dict[str, Sentence]] = None, language: Optional[str] = None, expand_lists: bool = True, expand_ranges: bool = True, ) -> Iterable[str]: return sample_expression( sentence.expression, slot_lists, expansion_rules, language, expand_lists, expand_ranges, ) def sample_expression( expression: Expression, slot_lists: Optional[Dict[str, SlotList]] = None, expansion_rules: Optional[Dict[str, Sentence]] = None, language: Optional[str] = None, expand_lists: bool = True, expand_ranges: bool = True, ) -> Iterable[str]: """Sample possible text strings from an expression.""" if isinstance(expression, TextChunk): chunk: TextChunk = expression yield chunk.original_text elif isinstance(expression, Group): grp: Group = expression if isinstance(grp, 
Alternative): for item in grp.items: yield from sample_expression( item, slot_lists, expansion_rules, language=language, expand_lists=expand_lists, expand_ranges=expand_ranges, ) elif isinstance(grp, Sequence): seq_sentences = map( partial( sample_expression, slot_lists=slot_lists, expansion_rules=expansion_rules, language=language, expand_lists=expand_lists, expand_ranges=expand_ranges, ), grp.items, ) sentence_texts = itertools.product(*seq_sentences) for sentence_words in sentence_texts: yield normalize_whitespace("".join(sentence_words)) elif isinstance(grp, Permutation): # Need to make lists instead because itertools does multiple passes. grp_sentences = [ list( sample_expression( item, slot_lists, expansion_rules, language=language, expand_lists=expand_lists, expand_ranges=expand_ranges, ) ) for item in grp.items ] for perm_sentences in itertools.permutations(grp_sentences): sentence_texts = itertools.product(*perm_sentences) for sentence_words in sentence_texts: # Strip added whitespace yield normalize_whitespace("".join(sentence_words)).strip() else: raise ValueError(f"Unexpected group type: {grp}") elif isinstance(expression, ListReference): # {list} list_ref: ListReference = expression if not expand_lists: yield f"{{{list_ref.list_name}}}" return if (not slot_lists) or (list_ref.list_name not in slot_lists): raise MissingListError(f"Missing slot list {{{list_ref.list_name}}}") slot_list = slot_lists[list_ref.list_name] if isinstance(slot_list, TextSlotList): text_list: TextSlotList = slot_list if not text_list.values: # Not necessarily an error, but may be a surprise _LOGGER.warning("No values for list: %s", list_ref.list_name) for text_value in text_list.values: yield from sample_expression( text_value.text_in, slot_lists, expansion_rules, language=language, expand_lists=expand_lists, expand_ranges=expand_ranges, ) elif isinstance(slot_list, RangeSlotList): range_list: RangeSlotList = slot_list if not expand_ranges: if range_list.name: yield f"{{{range_list.name}}}" else: yield "{number}" return if range_list.digits: number_strs = map(str, range_list.get_numbers()) yield from number_strs if range_list.words: words_language = range_list.words_language or language if words_language: engine = _ENGINE_CACHE.get(words_language) if engine is None: engine = RbnfEngine.for_language(words_language) _ENGINE_CACHE[words_language] = engine assert engine is not None # digits -> words for word_number in range_list.get_numbers(): # Use all unique words for a number, including different # genders, cases, etc. 
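# Illustrative note (an assumption about unicode-rbnf behavior): a
# single number can have several spellings across rulesets, e.g.
# cardinal vs. year forms, or gender/case variants in some languages.
# format_number() returns one text per ruleset, and the set() below
# keeps each distinct spelling exactly once.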
format_result = engine.format_number(word_number) unique_number_strs = set(format_result.text_by_ruleset.values()) yield from unique_number_strs else: _LOGGER.warning( "No language set, so cannot convert %s digits to words", list_ref.slot_name, ) elif isinstance(slot_list, WildcardSlotList): wildcard_list: WildcardSlotList = slot_list if wildcard_list.name: yield f"{{{wildcard_list.name}}}" else: yield "{wildcard}" else: raise ValueError(f"Unexpected slot list type: {slot_list}") elif isinstance(expression, RuleReference): # rule_ref: RuleReference = expression if (not expansion_rules) or (rule_ref.rule_name not in expansion_rules): raise MissingRuleError(f"Missing expansion rule <{rule_ref.rule_name}>") rule_body = expansion_rules[rule_ref.rule_name].expression yield from sample_expression( rule_body, slot_lists, expansion_rules, language=language, expand_lists=expand_lists, expand_ranges=expand_ranges, ) else: raise ValueError(f"Unexpected expression: {expression}") def main(): """Main entry point""" parser = argparse.ArgumentParser() parser.add_argument("yaml", nargs="+", help="YAML files or directories") parser.add_argument( "-n", "--max-sentences-per-intent", type=int, help="Limit number of sentences per intent", ) parser.add_argument( "--intents", nargs="+", help="Only sample sentences from these intents" ) parser.add_argument( "--areas", nargs="+", help="Area names", default=["area"], ) parser.add_argument( "--names", nargs="+", default=["entity"], help="Device/entity names" ) parser.add_argument("--language", help="Language for digits to words") parser.add_argument( "--debug", action="store_true", help="Print DEBUG messages to the console" ) args = parser.parse_args() level = logging.DEBUG if args.debug else logging.INFO logging.basicConfig(level=level) _LOGGER.debug(args) slot_lists = { "area": TextSlotList.from_strings(args.areas), "name": TextSlotList.from_strings(args.names), } input_dict = {"intents": {}} for yaml_path_str in args.yaml: yaml_path = Path(yaml_path_str) if yaml_path.is_dir(): yaml_file_paths = yaml_path.glob("*.yaml") else: yaml_file_paths = [yaml_path] for yaml_file_path in yaml_file_paths: _LOGGER.debug("Loading file: %s", yaml_file_path) with open(yaml_file_path, "r", encoding="utf-8") as yaml_file: merge_dict(input_dict, yaml.safe_load(yaml_file)) assert input_dict, "No intent YAML files loaded" intents = Intents.from_dict(input_dict) intents_and_texts = sample_intents( intents, slot_lists, max_sentences_per_intent=args.max_sentences_per_intent, intent_names=set(args.intents) if args.intents else None, language=args.language, ) for intent_name, sentence_text in intents_and_texts: json.dump( {"intent": intent_name, "text": sentence_text.strip()}, sys.stdout, ensure_ascii=False, ) print("") if __name__ == "__main__": main() hassil-3.0.1/hassil/sample_template.py000066400000000000000000000014201477332007300200120ustar00rootroot00000000000000"""CLI tool for sampling sentences from a template.""" import argparse import logging from .parse_expression import parse_sentence from .sample import sample_expression _LOGGER = logging.getLogger("hassil.sample_template") def main(): """Main entry point""" parser = argparse.ArgumentParser() parser.add_argument("sentence", help="Sentence template") parser.add_argument( "--debug", action="store_true", help="Print DEBUG messages to the console" ) args = parser.parse_args() level = logging.DEBUG if args.debug else logging.INFO logging.basicConfig(level=level) _LOGGER.debug(args) sentence = parse_sentence(args.sentence) for text 
in sample_expression(sentence): print(text) if __name__ == "__main__": main() hassil-3.0.1/hassil/string_matcher.py000066400000000000000000001155151477332007300176620ustar00rootroot00000000000000"""Original hassil matcher.""" import logging import re from collections import defaultdict from dataclasses import dataclass, field from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from unicode_rbnf import RbnfEngine from .errors import MissingListError, MissingRuleError from .expression import ( Alternative, Expression, Group, ListReference, Permutation, RuleReference, Sentence, Sequence, TextChunk, ) from .intents import ( IntentData, RangeFractionType, RangeSlotList, SlotList, TextSlotList, WildcardSlotList, ) from .models import ( MatchEntity, UnmatchedEntity, UnmatchedRangeEntity, UnmatchedTextEntity, ) from .trie import Trie from .util import ( WHITESPACE, check_excluded_context, check_required_context, match_first, match_start, remove_punctuation, ) INTEGER_START = re.compile(r"^(\s*-?[0-9]+)") FLOAT_START = re.compile(r"^(\s*-?[0-9]+(?:[.,][0-9]+)?)") INTEGER_ANYWHERE = re.compile(r"(\s*-?[0-9])") FLOAT_ANYWHERE = re.compile(r"(\s*-?[0-9]+(?:[.,][0-9]+)?)") BREAK_WORDS_TABLE = str.maketrans("-_", " ") # lang -> engine _ENGINE_CACHE: Dict[str, RbnfEngine] = {} # lang -> number -> words _RANGE_TRIE_CACHE: Dict[ str, Dict[Tuple[int, int, int, Optional[RangeFractionType]], Trie] ] = defaultdict(dict) _LOGGER = logging.getLogger() @dataclass class MatchSettings: """Settings used in match_expression.""" slot_lists: Dict[str, SlotList] = field(default_factory=dict) """Available slot lists mapped by name.""" expansion_rules: Dict[str, Sentence] = field(default_factory=dict) """Available expansion rules mapped by name.""" ignore_whitespace: bool = False """True if whitespace should be ignored during matching.""" allow_unmatched_entities: bool = False """True if unmatched entities are kept for better error messages (slower).""" language: Optional[str] = None """Optional language to use when converting digits to words.""" @dataclass class MatchContext: """Context passed to match_expression.""" text: str """Input text remaining to be processed.""" entities: List[MatchEntity] = field(default_factory=list) """Entities that have been found in input text.""" intent_context: Dict[str, Any] = field(default_factory=dict) """Context items from outside or acquired during matching.""" is_start_of_word: bool = True """True if current text is the start of a word.""" unmatched_entities: List[UnmatchedEntity] = field(default_factory=list) """Entities that failed to match (requires allow_unmatched_entities=True).""" close_wildcards: bool = False """True if open wildcards should be closed during init.""" close_unmatched: bool = False """True if open unmatched entities should be closed during init.""" text_chunks_matched: int = 0 """Number of literal text chunks that were matched.""" intent_sentence: Optional[Sentence] = None """Sentence template that is being matched.""" intent_data: Optional[IntentData] = None """Data from sentence template group in intents.""" def __post_init__(self): if self.close_wildcards: for entity in self.entities: entity.is_wildcard_open = False if self.close_unmatched: for unmatched_entity in self.unmatched_entities: if isinstance(unmatched_entity, UnmatchedTextEntity): unmatched_entity.is_open = False @property def is_match(self) -> bool: """True if no text is left that isn't just whitespace or punctuation""" text = remove_punctuation(self.text).strip() if text: return 
False # Wildcards cannot be empty for entity in self.entities: if entity.is_wildcard and (not entity.text.strip()): return False # Unmatched entities cannot be empty for unmatched_entity in self.unmatched_entities: if isinstance(unmatched_entity, UnmatchedTextEntity) and ( not unmatched_entity.text.strip() ): return False return True def get_open_wildcard(self) -> Optional[MatchEntity]: """Get the last open wildcard or None.""" if not self.entities: return None last_entity = self.entities[-1] if last_entity.is_wildcard and last_entity.is_wildcard_open: return last_entity return None def get_open_entity(self) -> Optional[UnmatchedTextEntity]: """Get the last open unmatched text entity or None.""" if not self.unmatched_entities: return None last_entity = self.unmatched_entities[-1] if isinstance(last_entity, UnmatchedTextEntity) and last_entity.is_open: return last_entity return None def match_expression( settings: MatchSettings, context: MatchContext, expression: Expression ) -> Iterable[MatchContext]: """Yield matching contexts for an expression""" if isinstance(expression, TextChunk): chunk: TextChunk = expression if settings.ignore_whitespace: # Remove all whitespace chunk_text = WHITESPACE.sub("", chunk.text) context_text = WHITESPACE.sub("", context.text) else: # Keep whitespace chunk_text = chunk.text context_text = context.text if context.is_start_of_word: # Ignore extra whitespace at the beginning of chunk and text # since we know we're at the start of a word. chunk_text = chunk_text.lstrip() context_text = context_text.lstrip() # True if remaining text to be matched is empty or whitespace. # # If so, we can't say this is a successful match yet because the # sentence template may have remaining non-optional expressions. # # So we have to continue matching, skipping over empty or whitespace # chunks until the template is exhausted. is_context_text_empty = len(context_text.strip()) == 0 if chunk.is_empty: # Skip empty chunk (NOT whitespace) yield context else: wildcard = context.get_open_wildcard() if (wildcard is not None) and (not wildcard.text.strip()): if not chunk_text.strip(): # Skip space yield MatchContext( text=context_text, is_start_of_word=True, # Copy over entities=context.entities, intent_context=context.intent_context, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, ) return # Wildcard cannot be empty if settings.ignore_whitespace: is_wildcard_end_of_word = False else: is_wildcard_end_of_word = wildcard.is_wildcard_end_of_word start_idx = match_first( context_text, chunk_text, start_of_word=is_wildcard_end_of_word, ) if start_idx < 0: # Cannot possibly match return if start_idx == 0: # Possible degenerate case where the next word in the # template duplicates. start_idx = match_first( context_text, chunk_text, 1, start_of_word=is_wildcard_end_of_word, ) if start_idx < 0: # Cannot possibly match return # Produce all possible matches where the wildcard consumes text # up to where the chunk matches in the string. 
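# Illustrative example (hypothetical template): matching
# "play day by day by queen" against "play {album} by {artist}",
# the literal chunk " by " occurs twice after the open {album}
# wildcard, so two candidate splits are produced below
# (album="day" and album="day by day"), and each candidate is
# continued independently against the rest of the template.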
entities_without_wildcard = context.entities[:-1] while start_idx > 0: wildcard_text = context_text[:start_idx] yield from match_expression( settings, MatchContext( text=context_text[start_idx:], is_start_of_word=True, entities=entities_without_wildcard + [ MatchEntity( name=wildcard.name, text=wildcard_text, value=wildcard_text, is_wildcard=True, is_wildcard_open=False, # always close ) ], # Copy over intent_context=context.intent_context, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, ), expression, ) start_idx = match_first(context_text, chunk_text, start_idx + 1) # Do not continue with matching return end_pos = match_start(context_text, chunk_text) if end_pos is not None: # Successful match for chunk context_text = context_text[end_pos:] # Close wildcards/unmatched entities on non-empty chunk chunk_text_stripped = chunk_text.strip() is_chunk_non_empty = len(chunk_text_stripped) > 0 text_chunks_matched = context.text_chunks_matched if is_chunk_non_empty: text_chunks_matched += len(chunk_text_stripped) yield MatchContext( text=context_text, # must use chunk.text because it hasn't been stripped is_start_of_word=chunk.text.endswith(" "), text_chunks_matched=text_chunks_matched, # Copy over entities=context.entities, intent_context=context.intent_context, unmatched_entities=context.unmatched_entities, intent_sentence=context.intent_sentence, intent_data=context.intent_data, # close_wildcards=is_chunk_non_empty, close_unmatched=is_chunk_non_empty, ) elif is_context_text_empty and chunk_text.isspace(): # No text left to match, so extra whitespace is OK to skip yield context else: # Try breaking words apart context_text = context_text.translate(BREAK_WORDS_TABLE) end_pos = match_start(context_text, chunk_text) if end_pos is not None: context_text = context_text[end_pos:] # Close wildcards/unmatched entities on non-empty chunk is_chunk_non_empty = len(chunk_text.strip()) > 0 yield MatchContext( text=context_text, # Copy over entities=context.entities, intent_context=context.intent_context, is_start_of_word=context.is_start_of_word, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, # close_wildcards=is_chunk_non_empty, close_unmatched=is_chunk_non_empty, ) elif wildcard is not None: # Add to wildcard by skipping ahead in the text until we find # the current chunk text. skip_idx = match_first(context_text, chunk_text) if skip_idx >= 0: wildcard_text = context_text[:skip_idx] # Wildcards cannot be empty if wildcard_text: entities = [ e for e in context.entities if e.name != wildcard.name ] entities.append( MatchEntity( name=wildcard.name, value=wildcard_text, text=wildcard_text, is_wildcard=True, is_wildcard_open=False, # always close ) ) yield MatchContext( text=context.text[skip_idx + len(chunk_text) :], # Copy over # entities=context.entities, intent_context=context.intent_context, is_start_of_word=True, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, # entities=entities, ) elif settings.allow_unmatched_entities and ( unmatched_entity := context.get_open_entity() ): # Add to the most recent unmatched entity by skipping ahead in # the text until we find the current chunk text. 
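# Illustrative note: this is the error-reporting path
# (allow_unmatched_entities=True). Words that no list value matched
# are accumulated into the open unmatched entity up to the next
# literal chunk, so callers can show exactly which part of the
# sentence was not understood.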
re_chunk_text = re.escape(chunk_text.strip()) if settings.ignore_whitespace: chunk_match = re.search(re_chunk_text, context_text) else: # Only skip to a word boundary chunk_match = re.search( rf"\s{re_chunk_text}(\s|$)", context_text ) if chunk_match: unmatched_entity_text = ( unmatched_entity.text + context_text[: chunk_match.start() + 1] ) # Unmatched entities cannot be empty if unmatched_entity_text: # Make a copy of modified unmatched entity unmatched_entities = [ e for e in context.unmatched_entities if e.name != unmatched_entity.name ] unmatched_entities.append( UnmatchedTextEntity( name=unmatched_entity.name, text=unmatched_entity_text, is_open=False, # always close ) ) yield MatchContext( text=context.text[chunk_match.end() :], # Copy over entities=context.entities, intent_context=context.intent_context, is_start_of_word=True, text_chunks_matched=context.text_chunks_matched + len(chunk.text.strip()), intent_sentence=context.intent_sentence, intent_data=context.intent_data, # unmatched_entities=unmatched_entities, ) else: # Match failed pass elif isinstance(expression, Group): grp: Group = expression if isinstance(grp, Alternative): # Any may match (words|in|alternative) # NOTE: [optional] = (optional|) for item in grp.items: yield from match_expression(settings, context, item) elif isinstance(grp, Sequence): if grp.items: # All must match (words in group) group_contexts = [context] for item in grp.items: # Next step group_contexts = [ item_context for group_context in group_contexts for item_context in match_expression( settings, group_context, item ) ] if not group_contexts: break yield from group_contexts elif isinstance(grp, Permutation): if len(grp.items) == 1: yield from match_expression(settings, context, grp.items[0]) else: # All must match (in arbitrary order) for item, rest in grp.iterate_permutations(): for item_context in match_expression(settings, context, item): yield from match_expression(settings, item_context, rest) else: raise ValueError(f"Unexpected group type: {grp}") elif isinstance(expression, ListReference): # {list} list_ref: ListReference = expression if (not settings.slot_lists) or (list_ref.list_name not in settings.slot_lists): raise MissingListError(f"Missing slot list {{{list_ref.list_name}}}") wildcard = context.get_open_wildcard() slot_list = settings.slot_lists[list_ref.list_name] if isinstance(slot_list, TextSlotList): if context.text: text_list: TextSlotList = slot_list # Any value may match has_matches = False required_context: Optional[Dict[str, Any]] = None excluded_context: Optional[Dict[str, Any]] = None if context.intent_data is not None: required_context = context.intent_data.requires_context excluded_context = context.intent_data.excludes_context for slot_value in text_list.values: # Filter possible values with required/excluded context if required_context and ( not check_required_context( required_context, slot_value.context, allow_missing_keys=True, ) ): continue if excluded_context and ( not check_excluded_context(excluded_context, slot_value.context) ): continue if (isinstance(slot_value.text_in, TextChunk)) and ( len(context.text) < len(slot_value.text_in.text) ): # Not enough text left to match continue value_contexts = match_expression( settings, MatchContext( # Copy over text=context.text, entities=context.entities, intent_context=context.intent_context, is_start_of_word=context.is_start_of_word, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, 
intent_data=context.intent_data, ), slot_value.text_in, ) for value_context in value_contexts: has_matches = True value_wildcard: Optional[MatchEntity] = None if ( value_context.entities and value_context.entities[-1].is_wildcard ): value_wildcard = value_context.entities[-1] if value_wildcard is not None and context.text.startswith( value_wildcard.text ): # Remove wildcard text from value remaining_text = context.text[len(value_wildcard.text) :] else: remaining_text = context.text entity_text = ( remaining_text[: -len(value_context.text)] if value_context.text else remaining_text ) entities = value_context.entities + [ MatchEntity( name=list_ref.slot_name, value=( entity_text if slot_value.value_out is None else slot_value.value_out ), text=entity_text, metadata=slot_value.metadata, ) ] if slot_value.context: # Merge context from matched list value yield MatchContext( entities=entities, intent_context={ **context.intent_context, **slot_value.context, }, # Copy over text=value_context.text, is_start_of_word=context.is_start_of_word, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, ) else: yield MatchContext( entities=entities, # Copy over text=value_context.text, intent_context=value_context.intent_context, is_start_of_word=context.is_start_of_word, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, ) if (not has_matches) and settings.allow_unmatched_entities: # Report mismatch yield MatchContext( # Copy over text=context.text, entities=context.entities, intent_context=context.intent_context, is_start_of_word=context.is_start_of_word, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, # unmatched_entities=context.unmatched_entities + [UnmatchedTextEntity(name=list_ref.slot_name, text="")], close_wildcards=True, ) elif isinstance(slot_list, RangeSlotList): if context.text: # List that represents a number range. range_list: RangeSlotList = slot_list number_matches: List[re.Match] = [] if wildcard is None: # Look for digits at the start of the incoming text if range_list.fraction_type is None: number_match = INTEGER_START.match(context.text) else: number_match = FLOAT_START.match(context.text) if number_match is not None: number_matches.append(number_match) else: # Look for digit(s) anywhere in the string. # The wildcard will consume text up to that point. 
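# Illustrative note: this branch runs when an open wildcard
# immediately precedes the range list, so the digits need not sit at
# the start of the remaining text; the text before the matched
# number is folded into the wildcard's value further below.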
if range_list.fraction_type is None: number_pattern = INTEGER_ANYWHERE else: number_pattern = FLOAT_ANYWHERE number_matches.extend(number_pattern.finditer(context.text)) digits_match = False if range_list.digits and number_matches: for number_match in number_matches: number_text = number_match[1] word_number: Union[int, float] = float( number_text.replace(",", ".") # normalize decimal separator ) # Check if number is within range of our list if (range_list.step == 1) and ( range_list.fraction_type is None ): # Unit step in_range = ( range_list.start <= word_number <= range_list.stop ) else: # Non-unit step or fractions in_range = word_number in range_list.get_numbers() if in_range: # Number is in range digits_match = True range_value = word_number if range_list.multiplier is not None: range_value *= range_list.multiplier entities = context.entities + [ MatchEntity( name=list_ref.slot_name, value=range_value, text=number_match.group(1), ) ] if wildcard is None: yield MatchContext( text=context.text[number_match.end() :], entities=entities, # Copy over intent_context=context.intent_context, is_start_of_word=context.is_start_of_word, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, ) else: # Wildcard consumes text before number if wildcard.is_wildcard_open: wildcard.text += context.text[ : number_match.end() - 1 ] wildcard.value = wildcard.text yield MatchContext( text=context.text[number_match.end() :], entities=entities, # Copy over intent_context=context.intent_context, is_start_of_word=context.is_start_of_word, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, # close_wildcards=True, ) elif settings.allow_unmatched_entities and (wildcard is None): # Report out of range yield MatchContext( # Copy over text=context.text[len(number_text) :], entities=context.entities, intent_context=context.intent_context, is_start_of_word=context.is_start_of_word, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, # unmatched_entities=context.unmatched_entities + [ UnmatchedRangeEntity( name=list_ref.slot_name, value=word_number ) ], ) # Only check number words if: # 1. Words are enabled for this list # 2. We didn't already match digits # 3. the incoming text doesn't start with digits words_match: bool = False if range_list.words and (not digits_match) and (not number_matches): words_language = range_list.words_language or settings.language if words_language: range_settings = ( range_list.start, range_list.stop, range_list.step, range_list.fraction_type, ) range_trie = _RANGE_TRIE_CACHE[words_language].get( range_settings ) try: if range_trie is None: range_trie = _build_range_trie( words_language, range_list ) _RANGE_TRIE_CACHE[words_language][ range_settings ] = range_trie for ( number_end_pos, number_text, range_value, ) in range_trie.find(context.text): number_start_pos = number_end_pos - len(number_text) if (wildcard is None) and (number_start_pos > 0): # Can't possibly match because the number # string isn't at the start of the text. 
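# (With an open wildcard, by contrast, the leading text is
# consumed by the wildcard below instead of being rejected.)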
continue entities = context.entities + [ MatchEntity( name=list_ref.slot_name, value=range_value, text=number_text, ) ] if wildcard is None: yield from match_expression( settings, MatchContext( text=context.text, entities=entities, # Copy over intent_context=context.intent_context, is_start_of_word=context.is_start_of_word, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, ), TextChunk(number_text), ) else: # Wildcard consumes text before number wildcard.text += context.text[:number_start_pos] wildcard.value = wildcard.text yield from match_expression( settings, MatchContext( text=context.text[number_start_pos:], entities=entities, # Copy over intent_context=context.intent_context, is_start_of_word=context.is_start_of_word, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, # close_wildcards=True, ), TextChunk(number_text), ) except ValueError as error: _LOGGER.debug( "Unexpected error converting numbers to words for language '%s': %s", settings.language, str(error), ) if ( (not digits_match) and (not words_match) and settings.allow_unmatched_entities ): # Report not a number yield MatchContext( # Copy over text=context.text, entities=context.entities, intent_context=context.intent_context, is_start_of_word=context.is_start_of_word, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, # unmatched_entities=context.unmatched_entities + [UnmatchedTextEntity(name=list_ref.slot_name, text="")], close_wildcards=True, ) elif isinstance(slot_list, WildcardSlotList): if context.text: # Start wildcard entities yield MatchContext( # Copy over text=context.text, intent_context=context.intent_context, is_start_of_word=context.is_start_of_word, unmatched_entities=context.unmatched_entities, text_chunks_matched=context.text_chunks_matched, intent_sentence=context.intent_sentence, intent_data=context.intent_data, # entities=context.entities + [ MatchEntity( name=list_ref.slot_name, value="", text="", is_wildcard=True, is_wildcard_end_of_word=list_ref.is_end_of_word, ) ], close_unmatched=True, ) else: raise ValueError(f"Unexpected slot list type: {slot_list}") elif isinstance(expression, RuleReference): # rule_ref: RuleReference = expression if (not settings.expansion_rules) or ( rule_ref.rule_name not in settings.expansion_rules ): raise MissingRuleError(f"Missing expansion rule <{rule_ref.rule_name}>") yield from match_expression( settings, context, settings.expansion_rules[rule_ref.rule_name].expression ) else: raise ValueError(f"Unexpected expression: {expression}") def _build_range_trie(language: str, range_list: RangeSlotList) -> Trie: range_trie = Trie() # Load number formatting engine engine = _ENGINE_CACHE.get(language) if engine is None: engine = RbnfEngine.for_language(language) _ENGINE_CACHE[language] = engine for word_number in range_list.get_numbers(): range_value: Union[float, int] = word_number if range_list.multiplier is not None: range_value *= range_list.multiplier format_result = engine.format_number(word_number) used_words = set() for words in format_result.text_by_ruleset.values(): if words in used_words: continue range_trie.insert(words, range_value) used_words.add(words) words = words.translate(BREAK_WORDS_TABLE) if words in used_words: continue range_trie.insert(words, 
range_value) used_words.add(words) return range_trie hassil-3.0.1/hassil/trie.py000066400000000000000000000055001477332007300156040ustar00rootroot00000000000000"""Specialized implementation of a trie. See: https://en.wikipedia.org/wiki/Trie """ from collections import deque from dataclasses import dataclass from typing import Any, Dict, Iterable, List, Optional, Tuple @dataclass class TrieNode: """Node in trie.""" id: int text: Optional[str] = None values: Optional[List[Any]] = None children: "Optional[Dict[str, TrieNode]]" = None class Trie: """A specialized trie data structure that finds all known words in a string.""" def __init__(self) -> None: self.roots: Dict[str, TrieNode] = {} self._next_id = 0 def insert(self, text: str, value: Any) -> None: """Insert a word and value into the trie.""" current_node: Optional[TrieNode] = None current_children: Optional[Dict[str, TrieNode]] = self.roots last_idx = len(text) - 1 for i, c in enumerate(text): if current_children is None: assert current_node is not None current_node.children = current_children = {} current_node = current_children.get(c) if current_node is None: current_node = TrieNode(id=self.next_id()) current_children[c] = current_node if i == last_idx: current_node.text = text if current_node.values is None: current_node.values = [value] else: current_node.values.append(value) current_children = current_node.children def find(self, text: str, unique: bool = True) -> Iterable[Tuple[int, str, Any]]: """Yield (end_pos, text, value) pairs of all words found in the string.""" q = deque([(self.roots, i) for i in range(len(text))]) visited = set() while q: item = q.popleft() current_children, current_position = item if current_position >= len(text): continue current_char = text[current_position] node = current_children.get(current_char) if (node is not None) and (node.id not in visited): if node.text is not None: # End is one past the current position if unique: visited.add(node.id) if node.values: for value in node.values: yield (current_position + 1, node.text, value) else: # null value yield (current_position + 1, node.text, None) if node.children and (current_position < len(text)): q.append((node.children, current_position + 1)) def next_id(self) -> int: current_id = self._next_id self._next_id += 1 return current_id hassil-3.0.1/hassil/util.py000066400000000000000000000162471477332007300156300ustar00rootroot00000000000000"""Utility methods.""" import collections import re import unicodedata from collections.abc import Mapping, MutableMapping from typing import Any, Dict, Iterable, Optional WHITESPACE = re.compile(r"\s+") WHITESPACE_CAPTURE = re.compile(r"(\s+)") WHITESPACE_SEPARATOR = " " TEMPLATE_SYNTAX = re.compile(r".*[(){}<>\[\]|].*") PUNCTUATION_STR = ".。,,?¿?؟!¡!;;::’" PUNCTUATION_PATTERN = rf"[{re.escape(PUNCTUATION_STR)}]+" PUNCTUATION_START = re.compile(rf"^{PUNCTUATION_PATTERN}") PUNCTUATION_END = re.compile(rf"{PUNCTUATION_PATTERN}$") PUNCTUATION_END_SPACE = re.compile(rf"{PUNCTUATION_PATTERN}\s*$") PUNCTUATION_START_WORD = re.compile(rf"(?<=\W){PUNCTUATION_PATTERN}(?=\w)") PUNCTUATION_END_WORD = re.compile(rf"(?<=\w){PUNCTUATION_PATTERN}(?=\W)") PUNCTUATION_WORD = re.compile(rf"(?<=\W){PUNCTUATION_PATTERN}(?=\W)") INITIALISM_DOTS_AT_END = re.compile(r"\b(?:\w\.){2,}$") def merge_dict( base_dict: MutableMapping[Any, Any], new_dict: Mapping[Any, Any] ) -> None: """Merge new_dict into base_dict.""" for key, value in new_dict.items(): if key in base_dict: old_value = base_dict[key] if isinstance(old_value, 
collections.abc.MutableMapping): # Combine dictionary assert isinstance( value, collections.abc.Mapping ), f"Not a dict: {value}" merge_dict(old_value, value) elif isinstance(old_value, collections.abc.MutableSequence): # Combine list assert isinstance( value, collections.abc.Sequence ), f"Not a list: {value}" old_value.extend(value) else: # Overwrite base_dict[key] = value else: base_dict[key] = value def remove_escapes(text: str) -> str: """Remove backslash escape sequences.""" return re.sub(r"\\(.)", r"\1", text) def normalize_whitespace(text: str) -> str: """Make all whitespace inside a string single spaced.""" return WHITESPACE_CAPTURE.sub(WHITESPACE_SEPARATOR, text) def normalize_text(text: str) -> str: """Normalize whitespace and unicode forms.""" text = normalize_whitespace(text) text = unicodedata.normalize("NFC", text) return text def is_template(text: str) -> bool: """Return True if text contains template syntax.""" return TEMPLATE_SYNTAX.match(text) is not None def check_required_context( required_context: Dict[str, Any], match_context: Optional[Dict[str, Any]], allow_missing_keys: bool = False, ) -> bool: """Return True if match context does not violate required context. Setting allow_missing_keys to True only checks existing keys in match context. """ for ( required_key, required_value, ) in required_context.items(): if (not match_context) or (required_key not in match_context): # Match is missing key if allow_missing_keys: # Only checking existing keys continue return False if isinstance(required_value, collections.abc.Mapping): # Unpack dict # : # value: ... required_value = required_value.get("value") # Ensure value matches actual_value = match_context[required_key] if isinstance(actual_value, collections.abc.Mapping): # Unpack dict # : # value: ... actual_value = actual_value.get("value") if (not isinstance(required_value, str)) and isinstance( required_value, collections.abc.Collection ): if actual_value not in required_value: # Match value not in required list return False elif (required_value is not None) and (actual_value != required_value): # Match value doesn't equal required value return False return True def check_excluded_context( excluded_context: Dict[str, Any], match_context: Optional[Dict[str, Any]] ) -> bool: """Return True if match context does not violate excluded context.""" for ( excluded_key, excluded_value, ) in excluded_context.items(): if (not match_context) or (excluded_key not in match_context): continue if isinstance(excluded_value, collections.abc.Mapping): # Unpack dict # : # value: ... excluded_value = excluded_value.get("value") # Ensure value does not match actual_value = match_context[excluded_key] if isinstance(actual_value, collections.abc.Mapping): # Unpack dict # : # value: ... 
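# Illustrative example (hypothetical): a context value may itself be
# a dict such as {"value": "light", "text": "the light"}; only its
# "value" field takes part in the comparison below.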
def remove_skip_words(
    text: str, skip_words: Iterable[str], ignore_whitespace: bool
) -> str:
    """Remove all skip words from text."""
    if not skip_words:
        return text

    if ignore_whitespace:
        skip_words_pattern = re.compile(
            r"("
            + "|".join(
                re.escape(w.strip()) for w in sorted(skip_words, key=len, reverse=True)
            )
            + r")",
            re.IGNORECASE,
        )
        return skip_words_pattern.sub("", text)

    skip_words_pattern = re.compile(
        r"(?<=\W)("
        + "|".join(
            re.escape(w.strip()) for w in sorted(skip_words, key=len, reverse=True)
        )
        + r")(?=\W)",
        re.IGNORECASE,
    )
    text = skip_words_pattern.sub(" ", f" {text} ").strip()
    return normalize_whitespace(text)


def remove_punctuation(text: str) -> str:
    """Remove punctuation from start/end of words and entire text."""
    text = PUNCTUATION_START.sub("", text)
    if not INITIALISM_DOTS_AT_END.match(text):
        # Don't remove final "." from "A.C.", etc.
        text = PUNCTUATION_END.sub("", text)

    text = PUNCTUATION_START_WORD.sub("", text)
    text = PUNCTUATION_END_WORD.sub("", text)
    text = PUNCTUATION_WORD.sub("", text)

    return text


def match_start(text: str, prefix: str) -> Optional[int]:
    """Match prefix at start of text and return end of match position."""
    match = re.match(rf"^{re.escape(prefix)}", text, re.IGNORECASE)
    if match is None:
        return None

    return match.end()


def match_first(
    text: str, prefix: str, start_idx: int = 0, start_of_word: bool = False
) -> int:
    """Match prefix at text or word boundary and return start of match position."""
    if start_idx > 0:
        text = text[start_idx:]

    if start_of_word:
        boundary = r"\b"
    else:
        boundary = ""

    match = re.search(rf"{boundary}{re.escape(prefix)}", text, re.IGNORECASE)
    if match is None:
        return -1

    return start_idx + match.start()
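
# --- Editor's illustration (not part of the original file): expected behavior
# of the text helpers above. Outputs are assumptions based on the regexes, not
# documented guarantees.
if __name__ == "__main__":
    # Punctuation is stripped at word edges, but "." inside a word survives
    assert remove_punctuation("turn on the lights!") == "turn on the lights"
    assert remove_punctuation("set it to 2.5, please.") == "set it to 2.5 please"

    # Skip words are matched longest-first at word boundaries
    assert (
        remove_skip_words(
            "could you please turn on the lights", {"could you", "please"}, False
        )
        == "turn on the lights"
    )

    # Case-insensitive search at a word boundary; "The" starts at index 8
    assert match_first("Turn On The Lights", "the", start_of_word=True) == 8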
"hello@home-assistant.io"} ] keywords = ["home", "assistant", "intent", "recognition"] classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Topic :: Text Processing :: Linguistic", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] requires-python = ">=3.9.0" dependencies = [ "PyYAML>=6.0,<7", "unicode-rbnf>=2.3,<3" ] [project.urls] "Source Code" = "http://github.com/home-assistant/hassil" [tool.setuptools] platforms = ["any"] zip-safe = true include-package-data = true [tool.setuptools.packages.find] include = ["hassil"] exclude = ["tests", "tests.*"] [project.scripts] hassil = "hassil.__main__:main" hassil-3.0.1/requirements_dev.txt000066400000000000000000000001351477332007300171250ustar00rootroot00000000000000build>=1,<2 black==24.8.0 flake8==7.2.0 mypy==1.14.1 pylint==3.2.7 pytest==8.3.5 tox==4.25.0 hassil-3.0.1/script/000077500000000000000000000000001477332007300143105ustar00rootroot00000000000000hassil-3.0.1/script/format000077500000000000000000000007471477332007300155360ustar00rootroot00000000000000#!/usr/bin/env python3 import subprocess import venv from pathlib import Path _DIR = Path(__file__).parent _PROGRAM_DIR = _DIR.parent _VENV_DIR = _PROGRAM_DIR / ".venv" _MODULE_DIR = _PROGRAM_DIR / "hassil" _TESTS_DIR = _PROGRAM_DIR / "tests" _FORMAT_DIRS = [_MODULE_DIR, _TESTS_DIR] context = venv.EnvBuilder().ensure_directories(_VENV_DIR) subprocess.check_call([context.env_exe, "-m", "black"] + _FORMAT_DIRS) subprocess.check_call([context.env_exe, "-m", "isort"] + _FORMAT_DIRS) hassil-3.0.1/script/lint000077500000000000000000000013151477332007300152040ustar00rootroot00000000000000#!/usr/bin/env python3 import subprocess import venv from pathlib import Path _DIR = Path(__file__).parent _PROGRAM_DIR = _DIR.parent _VENV_DIR = _PROGRAM_DIR / ".venv" _MODULE_DIR = _PROGRAM_DIR / "hassil" _TESTS_DIR = _PROGRAM_DIR / "tests" _LINT_DIRS = [_MODULE_DIR, _TESTS_DIR] context = venv.EnvBuilder().ensure_directories(_VENV_DIR) subprocess.check_call([context.env_exe, "-m", "black"] + _LINT_DIRS + ["--check"]) subprocess.check_call([context.env_exe, "-m", "isort"] + _LINT_DIRS + ["--check"]) subprocess.check_call([context.env_exe, "-m", "flake8"] + _LINT_DIRS) subprocess.check_call([context.env_exe, "-m", "pylint"] + _LINT_DIRS) subprocess.check_call([context.env_exe, "-m", "mypy"] + _LINT_DIRS) hassil-3.0.1/script/package000077500000000000000000000004711477332007300156330ustar00rootroot00000000000000#!/usr/bin/env python3 import subprocess import venv from pathlib import Path _DIR = Path(__file__).parent _PROGRAM_DIR = _DIR.parent _VENV_DIR = _PROGRAM_DIR / ".venv" context = venv.EnvBuilder().ensure_directories(_VENV_DIR) subprocess.check_call( [context.env_exe, "-m", "build", "--sdist", "--wheel"] ) hassil-3.0.1/script/run000077500000000000000000000004701477332007300150430ustar00rootroot00000000000000#!/usr/bin/env python3 import sys import subprocess import venv from pathlib import Path _DIR = Path(__file__).parent _PROGRAM_DIR = _DIR.parent _VENV_DIR = _PROGRAM_DIR / ".venv" context = venv.EnvBuilder().ensure_directories(_VENV_DIR) subprocess.check_call([context.env_exe, "-m", "hassil"] + sys.argv[1:]) hassil-3.0.1/script/setup000077500000000000000000000017641477332007300154060ustar00rootroot00000000000000#!/usr/bin/env python3 import argparse import 
hassil-3.0.1/script/setup

#!/usr/bin/env python3
import argparse
import subprocess
import venv
from pathlib import Path

_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_BUILD_DIR = _DIR.parent / "build" / "lib" / "hassil"
_MODULE_DIR = _PROGRAM_DIR / "hassil"

parser = argparse.ArgumentParser()
parser.add_argument("--dev", action="store_true", help="Install dev requirements")
args = parser.parse_args()

# Create virtual environment
builder = venv.EnvBuilder(with_pip=True)
context = builder.ensure_directories(_VENV_DIR)
builder.create(_VENV_DIR)

# Upgrade dependencies
pip = [context.env_exe, "-m", "pip"]
subprocess.check_call(pip + ["install", "--upgrade", "pip"])
subprocess.check_call(pip + ["install", "--upgrade", "setuptools", "wheel"])

# Install requirements
subprocess.check_call(pip + ["install", "-e", str(_PROGRAM_DIR)])

if args.dev:
    # Install dev requirements
    subprocess.check_call(
        pip + ["install", "-r", str(_PROGRAM_DIR / "requirements_dev.txt")]
    )

hassil-3.0.1/script/test

#!/usr/bin/env python3
import subprocess
import sys
import venv
from pathlib import Path

_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_TEST_DIR = _PROGRAM_DIR / "tests"

context = venv.EnvBuilder().ensure_directories(_VENV_DIR)
subprocess.check_call([context.env_exe, "-m", "pytest", _TEST_DIR] + sys.argv[1:])

hassil-3.0.1/setup.cfg

[flake8]
# To work with Black
max-line-length = 88
# E501: line too long
# W503: Line break occurred before a binary operator
# E203: Whitespace before ':'
# D202 No blank lines allowed after function docstring
# W504 line break after binary operator
ignore = E501, W503, E203, D202, W504
# F401 import unused
per-file-ignores =
    hassil/__init__.py:F401

[isort]
multi_line_output = 3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=88
indent = "    "

hassil-3.0.1/tests/

hassil-3.0.1/tests/__init__.py

"""Tests for Home Assistant Intent Language (HassIL) parser"""

hassil-3.0.1/tests/test_expression.py

from unittest.mock import ANY

from hassil.expression import (
    Alternative,
    ListReference,
    Permutation,
    RuleReference,
    Sentence,
    Sequence,
    TextChunk,
)
from hassil.parse_expression import parse_expression, parse_sentence
from hassil.parser import next_chunk

# -----------------------------------------------------------------------------


def test_word():
    assert parse_expression(next_chunk("test")) == t(text="test")


def test_sequence_in_sequence():
    assert parse_expression(next_chunk("((test test2))")) == Sequence(
        items=[Sequence(items=[t(text="test "), t(text="test2")])],
    )


def test_escapes():
    assert parse_expression(next_chunk(r"(test\<\>\{\}\)\( test2)")) == Sequence(
        items=[t(text="test<>{})( "), t(text="test2")],
    )


def test_optional():
    assert parse_expression(next_chunk("[test test2]")) == Alternative(
        items=[
            Sequence(
                items=[t(text="test "), t(text="test2")],
            ),
            t(text=""),
        ],
        is_optional=True,
    )


def test_alternative():
    assert parse_expression(next_chunk("(test | test2)")) == Alternative(
        items=[Sequence(items=[t(text="test ")]), Sequence(items=[t(text=" test2")])],
    )


def test_permutation():
    assert parse_expression(next_chunk("(test; test2)")) == Permutation(
items=[ Sequence(items=[t(text=" "), t(text="test"), t(text=" ")]), Sequence(items=[t(text=" "), t(text=" test2"), t(text=" ")]), ], ) def test_optional_alternative(): assert parse_expression(next_chunk("[test | test2]")) == Alternative( items=[ Sequence(items=[t(text="test ")]), Sequence(items=[t(text=" test2")]), t(text=""), ], is_optional=True, ) def test_optional_permutation(): assert parse_expression(next_chunk("[test; test2]")) == Alternative( items=[ Permutation( items=[ Sequence(items=[t(text=" "), t(text="test"), t(text=" ")]), Sequence(items=[t(text=" "), t(text=" test2"), t(text=" ")]), ], ), t(text=""), ], is_optional=True, ) def test_slot_reference(): assert parse_expression(next_chunk("{test}")) == ListReference(list_name="test") def test_rule_reference(): assert parse_expression(next_chunk("")) == RuleReference(rule_name="test") def test_sentence_no_group(): assert parse_sentence("this is a test") == Sentence( expression=Sequence( items=[t(text="this "), t(text="is "), t(text="a "), t(text="test")] ) ) def test_sentence_group(): assert parse_sentence("(this is a test)") == Sentence( expression=Sequence( items=[t(text="this "), t(text="is "), t(text="a "), t(text="test")] ) ) def test_sentence_optional(): assert parse_sentence("[this is a test]") == Sentence( expression=Alternative( items=[ Sequence( items=[ t(text="this "), t(text="is "), t(text="a "), t(text="test"), ] ), t(text=""), ], is_optional=True, ) ) def test_sentence_optional_prefix(): assert parse_sentence("[t]est") == Sentence( expression=Sequence( items=[ Alternative( items=[Sequence(items=[t(text="t")]), t(text="")], is_optional=True ), t(text="est"), ], ) ) def test_sentence_optional_suffix(): assert parse_sentence("test[s]") == Sentence( expression=Sequence( items=[ t(text="test"), Alternative( items=[Sequence(items=[t(text="s")]), t(text="")], is_optional=True ), ], ) ) def test_sentence_alternative_whitespace(): assert parse_sentence("test ( 1 | 2)") == Sentence( expression=Sequence( items=[ t(text="test "), Alternative( items=[ Sequence(items=[t(text=" 1 ")]), Sequence(items=[t(text=" 2")]), ] ), ], ) ) def test_list_reference_inside_word(): assert parse_sentence("ab{test}cd") == Sentence( expression=Sequence( items=[ t(text="ab"), ListReference("test", is_end_of_word=False), t(text="cd"), ], ) ) def test_list_reference_outside_word(): assert parse_sentence("ab{test} cd") == Sentence( expression=Sequence( items=[ t(text="ab"), ListReference("test", is_end_of_word=True), t(text=" cd"), ], ) ) # def test_fix_pattern_whitespace(): # assert fix_pattern_whitespace("[start] middle [end]") == "[(start) ]middle[ (end)]" # assert fix_pattern_whitespace("start [middle] end") == "start[ (middle)] end" # assert fix_pattern_whitespace("start (middle [end])") == "start (middle[ (end)])" # assert ( # fix_pattern_whitespace("[start] (middle) [end]") == "[(start) ](middle)[ (end)]" # ) # ----------------------------------------------------------------------------- def t(**kwargs): return TextChunk(parent=ANY, **kwargs) hassil-3.0.1/tests/test_intents.py000066400000000000000000000115431477332007300172470ustar00rootroot00000000000000from hassil import is_match, parse_sentence from hassil.intents import TextSlotList def test_no_match(): sentence = parse_sentence("turn on the lights") assert is_match("turn on the lights", sentence) assert not is_match("turn off the lights", sentence) assert not is_match("don't turn on the lights", sentence) def test_punctuation(): sentence = parse_sentence("turn on the lights") assert 
is_match("turn on the lights.", sentence) assert is_match("turn on the lights!", sentence) def test_whitespace(): sentence = parse_sentence("turn on the lights") assert is_match(" turn on the lights", sentence) def test_skip_punctuation(): sentence = parse_sentence("turn on the lights") assert is_match("turn ! on ? the, lights.", sentence) def test_skip_words(): sentence = parse_sentence("turn on [the] lights") skip_words = {"please", "could", "you", "my"} assert is_match( "could you please turn on my lights?", sentence, skip_words=skip_words ) assert is_match("turn on the lights, please", sentence, skip_words=skip_words) def test_optional(): sentence = parse_sentence("turn on [the] lights in [the] kitchen") assert is_match("turn on the lights in the kitchen", sentence) assert is_match("turn on lights in kitchen", sentence) def test_optional_plural(): sentence = parse_sentence("turn on the light[s]") assert is_match("turn on the light", sentence) assert is_match("turn on the lights", sentence) def test_group_plural(): sentence = parse_sentence("give me the penn(y|ies)") assert is_match("give me the penny", sentence) assert is_match("give me the pennies", sentence) def test_list(): sentence = parse_sentence("turn off {area}") areas = TextSlotList.from_strings(["kitchen", "living room"]) assert is_match("turn off kitchen", sentence, slot_lists={"area": areas}) assert is_match("turn off living room", sentence, slot_lists={"area": areas}) def test_list_prefix_suffix(): sentence = parse_sentence("turn off abc-{area}-123") areas = TextSlotList.from_strings(["kitchen", "living room"]) assert is_match("turn off abc-kitchen-123", sentence, slot_lists={"area": areas}) assert is_match( "turn off abc-living room-123", sentence, slot_lists={"area": areas} ) def test_rule(): sentence = parse_sentence("turn off ") assert is_match( "turn off kitchen", sentence, expansion_rules={"area": parse_sentence("[the] kitchen")}, ) def test_rule_prefix_suffix(): sentence = parse_sentence("turn off abc--123") assert is_match( "turn off abc-kitchen-123", sentence, expansion_rules={"area": parse_sentence("[the ]kitchen")}, ) def test_alternative_whitespace(): sentence = parse_sentence("(start|stopp)ed") assert is_match("started", sentence) assert is_match("stopped", sentence) def test_alternative_whitespace_2(): sentence = parse_sentence("set brightness to ( minimum | lowest)") assert is_match("set brightness to lowest", sentence) def test_no_allow_template(): sentence = parse_sentence("turn off {name}") names = TextSlotList.from_strings(["light[s]"]) assert is_match("turn off lights", sentence, slot_lists={"name": names}) names = TextSlotList.from_strings(["light[s]"], allow_template=False) assert not is_match("turn off lights", sentence, slot_lists={"name": names}) assert is_match("turn off light[s]", sentence, slot_lists={"name": names}) def test_no_whitespace_fails(): sentence = parse_sentence("this is a test") assert not is_match("thisisatest", sentence) def test_permutations(): sentence = parse_sentence("(in the kitchen;is there smoke)") assert is_match("in the kitchen is there smoke", sentence) assert is_match("is there smoke in the kitchen", sentence) sentence = parse_sentence("(a;b;c)") assert is_match("a b c", sentence) assert is_match("a c b", sentence) assert is_match("b a c", sentence) assert is_match("b c a", sentence) assert is_match("c a b", sentence) assert is_match("c b a", sentence) def test_nl_optional_whitespace(): sentence = parse_sentence( "[] (alle|in) [ ] aan [willen | kunnen] []" ) slot_lists 
= { "area": TextSlotList.from_strings(["Keuken", "Woonkamer"], allow_template=False) } expansion_rules = { "area": parse_sentence("[de|het] {area}"), "doe": parse_sentence("(zet|mag|mogen|doe|verander|maak|schakel)"), "lamp": parse_sentence("[de|het] (lamp[en]|licht[en]|verlichting)"), } for text in [ "Mogen in de keuken de lampen aan?", "Mogen in de keukenlampen aan?", ]: assert is_match( text, sentence, slot_lists=slot_lists, expansion_rules=expansion_rules, ) hassil-3.0.1/tests/test_parser.py000066400000000000000000000021461477332007300170560ustar00rootroot00000000000000"""Tests for Hassil parser""" from hassil.parser import ParseChunk, ParseType, next_chunk def test_word(): text = "test" assert next_chunk(text) == ParseChunk( text="test", parse_type=ParseType.WORD, start_index=0, end_index=len(text), ) def test_group(): text = "(test test2)" assert next_chunk(text) == ParseChunk( text="(test test2)", parse_type=ParseType.GROUP, start_index=0, end_index=len(text), ) def test_optional(): text = "[test test2]" assert next_chunk(text) == ParseChunk( text="[test test2]", parse_type=ParseType.OPT, start_index=0, end_index=len(text), ) def test_list_reference(): text = "{test}" assert next_chunk(text) == ParseChunk( text="{test}", parse_type=ParseType.LIST, start_index=0, end_index=len(text), ) def test_rule_reference(): text = "" assert next_chunk(text) == ParseChunk( text="", parse_type=ParseType.RULE, start_index=0, end_index=len(text), ) hassil-3.0.1/tests/test_recognize.py000066400000000000000000001637071477332007300175620ustar00rootroot00000000000000import io from typing import Set, cast import pytest from hassil import Intents, recognize, recognize_all, recognize_best from hassil.expression import TextChunk from hassil.intents import TextSlotList from hassil.models import MatchEntity, UnmatchedRangeEntity, UnmatchedTextEntity from hassil.recognize import MISSING_ENTITY TEST_YAML = """ language: "en" intents: TurnOnTV: data: - sentences: - "turn on [the] TV in " - "turn on TV" slots: domain: "media_player" name: "roku" SetBrightness: data: - sentences: - "set [the] brightness in to " slots: domain: "light" name: "all" - sentences: - "set [the] brightness of to " requires_context: domain: "light" slots: domain: "light" GetTemperature: data: - sentences: - " [the] temperature in " slots: domain: "climate" CloseCover: data: - sentences: - "close " requires_context: domain: "cover" slots: domain: "cover" Play: data: - sentences: - "play " excludes_context: domain: - "cover" - "light" CloseCurtains: data: - sentences: - "close [the] curtains [in ]" slots: domain: "cover" device_class: "curtain" requires_context: area: slot: true not_copied: "not copied value" copied_to_different: value: null slot: "different_slot" expansion_rules: area: "[the] {area}" name: "[the] {name}" brightness: "{brightness_pct}[%| percent]" what_is: "(what's | whats | what is)" lists: brightness_pct: range: type: percentage from: 0 to: 100 skip_words: - "please" """ @pytest.fixture def intents(): with io.StringIO(TEST_YAML) as test_file: return Intents.from_yaml(test_file) @pytest.fixture def slot_lists(): return { "area": TextSlotList.from_tuples( [("kitchen", "area.kitchen"), ("living room", "area.living_room")] ), "name": TextSlotList.from_tuples( [ ("hue", "light.hue", {"domain": "light"}), ( "garage door", "cover.garage_door", {"domain": "cover"}, ), ( "blue curtains", "cover.blue_curtains", { "domain": "cover", "device_class": "curtain", "area": "living_room", }, ), ( "roku", "media_player.roku", {"domain": 
"media_player"}, ), ] ), } # pylint: disable=redefined-outer-name def test_turn_on(intents, slot_lists): result = recognize("turn on kitchen TV, please", intents, slot_lists=slot_lists) assert result is not None assert result.intent.name == "TurnOnTV" assert result.text_chunks_matched > 0 area = result.entities["area"] assert area.name == "area" assert area.value == "area.kitchen" # From YAML assert result.entities["domain"].value == "media_player" assert result.entities["name"].value == "roku" # pylint: disable=redefined-outer-name def test_brightness_area(intents, slot_lists): result = recognize( "set the brightness in the living room to 75%", intents, slot_lists=slot_lists ) assert result is not None assert result.intent.name == "SetBrightness" assert result.text_chunks_matched > 0 assert result.entities["area"].value == "area.living_room" assert result.entities["brightness_pct"].value == 75 # From YAML assert result.entities["domain"].value == "light" assert result.entities["name"].value == "all" # pylint: disable=redefined-outer-name def test_brightness_area_words(intents, slot_lists): result = recognize( "set brightness in the living room to forty-two percent", intents, slot_lists=slot_lists, language="en", ) assert result is not None assert result.intent.name == "SetBrightness" assert result.entities["area"].value == "area.living_room" assert result.entities["brightness_pct"].value == 42 # From YAML assert result.entities["domain"].value == "light" assert result.entities["name"].value == "all" # pylint: disable=redefined-outer-name def test_brightness_name(intents, slot_lists): result = recognize( "set brightness of the hue to 50%", intents, slot_lists=slot_lists ) assert result is not None assert result.intent.name == "SetBrightness" assert result.entities["name"].value == "light.hue" assert result.entities["brightness_pct"].value == 50 # From YAML assert result.entities["domain"].value == "light" # pylint: disable=redefined-outer-name def test_brightness_not_cover(intents, slot_lists): result = recognize( "set brightness of the garage door to 50%", intents, slot_lists=slot_lists ) assert result is None # pylint: disable=redefined-outer-name def test_temperature(intents, slot_lists): result = recognize( "what is the temperature in the living room?", intents, slot_lists=slot_lists ) assert result is not None assert result.intent.name == "GetTemperature" assert result.entities["area"].value == "area.living_room" # From YAML assert result.entities["domain"].value == "climate" # pylint: disable=redefined-outer-name def test_close_name(intents, slot_lists): result = recognize("close the garage door", intents, slot_lists=slot_lists) assert result is not None assert result.intent.name == "CloseCover" assert result.entities["name"].value == "cover.garage_door" # From YAML assert result.entities["domain"].value == "cover" # pylint: disable=redefined-outer-name def test_close_not_light(intents, slot_lists): result = recognize("close the hue", intents, slot_lists=slot_lists) assert result is None # pylint: disable=redefined-outer-name def test_play(intents, slot_lists): result = recognize("play roku", intents, slot_lists=slot_lists) assert result is not None assert result.intent.name == "Play" assert result.entities["name"].value == "media_player.roku" # From context assert result.context["domain"] == "media_player" # pylint: disable=redefined-outer-name def test_play_no_cover(intents, slot_lists): result = recognize("play the garage door", intents, slot_lists=slot_lists) assert result is 
None # pylint: disable=redefined-outer-name def test_requires_context_implicit(intents, slot_lists): intent_context = { "area": "living room", "not_copied": "not copied value", "copied_to_different": "copied value", } result = recognize( "close the curtains", intents, slot_lists=slot_lists, intent_context=intent_context, ) assert result is not None assert result.intent.name == "CloseCurtains" # test_slot should not be copied over assert set(result.entities.keys()) == { "area", "domain", "device_class", "different_slot", } assert result.entities["area"].value == "living room" assert result.entities["domain"].value == "cover" assert result.entities["device_class"].value == "curtain" assert result.entities["different_slot"].value == "copied value" # pylint: disable=redefined-outer-name def test_requires_context_none_provided(intents, slot_lists): result = recognize("close the curtains", intents, slot_lists=slot_lists) assert result is None def test_lists_no_template() -> None: """Ensure list values without template syntax are plain text.""" yaml_text = """ language: "en" intents: {} lists: test: values: - "test value" """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) test_list = cast(TextSlotList, intents.slot_lists["test"]) text_in = test_list.values[0].text_in assert isinstance(text_in, TextChunk) assert text_in.text == "test value" def test_list_text_normalized() -> None: """Ensure list text in values are normalized.""" yaml_text = """ language: "en" intents: TestIntent: data: - sentences: - "run {test_name}" lists: test_name: values: - "tEsT 1" """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) result = recognize("run test 1", intents) assert result is not None assert result.entities["test_name"].value == "tEsT 1" def test_skip_prefix() -> None: yaml_text = """ language: "en" intents: TestIntent: data: - sentences: - "run {test_name}" lists: test_name: values: - "test" skip_words: - "the" """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) result = recognize("run the test", intents) assert result is not None assert result.entities["test_name"].value == "test" def test_skip_sorted() -> None: """Ensure skip words are processed longest first""" yaml_text = """ language: "en" intents: TestIntent: data: - sentences: - "run {test_name}" lists: test_name: values: - "test" skip_words: - "could" - "could you" """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) result = recognize("could you run test", intents) assert result is not None assert result.entities["test_name"].value == "test" def test_response_key() -> None: """Check response key in intent data""" yaml_text = """ language: "en" intents: TestIntent: data: - sentences: - "this is a test" response: "test_response" """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) result = recognize("this is a test", intents) assert result is not None assert result.response == "test_response" def test_entity_text() -> None: """Ensure original text is returned as well as substituted list value""" yaml_text = """ language: "en" intents: TestIntent: data: - sentences: - "run test {name} [now]" - "{name} test" lists: name: values: - in: "alpha " out: "A" """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) for sentence in ("run test alpha, now", "run test alpha!", "alpha test"): result = recognize(sentence, intents) assert result is not None, sentence assert 
result.entities["name"].value == "A" assert result.entities["name"].text_clean == "alpha" def test_number_text() -> None: """Ensure original text is returned as well as substituted number""" yaml_text = """ language: "en" intents: TestIntent: data: - sentences: - "set {percentage}[%] [now]" - "{percentage}[%] set" lists: percentage: range: from: 0 to: 100 """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) for sentence in ("set 50% now", "set 50%", "50% set"): result = recognize(sentence, intents) assert result is not None, sentence assert result.entities["percentage"].value == 50 assert result.entities["percentage"].text.strip() == "50" def test_recognize_all() -> None: """Test recognize_all method for returning all matches.""" yaml_text = """ language: "en" intents: TestIntent1: data: - sentences: - "run test" TestIntent2: data: - sentences: - "run test" """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) results = list(recognize_all("run test", intents)) assert len(results) == 2 assert {result.intent.name for result in results} == { "TestIntent1", "TestIntent2", } def test_ignore_whitespace() -> None: """Test option to ignore whitespace during matching.""" yaml_text = """ language: "en" settings: ignore_whitespace: true intents: TestIntent1: data: - sentences: - "run [the] test" """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) for sentence in ("runtest", "runthetest", "r u n t h e t e s t"): result = recognize(sentence, intents) assert result is not None, sentence def test_skip_words_ignore_whitespace() -> None: """Test option to ignore whitespace with skip words during matching.""" yaml_text = """ language: "en" settings: ignore_whitespace: true intents: TestIntent1: data: - sentences: - "ad" skip_words: - "bc" """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) result = recognize("abcd", intents) assert result is not None def test_local_expansion_rules() -> None: """Test local expansion rules, defined at the intent level""" yaml_text = """ language: "en" intents: GetSmokeState: data: - expansion_rules: verb: "(are|is)" subject: "[all] [the] light[s]" state: "on" location: "[in ]" sentences: - " " - " " expansion_rules: area: "[the] {area}" lists: area: values: - kitchen """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) for sentence in ( "are the lights on in the kitchen", "are the lights in the kitchen on", ): result = recognize(sentence, intents) assert result is not None, sentence assert result.intent.name == "GetSmokeState" def test_local_slot_lists() -> None: """Test local slot lists, defined at the intent level""" yaml_text = """ language: "en" intents: PlayTrackAtVolume: data: - sentences: - "play {track} at {volume}[%| percent] volume" lists: track: wildcard: true lists: volume: range: from: 1 to: 100 """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) for sentence in ("play paint it black at 90% volume",): result = recognize(sentence, intents) assert result is not None, sentence assert result.intent.name == "PlayTrackAtVolume" track = result.entities.get("track") volume = result.entities.get("volume") assert isinstance(track, MatchEntity) assert track.value == "paint it black" assert isinstance(volume, MatchEntity) assert volume.value == 90 def test_unmatched_entity() -> None: """Test allow_unmatched_entities option to provide better feedback.""" yaml_text = """ language: "en" 
intents: Test: data: - sentences: - "set [all] {domain} in {area} to {percent}[%] now" - "set {area} {domain} to {percent}" lists: area: values: - kitchen - bedroom domain: values: - lights percent: range: type: percentage from: 0 to: 100 """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "set fans in living room to 101% now" # Should fail without unmatched entities enabled result = recognize(sentence, intents, allow_unmatched_entities=False) assert result is None, f"{sentence} should not match" # Should succeed now result = recognize(sentence, intents, allow_unmatched_entities=True) assert result is not None, f"{sentence} should match" assert set(result.unmatched_entities.keys()) == {"domain", "area", "percent"} domain = result.unmatched_entities["domain"] assert isinstance(domain, UnmatchedTextEntity) assert domain.text == "fans " area = result.unmatched_entities["area"] assert isinstance(area, UnmatchedTextEntity) assert area.text == "living room " percent = result.unmatched_entities["percent"] assert isinstance(percent, UnmatchedRangeEntity) assert percent.value == 101 sentence = "set all lights in kitchen to blah blah blah now" result = recognize(sentence, intents, allow_unmatched_entities=True) assert result is not None, f"{sentence} should match" assert set(result.unmatched_entities.keys()) == {"percent"} percent = result.unmatched_entities["percent"] assert isinstance(percent, UnmatchedTextEntity) assert percent.text == "blah blah blah " # Test with unmatched entity at end of sentence sentence = "set kitchen lights to nothing" result = recognize(sentence, intents, allow_unmatched_entities=True) assert result is not None, f"{sentence} should match" assert set(result.unmatched_entities.keys()) == {"percent"} percent = result.unmatched_entities["percent"] assert isinstance(percent, UnmatchedTextEntity) assert percent.text == "nothing" def test_unmatched_range_only() -> None: """Test allow_unmatched_entities option with an out-of-range value only.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "set {domain} to {percent}[%]" lists: domain: values: - lights percent: range: type: percentage from: 0 to: 100 """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "set lights to 1001%" # Should fail without unmatched entities enabled result = recognize(sentence, intents, allow_unmatched_entities=False) assert result is None, f"{sentence} should not match" # Should succeed now result = recognize(sentence, intents, allow_unmatched_entities=True) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"domain"} assert set(result.unmatched_entities.keys()) == {"percent"} domain = result.entities["domain"] assert domain.text == "lights" percent = result.unmatched_entities["percent"] assert isinstance(percent, UnmatchedRangeEntity) assert percent.value == 1001 def test_no_empty_unmatched_entity() -> None: """Test that unmatched entities are not empty.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "turn on {name}[ please]" - "illuminate all[ {area}] lights" - "activate {name} now" lists: name: values: - light area: values: - bedroom """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "turn on " results = list(recognize_all(sentence, intents, allow_unmatched_entities=True)) assert not results, f"{sentence} should not match" # With optional word at end sentence = "turn on please" results = 
list(recognize_all(sentence, intents, allow_unmatched_entities=True)) assert not results, f"{sentence} should not match" sentence = "illuminate all lights" results = list(recognize_all(sentence, intents, allow_unmatched_entities=True)) assert results, f"{sentence} should match" assert len(results) == 1, "Only 1 result expected" assert not results[0].unmatched_entities, "No unmatched entities expected" sentence = "illuminate all kitchen lights" results = list(recognize_all(sentence, intents, allow_unmatched_entities=True)) assert results, f"{sentence} should match" assert len(results) == 1, "Only 1 result expected" result = results[0] assert set(result.unmatched_entities.keys()) == {"area"} area = result.unmatched_entities["area"] assert isinstance(area, UnmatchedTextEntity) assert area.text == "kitchen " # With required word at end sentence = "activate now" results = list(recognize_all(sentence, intents, allow_unmatched_entities=True)) assert not results, f"{sentence} should not match" def test_unmatched_entity_context() -> None: """Test that unmatched entities work with requires/excludes context.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "open {name}" requires_context: domain: cover lists: name: values: - garage door """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "open garage door" # Should fail when unmatched entities aren't allowed result = recognize(sentence, intents, allow_unmatched_entities=False) assert result is None, f"{sentence} should not match" # Should succeed now with an unmatched domain entity result = recognize(sentence, intents, allow_unmatched_entities=True) assert result is not None, f"{sentence} should match" assert set(result.unmatched_entities.keys()) == {"domain"} domain = result.unmatched_entities["domain"] assert isinstance(domain, UnmatchedTextEntity) assert domain.text == MISSING_ENTITY # Now both entities are unmatched sentence = "open back door" result = recognize(sentence, intents, allow_unmatched_entities=True) assert result is not None, f"{sentence} should match" assert set(result.unmatched_entities.keys()) == {"domain", "name"} domain = result.unmatched_entities["domain"] assert isinstance(domain, UnmatchedTextEntity) assert domain.text == MISSING_ENTITY name = result.unmatched_entities["name"] assert isinstance(name, UnmatchedTextEntity) assert name.text == "back door" def test_unmatched_slot_name() -> None: """Test that unmatched entities use slot name instead of list name.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "run {script_name:name}" - "execute script {script_number:number}" lists: script_name: values: - stealth mode script_number: range: from: 1 to: 100 """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "run missing name" result = recognize(sentence, intents, allow_unmatched_entities=True) assert result is not None, f"{sentence} should match" assert set(result.unmatched_entities.keys()) == {"name"} sentence = "execute script wrong number" result = recognize(sentence, intents, allow_unmatched_entities=True) assert result is not None, f"{sentence} should match" assert set(result.unmatched_entities.keys()) == {"number"} # Outside range sentence = "execute script 0" result = recognize(sentence, intents, allow_unmatched_entities=True) assert result is not None, f"{sentence} should match" assert set(result.unmatched_entities.keys()) == {"number"} def test_unmatched_entity_stops_at_optional() -> None: 
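    # Editor's note (illustration, not in the original file): with
    # allow_unmatched_entities=True, recognize_all() returns candidate matches
    # in which a slot that failed to match is captured as an
    # UnmatchedTextEntity covering the unrecognized words, e.g. (hypothetical):
    #
    #   result = next(recognize_all("set unknown thing to brightness 100%",
    #                               intents, allow_unmatched_entities=True))
    #   result.unmatched_entities["area"].text  # -> "unknown thing "
    #
    # The test below checks that such a capture never extends across an
    # optional chunk like "[to]".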
"""Test that unmatched entities do not cross optional text chunks.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "set {area} [to] brightness " - "set {name} [to] brightness " lists: name: values: - lamp area: values: - kitchen brightness_pct: range: type: percentage from: 0 to: 100 expansion_rules: brightness: "{brightness_pct}[%| percent]" """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "set unknown thing to brightness 100%" # Should fail without unmatched entities enabled result = recognize(sentence, intents, allow_unmatched_entities=False) assert result is None, f"{sentence} should not match" results = list(recognize_all(sentence, intents, allow_unmatched_entities=True)) assert len(results) == 4 area_names: Set[str] = set() entity_names: Set[str] = set() for result in results: assert len(result.unmatched_entities) == 1 area_entity = result.unmatched_entities.get("area") if area_entity is not None: assert isinstance(area_entity, UnmatchedTextEntity) area_names.add(area_entity.text) else: name_entity = result.unmatched_entities.get("name") assert name_entity is not None assert isinstance(name_entity, UnmatchedTextEntity) entity_names.add(name_entity.text) assert area_names == {"unknown thing ", "unknown thing to "} assert entity_names == {"unknown thing ", "unknown thing to "} def test_unmatched_entities_dont_share_text() -> None: """Test that text only goes into one unmatched entity.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "set [the] brightness [of] {name} [to] " lists: name: values: - lamp brightness_pct: range: type: percentage from: 0 to: 100 expansion_rules: brightness: "{brightness_pct}[%| percent]" """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "set brightness of unknown thing to 100%" # Should fail without unmatched entities enabled result = recognize(sentence, intents, allow_unmatched_entities=False) assert result is None, f"{sentence} should not match" results = list(recognize_all(sentence, intents, allow_unmatched_entities=True)) assert len(results) == 2 possible_names: Set[str] = set() for result in results: assert len(result.unmatched_entities) == 1 assert "name" in result.unmatched_entities name_entity = result.unmatched_entities["name"] assert isinstance(name_entity, UnmatchedTextEntity) possible_names.add(name_entity.text) assert possible_names == {"unknown thing ", "of unknown thing "} def test_unmatched_entities_cant_skip_words() -> None: yaml_text = """ language: "en" intents: Test: data: - sentences: - "[turn] {name} [to] on" lists: name: values: - lamp """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "turn on unknown thing" # Should fail without unmatched entities enabled result = recognize(sentence, intents, allow_unmatched_entities=False) assert result is None, f"{sentence} should not match" # Should also fail with unmatched entities enabled results = list(recognize_all(sentence, intents, allow_unmatched_entities=True)) assert len(results) == 0 def test_unmatched_entities_text_chunks_matched() -> None: yaml_text = """ language: "en" intents: Test: data: - sentences: - "[turn] on {name}" - "[turn] on {name} light" lists: name: values: - test """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "turn on unknown light" results = list(recognize_all(sentence, intents, allow_unmatched_entities=True)) assert len(results) == 2 # '[turn] 
on {name} light' should have more literal text matched result_1, result_2 = results assert result_1.intent_sentence is not None if result_1.intent_sentence.text == "[turn] on {name}": assert result_1.text_chunks_matched < result_2.text_chunks_matched else: assert result_2.text_chunks_matched < result_1.text_chunks_matched def test_wildcard() -> None: """Test wildcard slot lists/entities.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "play {album} by {artist}[ please] now" - "start {album} by {artist}" - "begin {album} by artist {artist}" lists: album: wildcard: true artist: wildcard: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) # Case should be kept sentence = "play The White Album by The Beatles please now" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"album", "artist"} assert result.entities["album"].value == "The White Album" assert result.entities["album"].is_wildcard assert result.entities["artist"].value == "The Beatles" assert result.entities["artist"].is_wildcard # Wildcards cannot be empty sentence = "play by please now" result = recognize(sentence, intents) assert result is None, f"{sentence} should not match" # Test without text at the end sentence = "start the white album by the beatles." result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"album", "artist"} assert result.entities["album"].value == "the white album" assert result.entities["album"].is_wildcard assert result.entities["artist"].value == "the beatles" assert result.entities["artist"].is_wildcard # Test use of next word in wildcard sentence = "play day by day by taken by trees now" results = list(recognize_all(sentence, intents)) assert results, f"{sentence} should match" assert len(results) == 3 # 3 "by" words # Verify each combination of album/artist is present album_artist = { (result.entities["album"].value, result.entities["artist"].value) for result in results } assert album_artist == { ("day", "day by taken by trees"), ("day by day", "taken by trees"), ("day by day by taken", "trees"), } for result in results: assert result.entities["album"].is_wildcard assert result.entities["artist"].is_wildcard # Test use of next word at end of word in wildcard sentence = "play Moby by Moby please now" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"album", "artist"} assert result.entities["album"].value == "Moby" assert result.entities["album"].is_wildcard assert result.entities["artist"].value == "Moby" assert result.entities["artist"].is_wildcard # Add "artist" word sentence = "begin day by day by artist taken by trees" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"album", "artist"} assert result.entities["album"].value == "day by day" assert result.entities["album"].is_wildcard assert result.entities["artist"].value == "taken by trees" assert result.entities["artist"].is_wildcard def test_wildcard_degenerate() -> None: """Test degenerate case for wildcards.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "play {album} by {artist}" lists: album: wildcard: true artist: wildcard: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "play by by by by by" 
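    # Editor's note (illustration, not in the original file): after the
    # literal "play ", the remaining words "by by by by by" must split as
    # {album} + " by " + {artist} with both wildcards non-empty. Only the
    # 2nd, 3rd, and 4th "by" can serve as the literal separator, giving
    # exactly 3 matches:
    #   ("by", "by by by"), ("by by", "by by"), ("by by by", "by")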
results = list(recognize_all(sentence, intents)) assert results, f"{sentence} should match" assert len(results) == 3 # 3 valid splits # Verify each combination album_artist = { (result.entities["album"].value, result.entities["artist"].value) for result in results } assert album_artist == { ("by", "by by by"), ("by by", "by by"), ("by by by", "by"), } def test_optional_wildcard() -> None: """Test optional wildcard slot list.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "play {album}[by {artist}]" lists: album: wildcard: true artist: wildcard: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) # With all wildcards sentence = "play the white album by the beatles" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"album", "artist"} assert result.entities["album"].value == "the white album" assert result.entities["artist"].value == "the beatles" # Missing one wildcard sentence = "play the white album" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"album"} assert result.entities["album"].value == "the white album" def test_wildcard_slot_name() -> None: """Test wildcard uses slot instead of list name.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "run {script_name:name}" lists: script_name: wildcard: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "run script 1" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"name"} assert result.entities["name"].value == "script 1" def test_wildcard_ordering() -> None: """Test wildcard ordering by number of literal text chunks.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "play {album} by {artist}" - "play {album} by {artist} in {room}" lists: album: wildcard: true artist: wildcard: true room: wildcard: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "play the white album by the beatles in the living room" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"album", "artist", "room"} assert result.entities["album"].value == "the white album" assert result.entities["artist"].value == "the beatles" assert result.entities["room"].value == "the living room" # Check that the first sentence can still be used sentence = "play the white album by the beatles" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"album", "artist"} assert result.entities["album"].value == "the white album" assert result.entities["artist"].value == "the beatles" def test_ordering_only_wildcards() -> None: """Test that re-ordering only affects wildcards.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "turn on {light} in {room}" - "turn on {light}" lists: light: values: - light - light in bedroom room: values: - bedroom """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "turn on light in bedroom" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"light", "room"} assert result.entities["light"].value == "light" assert 
result.entities["room"].value == "bedroom" def test_wildcard_punctuation() -> None: """Test that wildcards do not include punctuation.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "is {name} in {zone}" lists: name: wildcard: true zone: wildcard: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "is Alice in New York!?" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"name", "zone"} assert result.entities["name"].value == "Alice" assert result.entities["zone"].value == "New York" def test_wildcard_inside_word() -> None: """Test wildcard inside of a word.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "ab{test}cd" lists: test: wildcard: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "ab123cd" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"test"} assert result.entities["test"].value == "123" assert result.entities["test"].is_wildcard def test_wildcard_outside_word() -> None: """Test wildcard outside of a word.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "ab{test} cd" lists: test: wildcard: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "ab123 cd" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert set(result.entities.keys()) == {"test"} assert result.entities["test"].value == "123" assert result.entities["test"].is_wildcard sentence = "ab123cd" result = recognize(sentence, intents) assert result is None, f"{sentence} should not match" def test_wildcard_outside_word_ignore_whitespace() -> None: """Test wildcard outside of a word when ignoring whitespace.""" yaml_text = """ language: "en" settings: ignore_whitespace: true intents: Test: data: - sentences: - "ab{test} cd" lists: test: wildcard: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) for sentence in ("abc123 cd", "abc123cd"): result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" def test_entity_metadata() -> None: """Ensure metadata is returned for text slots""" yaml_text = """ language: "en" intents: TestIntent: data: - sentences: - "run test {name} [now]" - "{name} test" lists: name: values: - in: "alpha " out: "A" metadata: is_alpha: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) for sentence in ("run test alpha, now", "run test alpha!", "alpha test"): result = recognize(sentence, intents) assert result is not None, sentence assert result.entities["name"].value == "A" assert result.entities["name"].text_clean == "alpha" assert result.entities["name"].metadata == {"is_alpha": True} def test_sentence_metadata() -> None: """Test that metadata attached to sentences is passed through to the result.""" yaml_text = """ language: "en" intents: Test: data: - sentences: - "this is a test" metadata: string_key: "test value" int_key: 1234 """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "this is a test" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert result.intent_metadata is not None, "No metadata" assert result.intent_metadata == {"string_key": "test value", "int_key": 1234} def 
test_digits_calc() -> None: """Test that metadata attached to sentences is passed through to the result.""" yaml_text = """ language: "en" intents: Calculate: data: - sentences: - "calc[ulate] {x} {operator} {y}" lists: operator: values: - in: "(+|plus)" out: "+" x: range: from: 0 to: 100 digits: true words: true y: range: from: 0 to: 100 digits: true words: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "calc 1 + 2" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert result.entities.keys() == {"x", "operator", "y"} assert result.entities["x"].value == 1 assert result.entities["operator"].value == "+" assert result.entities["y"].value == 2 sentence = "calc 1 plus two" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert result.entities.keys() == {"x", "operator", "y"} assert result.entities["x"].value == 1 assert result.entities["operator"].value == "+" assert result.entities["y"].value == 2 def test_range_params_calc() -> None: """Test that params attached to RangeSlotList affect the parsing.""" yaml_text = """ language: "en" intents: Calculate: data: - sentences: - "calc[ulate] {x} {operator} {y}" lists: operator: values: - in: "(+|plus)" out: "+" x: range: from: 0 to: 100 digits: false words: true y: range: from: 0 to: 100 digits: true words: false """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) # x can't have digits sentence = "calc 1 + 2" result = recognize(sentence, intents) assert result is None, f"{sentence} should not match" # y can't have words sentence = "calc one plus two" result = recognize(sentence, intents) assert result is None, f"{sentence} should not match" sentence = "calc one + 2" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert result.entities.keys() == {"x", "operator", "y"} assert result.entities["x"].value == 1 assert result.entities["operator"].value == "+" assert result.entities["y"].value == 2 def test_range_rule_sets_calc() -> None: """Test that params attached to RangeSlotList affect the parsing.""" # https://github.com/rhasspy/unicode-rbnf/blob/master/unicode_rbnf/engine.py#L13 yaml_text = """ language: "en" intents: Calculate: data: - sentences: - "calc[ulate] {x} {operator} {y}" lists: operator: values: - in: "(+|plus)" out: "+" x: range: from: 0 to: 3000 digits: true words: true y: range: from: 0 to: 3000 digits: true words: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) sentence = "calc one thousand nine hundred ninety-nine + 23" result = recognize(sentence, intents) assert result is not None, f"{sentence} should match" assert result.entities.keys() == {"x", "operator", "y"} assert result.entities["x"].value == 1999 assert result.entities["operator"].value == "+" assert result.entities["y"].value == 23 sentence = "calc 23 + nineteen ninety-nine" result = recognize(sentence, intents) assert result is None, f"{sentence} should not match" # pylint: disable=redefined-outer-name def test_context_dict(intents, slot_lists): yaml_text = """ language: "en" intents: TestIntent: data: - sentences: - "test sentence" requires_context: slot1: value: value1 slot: true excludes_context: slot2: value2 """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) slot1 = {"value": "value1", "text": "Value One"} # Try includes context result = recognize( "test sentence", intents, 
slot_lists=slot_lists, intent_context={"slot1": slot1}, ) assert result is not None assert result.context.keys() == {"slot1"} assert result.context["slot1"] == slot1 assert result.entities.keys() == {"slot1"} assert result.entities["slot1"].value == "value1" assert result.entities["slot1"].text == "Value One" # Try excludes context result = recognize( "test sentence", intents, slot_lists=slot_lists, intent_context={"slot1": slot1, "slot2": {"value": "value2"}}, ) assert result is None # pylint: disable=redefined-outer-name def test_range_multiplier(intents, slot_lists): yaml_text = """ language: "en" intents: SetVolume: data: - sentences: - "set volume to {volume_level}" lists: volume_level: range: from: 0 to: 100 multiplier: 0.01 """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) result = recognize("set volume to 50", intents) assert result is not None assert result.entities.keys() == {"volume_level"} assert result.entities["volume_level"].value == 0.5 assert result.entities["volume_level"].text == "50" def test_recognize_best(): yaml_text = """ language: "en" intents: TurnOn: data: - sentences: - "{anything} lamp" metadata: best_key: "best value" - sentences: - "turn on {area} lamp" - "turn on {name}" lists: area: values: - bedroom name: values: - bedroom lamp anything: wildcard: true """ with io.StringIO(yaml_text) as test_file: intents = Intents.from_yaml(test_file) # Should match the sentence with the wildcard slot because it's listed first. result = recognize("turn on bedroom lamp", intents) # not best assert result is not None assert result.entities.keys() == {"anything"} # Should match the sentence with the wildcard slot because of its metadata. result = recognize_best( "turn on bedroom lamp", intents, best_metadata_key="best_key" ) assert result is not None assert result.entities.keys() == {"anything"} # Should match the sentence with the "area" slot because it has the most # literal text matched. 
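    # Editor's note (illustration, not in the original file): recognize_best()
    # ranks candidates roughly as: metadata key match (when best_metadata_key
    # is given), then priority slot match (best_slot_name), then the amount of
    # literal (non-wildcard) text matched. Here "turn on {area} lamp" matches
    # "turn on" and "lamp" literally, beating the mostly-wildcard
    # "{anything} lamp".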

def test_recognize_best():
    yaml_text = """
    language: "en"
    intents:
      TurnOn:
        data:
          - sentences:
              - "{anything} lamp"
            metadata:
              best_key: "best value"
          - sentences:
              - "turn on {area} lamp"
              - "turn on {name}"
    lists:
      area:
        values:
          - bedroom
      name:
        values:
          - bedroom lamp
      anything:
        wildcard: true
    """
    with io.StringIO(yaml_text) as test_file:
        intents = Intents.from_yaml(test_file)

    # Should match the sentence with the wildcard slot because it's listed first.
    result = recognize("turn on bedroom lamp", intents)  # not best
    assert result is not None
    assert result.entities.keys() == {"anything"}

    # Should match the sentence with the wildcard slot because of its metadata.
    result = recognize_best(
        "turn on bedroom lamp", intents, best_metadata_key="best_key"
    )
    assert result is not None
    assert result.entities.keys() == {"anything"}

    # Should match the sentence with the "area" slot because it has the most
    # literal text matched.
    result = recognize_best("turn on bedroom lamp", intents)
    assert result is not None
    assert result.entities.keys() == {"area"}

    # Should match the sentence with the "name" slot because it's a priority
    result = recognize_best("turn on bedroom lamp", intents, best_slot_name="name")
    assert result is not None
    assert result.entities.keys() == {"name"}
    assert result.entities["name"].value == "bedroom lamp"


def test_regex_branching():
    yaml_text = """
    language: "en"
    intents:
      TurnOn:
        data:
          - sentences:
              - "turn on ({area} {name}|{name})"
    lists:
      area:
        values:
          - bedroom
      name:
        values:
          - bedroom lamp
          - lamp
    """
    with io.StringIO(yaml_text) as test_file:
        intents = Intents.from_yaml(test_file)

    results = list(recognize_all("turn on bedroom lamp", intents))
    assert len(results) == 2


def test_commas_dont_change() -> None:
    """Ensure commas don't change the interpretation of a sentence."""
    yaml_text = """
    language: "en"
    intents:
      TurnOn:
        data:
          - sentences:
              - "turn on [the] {name}"
              - "turn on [the] {area} lights"
    lists:
      name:
        values:
          - lamp
      area:
        values:
          - living room
    """
    with io.StringIO(yaml_text) as test_file:
        intents = Intents.from_yaml(test_file)

    for sentence in (
        "turn on the living room lights",
        "turn, on the, living room lights",
        "turn on, the living room lights",
        "turn on the, living room lights",
        "turn on the living room, lights",
    ):
        result = recognize(sentence, intents)
        assert result is not None, sentence
        assert result.entities.keys() == {"area"}


def test_wildcard_then_other_stuff() -> None:
    """Test wildcard followed by expansion rule and list."""
    yaml_text = """
    language: "en"
    intents:
      SetTimer:
        data:
          - sentences:
              - "set timer {timer_name:name} <timer_duration>"
              - "set timer {timer_name:name} {timer_state:state} [now]"
      AddItem:
        data:
          - sentences:
              - "add {item} [to [my]] {todo_list}"
    lists:
      timer_name:
        wildcard: true
      item:
        wildcard: true
      minutes:
        range:
          from: 1
          to: 59
      timer_state:
        values:
          - "on"
          - "off"
      todo_list:
        values:
          - "shopping list"
    expansion_rules:
      timer_duration: "{minutes} minute[s]"
    """
    with io.StringIO(yaml_text) as test_file:
        intents = Intents.from_yaml(test_file)

    # Check ranges
    for sentence in ("set timer pizza 5 minutes", "set timer pizza five minutes"):
        result = recognize(sentence, intents)
        assert result is not None, f"{sentence} should match"
        assert set(result.entities.keys()) == {"name", "minutes"}
        assert result.entities["name"].text == "pizza"
        assert result.entities["name"].value == "pizza"
        assert result.entities["minutes"].text.strip() in {"5", "five"}
        assert result.entities["minutes"].value == 5

    # Check value list
    sentence = "set timer a big long name on now"
    result = recognize(sentence, intents)
    assert result is not None, f"{sentence} should match"
    assert set(result.entities.keys()) == {"name", "state"}
    assert result.entities["name"].text == "a big long name"
    assert result.entities["name"].value == "a big long name"
    assert result.entities["state"].text == "on"
    assert result.entities["state"].value == "on"

    sentence = "add apples to my shopping list"
    result = recognize(sentence, intents)
    assert result is not None, f"{sentence} should match"
    assert set(result.entities.keys()) == {"item", "todo_list"}
    assert result.entities["item"].text == "apples"
    assert result.entities["item"].value == "apples"
    assert result.entities["todo_list"].text == "shopping list"
    assert result.entities["todo_list"].value == "shopping list"
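
# The SetTimer sentences above exercise a wildcard followed by more template
# text. Because {timer_name} can match anything, the matcher seemingly has to
# anchor on whatever follows the wildcard (the <timer_duration> expansion rule
# or the {timer_state} list) to decide where the wildcard text ends; the
# asserts check that "pizza" and "a big long name" are cut off correctly.
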

def test_range_list_with_one_number() -> None:
    """Test a range list with the same start/stop value."""
    yaml_text = """
    language: "en"
    intents:
      TestIntent:
        data:
          - sentences:
              - "test {value}"
    lists:
      value:
        range:
          from: 1
          to: 1
    """
    with io.StringIO(yaml_text) as test_file:
        intents = Intents.from_yaml(test_file)

    # Check ranges
    assert recognize("test 1", intents)
    assert not recognize("test 2", intents)


def test_range_list_with_halves() -> None:
    """Test a range list with fractions (1/2)."""
    yaml_text = """
    language: "en"
    intents:
      TestIntent:
        data:
          - sentences:
              - "test {value}"
    lists:
      value:
        range:
          from: 1
          to: 2
          fractions: halves
    """
    with io.StringIO(yaml_text) as test_file:
        intents = Intents.from_yaml(test_file)

    assert recognize("test 1", intents)

    result = recognize("test 1.5", intents)
    assert result is not None
    value = result.entities.get("value")
    assert value is not None
    assert value.value == 1.5
    assert value.text == "1.5"
    assert value.text_clean == "1.5"

    result = recognize("test one point five", intents, language="en")
    assert result is not None
    value = result.entities.get("value")
    assert value is not None
    assert value.value == 1.5
    assert value.text == "one point five"
    assert value.text_clean == "one point five"

    result = recognize("test 2.0", intents)
    assert result is not None
    value = result.entities.get("value")
    assert value is not None
    assert value.value == 2
    assert value.text == "2.0"
    assert value.text_clean == "2.0"

    result = recognize("test 2.5", intents)
    assert result is not None
    value = result.entities.get("value")
    assert value is not None
    assert value.value == 2.5
    assert value.text == "2.5"
    assert value.text_clean == "2.5"

    # Only halves
    assert not recognize("test 2.1", intents)

    # Comma separator
    result = recognize("test 2,5", intents)
    assert result is not None
    value = result.entities.get("value")
    assert value is not None
    assert value.value == 2.5
    assert value.text == "2,5"
    assert value.text_clean == "2,5"


def test_range_list_with_tenths() -> None:
    """Test a range list with fractions (1/10)."""
    yaml_text = """
    language: "en"
    intents:
      TestIntent:
        data:
          - sentences:
              - "test {value}"
    lists:
      value:
        range:
          from: 1
          to: 2
          fractions: tenths
    """
    with io.StringIO(yaml_text) as test_file:
        intents = Intents.from_yaml(test_file)

    assert recognize("test 1", intents)
    assert recognize("test 2", intents)

    for integer in range(1, 3):
        for tenth in range(1, 10):
            value_str = f"{integer}.{tenth}"
            result = recognize(f"test {value_str}", intents)
            assert result is not None
            value = result.entities.get("value")
            assert value is not None
            assert value.value == float(value_str)
            assert value.text == value_str
            assert value.text_clean == value_str

    result = recognize("test one point one", intents, language="en")
    assert result is not None
    value = result.entities.get("value")
    assert value is not None
    assert value.value == 1.1
    assert value.text == "one point one"

    result = recognize("test two point six", intents, language="en")
    assert result is not None
    value = result.entities.get("value")
    assert value is not None
    assert value.value == 2.6
    assert value.text == "two point six"

    # Only tenths
    assert not recognize("test 2.12", intents)


def test_range_lists_separated_by_punctuation() -> None:
    """Test range lists separated by punctuation."""
    yaml_text = """
    language: "en"
    intents:
      TestIntent:
        data:
          - sentences:
              - "test {value1}.{value2}"
    lists:
      value1:
        range:
          from: 0
          to: 1
      value2:
        range:
          from: 0
          to: 2
    """
    with io.StringIO(yaml_text) as test_file:
        intents = Intents.from_yaml(test_file)

    result = recognize("test 1.2", intents)
    assert result is not None
    value1 = result.entities.get("value1")
    assert value1 is not None
    assert value1.value == 1
    value2 = result.entities.get("value2")
    assert value2 is not None
    assert value2.value == 2
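
# For reference, the "fractions: halves" / "fractions: tenths" options used
# above map to RangeFractionType in hassil.intents. A minimal sketch of the
# equivalent programmatic slot list, mirroring the usage in test_sample.py
# later in this repository:
#
#     from hassil.intents import RangeFractionType, RangeSlotList
#
#     halves = RangeSlotList(
#         name=None, start=1, stop=2, fraction_type=RangeFractionType.HALVES
#     )
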

def test_range_lists_separated_by_punctuation_with_wildcard() -> None:
    """Test range lists separated by punctuation preceded by a wildcard."""
    yaml_text = """
    language: "en"
    intents:
      TestIntent:
        data:
          - sentences:
              - "test {anything} {value1}.{value2}"
    lists:
      anything:
        wildcard: true
      value1:
        range:
          from: 0
          to: 1
      value2:
        range:
          from: 0
          to: 2
    """
    with io.StringIO(yaml_text) as test_file:
        intents = Intents.from_yaml(test_file)

    result = recognize("test for the big 1.2", intents)
    assert result is not None
    anything = result.entities.get("anything")
    assert anything is not None
    assert anything.value == "for the big"
    value1 = result.entities.get("value1")
    assert value1 is not None
    assert value1.value == 1
    value2 = result.entities.get("value2")
    assert value2 is not None
    assert value2.value == 2


def test_list_value_in_no_out() -> None:
    """Test list values with "in" but no "out"."""
    yaml_text = """
    language: "en"
    intents:
      TestIntent:
        data:
          - sentences:
              - "test {value1}"
          - sentences:
              - "also test {value2}"
            lists:
              value2:
                values:
                  - g[h]i
                  - j[k]l
    lists:
      value1:
        values:
          - a[b]c
          - in: d[e]f
    """
    with io.StringIO(yaml_text) as test_file:
        intents = Intents.from_yaml(test_file)

    for value in ("abc", "ac", "def", "df"):
        result = recognize(f"test {value}", intents)
        assert result is not None, value
        assert "value1" in result.entities
        assert result.entities["value1"].value == value

    for value in ("ghi", "gi", "jkl", "jl"):
        result = recognize(f"also test {value}", intents)
        assert result is not None, value
        assert "value2" in result.entities
        assert result.entities["value2"].value == value
hassil-3.0.1/tests/test_sample.py000066400000000000000000000103711477332007300170420ustar00rootroot00000000000000
from hassil import parse_sentence
from hassil.intents import RangeFractionType, RangeSlotList, TextSlotList
from hassil.sample import sample_sentence


def test_text_chunk():
    assert set(sample_sentence(parse_sentence("this is a test"))) == {"this is a test"}


def test_group():
    assert set(sample_sentence(parse_sentence("this (is a) test"))) == {
        "this is a test"
    }


def test_optional():
    assert set(sample_sentence(parse_sentence("turn on [the] light[s]"))) == {
        "turn on light",
        "turn on lights",
        "turn on the light",
        "turn on the lights",
    }


def test_double_optional():
    assert set(sample_sentence(parse_sentence("turn [on] [the] light[s]"))) == {
        "turn light",
        "turn lights",
        "turn on light",
        "turn on lights",
        "turn the light",
        "turn the lights",
        "turn on the light",
        "turn on the lights",
    }


def test_alternative():
    assert set(sample_sentence(parse_sentence("this is (the | a) test"))) == {
        "this is a test",
        "this is the test",
    }


def test_list():
    sentence = parse_sentence("turn off {area}")
    areas = TextSlotList.from_strings(["kitchen", "living room"])
    assert set(sample_sentence(sentence, slot_lists={"area": areas})) == {
        "turn off kitchen",
        "turn off living room",
    }


def test_list_range():
    sentence = parse_sentence("run test {num}")
    num_list = RangeSlotList(name=None, start=1, stop=3)
    assert set(sample_sentence(sentence, slot_lists={"num": num_list})) == {
        "run test 1",
        "run test 2",
        "run test 3",
    }


def test_list_range_missing_language():
    sentence = parse_sentence("run test {num}")
    num_list = RangeSlotList(name=None, start=1, stop=3, words=True)

    # Range slot digits cannot be converted to words without a language available.
    assert set(sample_sentence(sentence, slot_lists={"num": num_list})) == {
        "run test 1",
        "run test 2",
        "run test 3",
    }
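
# Note: words=True only takes effect when sample_sentence() is given a
# language (see test_list_range_words below); without one, sampling appears to
# fall back to digit forms rather than raising an error.
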

def test_list_range_words():
    sentence = parse_sentence("run test {num}")
    num_list = RangeSlotList(name=None, start=1, stop=3, words=True)
    assert set(
        sample_sentence(sentence, slot_lists={"num": num_list}, language="en")
    ) == {
        "run test 1",
        "run test one",
        "run test 2",
        "run test two",
        "run test 3",
        "run test three",
    }


def test_list_range_halves_words():
    sentence = parse_sentence("run test {num}")
    num_list = RangeSlotList(
        name=None, start=1, stop=1, fraction_type=RangeFractionType.HALVES, words=True
    )
    assert set(
        sample_sentence(sentence, slot_lists={"num": num_list}, language="en")
    ) == {
        "run test 1",
        "run test one",
        "run test 1.5",
        "run test one point five",
    }


def test_list_range_tenths_words():
    sentence = parse_sentence("run test {num}")
    num_list = RangeSlotList(
        name=None, start=1, stop=1, fraction_type=RangeFractionType.TENTHS, words=True
    )
    assert set(
        sample_sentence(sentence, slot_lists={"num": num_list}, language="en")
    ) == {
        "run test 1",
        "run test one",
        "run test 1.1",
        "run test one point one",
        "run test 1.2",
        "run test one point two",
        "run test 1.3",
        "run test one point three",
        "run test 1.4",
        "run test one point four",
        "run test 1.5",
        "run test one point five",
        "run test 1.6",
        "run test one point six",
        "run test 1.7",
        "run test one point seven",
        "run test 1.8",
        "run test one point eight",
        "run test 1.9",
        "run test one point nine",
    }


def test_rule():
    sentence = parse_sentence("turn off <area>")
    assert set(
        sample_sentence(
            sentence,
            expansion_rules={"area": parse_sentence("[the] kitchen")},
        )
    ) == {"turn off kitchen", "turn off the kitchen"}


def test_permutation():
    assert set(sample_sentence(parse_sentence("a;b;[c] d"))) == {
        "a b d",
        "a b c d",
        "a c d b",
        "a d b",
        "b a d",
        "b a c d",
        "b c d a",
        "b d a",
        "c d a b",
        "c d b a",
        "d a b",
        "d b a",
    }
hassil-3.0.1/tests/test_trie.py000066400000000000000000000027241477332007300165270ustar00rootroot00000000000000
from hassil.trie import Trie


def test_insert_find() -> None:
    """Test inserting and finding values in the trie."""
    trie = Trie()
    trie.insert("1", 1)
    trie.insert("two", 2)
    trie.insert("10", 10)
    trie.insert("twenty two", 22)

    text = "set to 10"
    results = list(trie.find(text))
    assert results == [(8, "1", 1), (9, "10", 10)]
    for end_pos, number_text, number_value in results:
        start_pos = end_pos - len(number_text)
        assert text[start_pos:end_pos] == number_text
        assert int(number_text) == number_value

    assert list(trie.find("set to 1, then *two*, then finally twenty two please!")) == [
        (8, "1", 1),
        (19, "two", 2),
        (45, "twenty two", 22),
    ]

    # Without unique, *[two]* and twenty [two] will return 2
    assert list(
        trie.find("set to 1, then *two*, then finally twenty two please!", unique=False)
    ) == [
        (8, "1", 1),
        (19, "two", 2),
        (45, "two", 2),
        (45, "twenty two", 22),
    ]

    # Test a character in between
    assert not list(trie.find("tw|o"))

    # Test non-existent value
    assert not list(trie.find("three"))

    # Test empty string
    assert not list(trie.find(""))


def test_multiple_values() -> None:
    """Test that we can insert multiple values for the same string."""
    trie = Trie()
    trie.insert("test", 1)
    trie.insert("test", 2)

    assert list(trie.find("this is a test")) == [(14, "test", 1), (14, "test", 2)]
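
# A rough mental model of Trie.find(), based on the assertions above: the text
# is scanned once, yielding (end_pos, key, value) for every inserted key that
# ends at end_pos on a word boundary (hence "tw|o" matches nothing). With
# unique=True (the default), a key ending at the same position as a longer
# match ("two" inside "twenty two") is suppressed.
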
hassil-3.0.1/tests/test_util.py000066400000000000000000000022111477332007300165300ustar00rootroot00000000000000
from hassil.util import (
    is_template,
    merge_dict,
    normalize_text,
    normalize_whitespace,
    remove_escapes,
    remove_punctuation,
)


def test_merge_dict():
    base_dict = {"a": 1, "list": [1], "dict": {"a": 1}}
    merge_dict(base_dict, {"a": 2, "list": [2], "dict": {"b": 2}})
    assert base_dict == {"a": 2, "list": [1, 2], "dict": {"a": 1, "b": 2}}


def test_remove_escapes():
    assert remove_escapes("\\[test\\]") == "[test]"


def test_normalize_whitespace():
    assert normalize_whitespace("this    is a     test") == "this is a test"


def test_normalize_text():
    assert normalize_text("tHIS is A Test") == "tHIS is A Test"


def test_is_template():
    assert not is_template("just some plain text")
    assert is_template("[optional] word")
    assert is_template("a {list}")
    assert is_template("a <rule>")
    assert is_template("(a group)")
    assert is_template("an | alternative")


def test_remove_punctuation():
    assert remove_punctuation("test") == "test"
    assert remove_punctuation("test.") == "test"
    assert remove_punctuation("A.C.") == "A.C."
    assert remove_punctuation("A.C") == "A.C"
hassil-3.0.1/tox.ini000066400000000000000000000003411477332007300143150ustar00rootroot00000000000000
[tox]
env_list =
    py{39,310,311,312,313}
minversion = 4.12.1

[testenv]
description = run the tests with pytest
package = wheel
wheel_build_env = .pkg
deps =
    pytest>=6
commands =
    pytest {tty:--color=yes} {posargs}
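
# Typical usage (an assumption, not part of the original config): plain `tox`
# runs every environment in env_list; `tox -e py313` runs a single interpreter.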