pax_global_header00006660000000000000000000000064143412047460014517gustar00rootroot0000000000000052 comment=eaded326a7d6c6411b1bc2d33c7151167228752d jeroenterheerdt-pycsspeechtts-eaded32/000077500000000000000000000000001434120474600203605ustar00rootroot00000000000000jeroenterheerdt-pycsspeechtts-eaded32/.gitignore000066400000000000000000000022051434120474600223470ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # dotenv .env # virtualenv .venv venv/ ENV/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ jeroenterheerdt-pycsspeechtts-eaded32/LICENSE000066400000000000000000000020621434120474600213650ustar00rootroot00000000000000MIT License Copyright (c) 2018 Jeroen ter Heerdt Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. jeroenterheerdt-pycsspeechtts-eaded32/MANIFEST.in000066400000000000000000000000211434120474600221070ustar00rootroot00000000000000include README.mdjeroenterheerdt-pycsspeechtts-eaded32/README.md000066400000000000000000000023371434120474600216440ustar00rootroot00000000000000# pycsspeechtts Python (py) library to use Microsofts Cognitive Services Speech (csspeech) Text to Speech (tts) API. The cryptic name is the combination of the abbrevations shown above. Usage: ```python from pycsspeechtts import TTSTranslator t = TTSTranslator("YOUR API KEY","westeurope") data = t.speak(text='The default voice is using Microsoft Neural Voice. When using a neural voice, synthesized speech is nearly indistinguishable from the human recordings.') with open("file1.wav", "wb") as f: f.write(data) data = t.speak('en-gb','Male','I am Max', 'George, Apollo', 'riff-16khz-16bit-mono-pcm', text='I am Max') with open("file2.wav", "wb") as f: f.write(data) ``` You can also use custom voice by specifying `isCustom=True` and providing a `customEndpoint`: ```python from pycsspeechtts import TTSTranslator t = TTSTranslator("YOUR API KEY","westeurope", isCustom=True, customEndpoint=MyEndpoint) data = t.speak(language='en-gb',gender='Male',voiceType="ArchieNeural",text="This is a test for custom voice") ``` See test.py for more samples. Refer to https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support to find the valid values for language, gender, voicetype and output formats.jeroenterheerdt-pycsspeechtts-eaded32/pyproject.toml000066400000000000000000000011321434120474600232710ustar00rootroot00000000000000[build-system] requires = ["hatchling"] build-backend = "hatchling.build" [project] name = "pycsspeechtts" version = "1.0.8" authors = [ { name="Jeroen ter Heerdt"}, ] description = "Python 3 interface to Microsoft Cognitive Services Text To Speech" readme = "README.md" requires-python = ">=3.7" classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ] [project.urls] "Homepage" = "https://github.com/jeroenterheerdt/pycsspeechtts" "Bug Tracker" = "https://github.com/jeroenterheerdt/pycsspeechtts/issues"jeroenterheerdt-pycsspeechtts-eaded32/setup.py000066400000000000000000000012251434120474600220720ustar00rootroot00000000000000from setuptools import setup, find_packages with open("README.md", "r") as fh: long_description = fh.read() setup(name='pycsspeechtts', version='1.0.8', description='Python 3 interface to Microsoft Cognitive Services Text To Speech', long_description=long_description, long_description_content_type="text/markdown", url='https://github.com/jeroenterheerdt/pycsspeechtts', author='Jeroen ter Heerdt', license='MIT', install_requires=['requests>=2.0'], tests_require=['mock'], test_suite='tests', packages=find_packages(exclude=["dist"]), include_package_data=True, zip_safe=True) jeroenterheerdt-pycsspeechtts-eaded32/src/000077500000000000000000000000001434120474600211475ustar00rootroot00000000000000jeroenterheerdt-pycsspeechtts-eaded32/src/pycsspeechtts/000077500000000000000000000000001434120474600240505ustar00rootroot00000000000000jeroenterheerdt-pycsspeechtts-eaded32/src/pycsspeechtts/__init__.py000066400000000000000000000000001434120474600261470ustar00rootroot00000000000000jeroenterheerdt-pycsspeechtts-eaded32/src/pycsspeechtts/pycsspeechtts.py000066400000000000000000000071631434120474600273320ustar00rootroot00000000000000""" Python wrapper for Microsoft Cognitive Services Text-to-speech translator """ import requests import json from xml.etree import ElementTree import logging import sys _LOGGER = logging.getLogger(__name__) AccessTokenUrlTemplate = "https://{}.api.cognitive.microsoft.com/sts/v1.0/issueToken" SpeechUrlTemplate = "https://{}.tts.speech.microsoft.com/cognitiveservices/v1" class TTSTranslator(object): """ Interface class for the Microsoft Cognitive Services Text-to-speech translator """ def __init__(self, apiKey, region="eastus", isCustom=False, customEndpoint=None): self._apiKey = apiKey self._geoLocation = region self._isCustom = isCustom self._customEndpoint=customEndpoint headers = {"Ocp-Apim-Subscription-Key": self._apiKey} if not self._isCustom: response = requests.post(AccessTokenUrlTemplate.format( self._geoLocation), headers=headers) response.raise_for_status() self._accesstoken = str(response.text) _LOGGER.debug("Connection Initialized OK") def speak(self, language="en-us", gender="Female", voiceType="JessaNeural", output="riff-24khz-16bit-mono-pcm", rate="+0.00%", volume="+0.00%", pitch="default", contour="(0%,+0%) (100%,+0%)", text=None): def name_lang(language): lang1,lang2 = language.split("-") return "{}-{}".format(lang1,lang2.upper()) body = ElementTree.Element('speak', version='1.0') body.set('{http://www.w3.org/XML/1998/namespace}lang', language) body.set('xmlns','http://www.w3.org/2001/10/synthesis') body.set('xmlns:mstts', 'http://www.w3.org/2001/mstts') voice = ElementTree.SubElement(body, 'voice') voice.set('{http://www.w3.org/XML/1998/namespace}lang', language) voice.set('{http://www.w3.org/XML/1998/namespace}gender', gender) voice.set( 'name', 'Microsoft Server Speech Text to Speech Voice ('+name_lang(language)+', '+voiceType+')') endpoint = None if self._isCustom: # this is a custom voice endpoint = self._customEndpoint headers = {"Content-Type": "application/ssml+xml", "X-Microsoft-OutputFormat": output, "Ocp-Apim-Subscription-Key": self._apiKey, "User-Agent": "PYCSSpeechTTS" } voice.text = text else: # not a custom voice, generate the endpoint endpoint = SpeechUrlTemplate.format(self._geoLocation) headers = {"Content-Type": "application/ssml+xml", "X-Microsoft-OutputFormat": output, "Authorization": "Bearer " + self._accesstoken, "X-Search-AppId": "07D3234E49CE426DAA29772419F436CA", "X-Search-ClientID": "1ECFAE91408841A480F00935DC390960", "User-Agent": "PYCSSpeechTTS" } voice.append(ElementTree.XML(''+text+'')) prosody = voice.find('prosody') prosody.set('rate', rate) prosody.set('volume', volume) prosody.set('pitch', pitch) prosody.set('contour', contour) response = requests.post( endpoint, ElementTree.tostring(body), headers=headers) if response.status_code == requests.codes.ok: _LOGGER.debug("Text synthesis OK") return response.content else: _LOGGER.error("Text synthesis failed, statuscode " + str(response.status_code)+", reason: "+response.text) return None jeroenterheerdt-pycsspeechtts-eaded32/src/pycsspeechtts/test.py000066400000000000000000000035571434120474600254130ustar00rootroot00000000000000from pycsspeechtts import TTSTranslator useCustom = True api_key = "YOUR_API_KEY" custom_endpoint = "custom_endpoint" region = "westus" if not useCustom: t = TTSTranslator(api_key, region=region) # Speaking with default language of english US and default Female voice data = t.speak(text='This is a test') # Adding a pause to test XML support data = t.speak(text='This is a test with a long pause') # Change speed with -50% data = t.speak(text="This is a test", rate="-50%") # Change pitch to high data = t.speak(text="This is a test", pitch="high") # Change volume to +20% data = t.speak(text="This is a test", volume="+20%") # See https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support for the accepted values of the parameters below data = t.speak('en-GB', 'Male', 'George, Apollo', 'riff-16khz-16bit-mono-pcm', text='I am Max') data = t.speak('cs-CZ', 'Male', 'Jakub', text='Pojďme vyzkoušet klasickou českou testovací větu. Příliš žluťoučký kůň úpěl ďábelské ódy.') # Using contour to change pitch from normal at 0% of speech and +100% at 100% of speech data = t.speak(text="The Wall Street Journal - which says it's spoken to people close to the ongoing investigation - says the information it has paints a picture of a catastrophic failure that quickly overwhelmed the flight crew", contour="(0%,+0%) (100%,+100%)") else: # Test custom voice t = TTSTranslator(api_key, region=region, isCustom=True, customEndpoint=custom_endpoint) data = t.speak(language='en-gb',gender='Male',voiceType="ArchieNeural",text="This is a test for custom voice") if data == None: print("An error occurred") else: with open("file.wav", "wb") as f: f.write(data) print("Success! Open file.wav to hear the results")