Containerized text-to-speech engine based on Coqui's TTS with a web API
Jacek Kowalski
2024-08-01 07fc027270023234d0cdc9744633480fa4da649c
commit | author | age
import enum
import io
import threading

import fastapi
import fastapi.concurrency
import pydantic
import TTS.api

import config
9
# Load every model listed in config.models once, at import time, keyed by its
# config id. Each model is moved to the CPU device.
# A comprehension keeps the loop variables out of the module namespace (the
# previous loop left a global named `id` that shadowed the builtin).
models = {
    model_id: TTS.api.TTS(model_name).to("cpu")
    for model_id, model_name in config.models.items()
}


# Enum whose member names and values are both the loaded model ids. It is the
# type of SynthesizeRequest.language, so only ids present in `models` validate.
LanguageModel = enum.Enum('LanguageModel', {model_id: model_id for model_id in models})
17
18
class SynthesizeRequest(pydantic.BaseModel):
    """Request body accepted by the ``/synthesize`` endpoint."""

    # Id of the model to use; must be one of the LanguageModel members.
    language: LanguageModel
    # Text passed to the selected model for synthesis.
    text: str
22
23
class SynthesizeResponse(fastapi.Response):
    """Response type of ``/synthesize``; its content is served as ``audio/wav``."""

    media_type = 'audio/wav'
26
27
# Application object on which the HTTP routes below are registered.
app = fastapi.FastAPI()
29
# The model objects in `models` are shared by every request.
# NOTE(review): they are assumed not to be safe for concurrent inference, so
# synthesis is kept strictly one-at-a-time -- the same effective behavior the
# handler had when it ran the model directly on the event loop.
_synthesis_lock = threading.Lock()


def _render_wav(language: str, text: str) -> bytes:
    """Run the model registered under *language* on *text* and return the bytes it wrote."""
    with _synthesis_lock, io.BytesIO() as buffer:
        models[language].tts_to_file(text, file_path=buffer)
        return buffer.getvalue()


@app.post('/synthesize', response_class=SynthesizeResponse)
async def synthesize(request: SynthesizeRequest) -> SynthesizeResponse:
    """Synthesize ``request.text`` with the model selected by ``request.language``.

    ``tts_to_file`` is a synchronous call, so it is executed in a worker
    thread instead of directly inside this coroutine; running it inline
    would block the event loop (and therefore every other request) for the
    whole duration of the synthesis.

    Args:
        request: Validated request body holding the model id and the text.

    Returns:
        A ``SynthesizeResponse`` whose content is the audio written by the
        model, served with the ``audio/wav`` media type.
    """
    audio = await fastapi.concurrency.run_in_threadpool(
        _render_wav, request.language.value, request.text
    )
    return SynthesizeResponse(content=audio)