"""HTTP service exposing the configured TTS models through one endpoint."""

import enum
import io

import fastapi
import pydantic
import TTS.api

import config

# Build every configured model once, at import time, and move it to the CPU.
# Keys are the identifiers used in ``config.models``; each value there is
# passed as the first argument to ``TTS.api.TTS``.
models = {
    model_id: TTS.api.TTS(model_name).to("cpu")
    for model_id, model_name in config.models.items()
}

# Enum whose member names and values are both the configured model ids, so a
# request can only select a model that was actually loaded above.
LanguageModel = enum.Enum('LanguageModel', {key: key for key in models})


class SynthesizeRequest(pydantic.BaseModel):
    """Request body accepted by the ``/synthesize`` endpoint."""

    # Id of the model to use; restricted to the members of LanguageModel.
    language: LanguageModel
    # Text handed to the selected model for synthesis.
    text: str


class SynthesizeResponse(fastapi.Response):
    """Response carrying the synthesized audio bytes as ``audio/wav``."""

    media_type = 'audio/wav'


app = fastapi.FastAPI()


@app.post('/synthesize', response_class=SynthesizeResponse)
async def synthesize(request: SynthesizeRequest) -> SynthesizeResponse:
    """Synthesize the request text with the selected model.

    The model registered under ``request.language`` writes its output into an
    in-memory buffer, and the buffer's bytes are returned as the response body.
    """
    # NOTE(review): tts_to_file is called synchronously inside a coroutine, so
    # the event loop is occupied until it returns. Offloading it to a thread
    # pool would allow concurrent calls into the same model object; confirm
    # that is safe for the TTS library before changing this.
    model = models[request.language.value]
    with io.BytesIO() as buffer:
        model.tts_to_file(request.text, file_path=buffer)
        # Copy the bytes out before the buffer is closed on leaving the block.
        audio = buffer.getvalue()
    return SynthesizeResponse(content=audio)