"""Command-line entry point for the text-to-speech processor.

Supports a one-shot mode (synthesize once and exit) and a server mode that
reads one command per line from stdin and prints one result line per command.
"""

import argparse
import logging
import os
import sys
import time
import warnings

# Silence warning/log noise. The TTS engines are imported lazily inside
# initiate(), so these filters are already active when they load.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
logging.getLogger().setLevel(logging.ERROR)

# Engine instance created on the first initiate() call and reused afterwards.
TTS_ENGINE = None

# Set before torch is imported (torch is only imported lazily in initiate()).
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Fallbacks used by server_mode() when a command omits or garbles a field.
DEFAULT_SPEED = 1
DEFAULT_VOICE = 8


def _get_option(args, name, default=None):
    """Read option *name* from *args*, which may be a dict or a namespace."""
    if isinstance(args, dict):
        return args.get(name, default)
    return getattr(args, name, default)


def _load_engine_class(engine_name):
    """Import and return the processor class for *engine_name*.

    Any name other than "kokoro" or "kitten" selects the Chatterbox engine.
    The imports are local so that only the selected engine is loaded.
    """
    if engine_name == "kokoro":
        from .engines.kokoro import KokoroTTSProcessor as TTSEngine
    elif engine_name == "kitten":
        from .engines.kitten import KittenTTSProcessor as TTSEngine
    else:
        from .engines.chatterbox import ChatterboxTTSProcessor as TTSEngine
    return TTSEngine


def _clear_cuda_cache():
    """Best-effort release of cached CUDA memory; never raises."""
    try:
        import gc

        import torch

        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        gc.collect()
        gc.collect()
        time.sleep(1)
        print("\n🧹 Cleared PyTorch CUDA cache")
    except Exception:
        # torch may be missing or CUDA unavailable; cleanup is optional.
        pass


def server_mode(args):
    """Serve synthesis commands read line by line from stdin.

    Each line has the form ``<speed>voice<voice index>`` (for example
    ``1.2voice3``). A missing or unparsable speed falls back to
    ``DEFAULT_SPEED`` and a missing or unparsable voice to ``DEFAULT_VOICE``,
    so a blank line runs with both defaults. After each command the value
    returned by ``initiate`` is printed on its own line and stdout is flushed.

    Args:
        args: Parsed argument namespace; its ``speed`` and ``voice``
            attributes are overwritten for every command.

    Returns:
        None, once stdin reaches end of file.
    """
    while True:
        line = sys.stdin.readline()
        if not line:
            # readline() returns "" only at EOF (a blank line is "\n").
            # Without this check a closed stdin would re-run synthesis with
            # the default settings forever.
            return

        fields = line.strip().split("voice")

        try:
            args.speed = float(fields[0])
        except ValueError:
            args.speed = DEFAULT_SPEED

        try:
            args.voice = int(fields[1])
        except (IndexError, ValueError):
            args.voice = DEFAULT_VOICE

        output_path = initiate(args)
        print(output_path)
        sys.stdout.flush()


def current_env():
    """Return the name of the active virtual environment.

    The name is the last path component of the ``VIRTUAL_ENV`` environment
    variable.

    Raises:
        ValueError: If ``VIRTUAL_ENV`` is unset or empty.
    """
    venv_path = os.environ.get("VIRTUAL_ENV")
    if venv_path:
        return os.path.basename(venv_path)
    raise ValueError("Please set env first")


def initiate(args):
    """Create the TTS engine if needed and run one synthesis request.

    The engine is chosen from the ``model`` option when it is set; otherwise
    from the active virtual environment name (``kokoro_env`` -> kokoro,
    ``kitten_env`` -> kitten, anything else -> chatterbox). The engine is
    built once and cached in ``TTS_ENGINE``; later calls reuse it even if a
    different model is requested.

    Args:
        args: Options as a namespace (or dict) providing ``model`` and
            ``stream_text``. The object is also passed through to the engine.

    Returns:
        Whatever the engine's ``save_audio(args)`` returns in file mode
        (presumably the output path, as ``server_mode`` prints it; verify
        against the engine implementation), or None in streaming mode.

    Raises:
        ValueError: If no model is given, no engine exists yet and no
            virtual environment is active.
    """
    global TTS_ENGINE

    stream_text = _get_option(args, "stream_text", False)

    if TTS_ENGINE is None:
        engine_name = _get_option(args, "model")
        if not engine_name:
            env_to_engine = {"kokoro_env": "kokoro", "kitten_env": "kitten"}
            engine_name = env_to_engine.get(current_env(), "chatterbox")
        engine_class = _load_engine_class(engine_name)
        TTS_ENGINE = engine_class(stream_audio=stream_text)

    # NOTE(review): the source this was recovered from had lost its
    # indentation, so it is unclear whether this cleanup originally ran on
    # every call or only right after the engine was created. Confirm.
    _clear_cuda_cache()

    if stream_text:
        TTS_ENGINE.stream_real_time_text(args)
        text = TTS_ENGINE.read_content_file()
        for text_chunk in text.split():
            TTS_ENGINE.feed_text_chunk(text_chunk)
            time.sleep(0.1)  # Optional delay between chunks
        TTS_ENGINE.stop_all_streaming()
        return None

    return TTS_ENGINE.save_audio(args)


def main():
    """Parse command-line options and run server or one-shot mode.

    Returns:
        0 on success. In one-shot file mode, 1 when ``initiate`` returns a
        falsy result. Streaming mode has no result value, so finishing
        without an exception counts as success.
    """
    parser = argparse.ArgumentParser(description="Text-to-Speech processor")
    parser.add_argument(
        "--server-mode",
        action="store_true",
        help="Run in server mode (read commands from stdin)",
    )
    parser.add_argument("--speed", type=float, help="Speech speed")
    parser.add_argument("--voice", type=int, help="Voice index")
    parser.add_argument(
        "--stream-text",
        action="store_true",
        help="Enable streaming text output",
    )
    parser.add_argument("--model", help="model name")
    args = parser.parse_args()

    if args.server_mode:
        server_mode(args)
        return 0

    success = initiate(args)
    if args.stream_text:
        return 0
    return 0 if success else 1


if __name__ == "__main__":
    # NOTE(review): the status from main() is not passed to sys.exit(), to
    # keep the script's existing exit code; wire it up once the return value
    # of the engines' save_audio() is confirmed.
    main()