play_tts_llama.py (4150B)
# Voice chat loop: listen on the microphone, generate a reply with a local
# LLaMA model, synthesise it with Tortoise TTS and play it back.
#
# Each turn:
#   1. Vosk transcribes one utterance from the default input device.
#   2. Optionally a pre-recorded "question" filler clip is played.
#   3. The persona prompt + previous exchange + new utterance is sent to the
#      LLM and the generated text is trimmed to a clean reply.
#   4. The reply is rendered to a WAV file and played.
# Numeric codes are written to the serial port at fixed points of the turn.
# NOTE(review): the meaning of each code is defined by whatever is attached
# to the serial port and is not visible in this file.

from time import sleep
from random import random, randint

import serial
import json
import argparse
import os

import torch
import torchaudio
import simpleaudio as sa

from vosk import Model as srModel
from vosk import KaldiRecognizer

import pyaudio

from pyllamacpp.model import Model

from tortoise.api import TextToSpeech, MODELS_DIR
from tortoise.utils.audio import load_audio, load_voice, load_voices

ser = serial.Serial('/dev/ttyUSB0', 9600)  # Establish the connection on a specific port

tts = TextToSpeech()
voice_samples, conditioning_latents = load_voice('sylvie', ['/home/twinlizzie/tts_voices/voices'])

user_name = "Denise Jaimes"
robot_name = "Sylvie Deane"

# Fixed persona preamble that is prepended to every prompt sent to the LLM.
prompt_context = "Act as " + robot_name + ". " + robot_name + " is helpful, kind, honest, and never fails to answer " + user_name + "'s questions with detail. You were born in France and raised in French Polynesia, Rapa Iti."
prompt_context += "\n"
prompt_context += "\n" + user_name + ": Nice to meet you " + robot_name + "!"
prompt_context += "\n" + robot_name + ": Welcome, " + user_name + "! I'm here to assist you with anything you need. What can I do for you today?"

# Only the most recent exchange is carried over into the next prompt.
previous_user_ctx = ""
previous_robot_ctx = ""

prompt_prefix = "\n" + user_name + ": "
prompt_suffix = "\n" + robot_name + ": "

model = Model(model_path='/home/twinlizzie/llm_models/7B/ggml-model-q4_0.bin', n_ctx=1024)
srmodel = srModel("/home/twinlizzie/sr_models/vosk-model-small-en-us-0.15")


def _send_code(code):
    """Write *code* to the serial port as ASCII decimal digits (33 -> b"33")."""
    ser.write(str(code).encode())


def _listen_for_prompt():
    """Block until one non-empty utterance is recognised and return it.

    A fresh recogniser and input stream are used for every call.  The
    returned text always has a "?" appended.  The stream and the PyAudio
    instance are released before returning (or on error) so capture devices
    do not accumulate across turns.
    """
    recognizer = KaldiRecognizer(srmodel, 16000)
    mic = pyaudio.PyAudio()
    try:
        stream = mic.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8192)
        try:
            stream.start_stream()

            print('Say something to Sylvie: ')
            while True:
                srdata = stream.read(4096)

                # AcceptWaveform is falsy until the recogniser has a final
                # result for the audio fed so far; keep feeding until then.
                if not recognizer.AcceptWaveform(srdata):
                    continue

                # convert the recognizerResult string into a dictionary
                result = json.loads(recognizer.Result())
                heard = result.get("text", "")
                if heard:
                    print(heard)
                    return heard + "?"
                print("no input sound")
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        mic.terminate()


def _trim_reply(text):
    """Reduce raw LLM output to a single line ending at its last period.

    Everything after the first newline is dropped first, then the remaining
    line is cut after its last '.' so a sentence left unfinished by the token
    limit is not spoken.  A line without any '.' is returned unchanged.
    """
    first_line = text.split('\n', 1)[0]
    last_dot_index = first_line.rfind('.')
    if last_dot_index != -1:
        first_line = first_line[:last_dot_index + 1]
    return first_line


while True:
    user_prompt = _listen_for_prompt()

    srand = randint(0, 1)
    qrand = randint(0, 8)

    # Half of the time, play one of the nine filler clips (question0..8.wav)
    # before generating; the serial code differs between the two branches.
    if srand == 1:
        wave_obj = sa.WaveObject.from_wave_file("/home/twinlizzie/tts_voices/fillers/question" + str(qrand) + ".wav")
        play_obj = wave_obj.play()

        _send_code(33)

        play_obj.wait_done()
    else:
        _send_code(34)

    partial_prompt = prompt_prefix + user_prompt + prompt_suffix
    full_prompt = prompt_context + previous_user_ctx + previous_robot_ctx + partial_prompt

    gen_text = "".join(
        model.generate(full_prompt, n_predict=24, n_threads=12, repeat_penalty=1.2, repeat_last_n=1024)
    )
    gen_text = _trim_reply(gen_text)

    # Remember this exchange so the next prompt includes it.
    previous_user_ctx = partial_prompt
    previous_robot_ctx = gen_text

    _send_code(35)

    print("\nPlaying back the generated text:")
    print(gen_text)

    gen_audio = tts.tts_with_preset(gen_text, voice_samples=voice_samples, conditioning_latents=conditioning_latents, preset='ultra_fast')
    torchaudio.save('/home/twinlizzie/tts_voices/results/sylvie_gen.wav', gen_audio.squeeze(0).cpu(), 24000, bits_per_sample=16)

    wave_obj = sa.WaveObject.from_wave_file("/home/twinlizzie/tts_voices/results/sylvie_gen.wav")
    play_obj = wave_obj.play()

    # Replies longer than 60 characters send code 29, shorter ones code 25.
    if len(gen_text) > 60:
        _send_code(29)
    else:
        _send_code(25)

    play_obj.wait_done()