sylvie-2024

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

play_tts_llama.py (4150B)


      1 from time import sleep
      2 from random import random, randint
      3 
      4 import serial
      5 import json
      6 import argparse
      7 import os
      8 
      9 import torch
     10 import torchaudio
     11 import simpleaudio as sa
     12 
     13 from vosk import Model as srModel
     14 from vosk import KaldiRecognizer
     15 
     16 import pyaudio
     17 
     18 from pyllamacpp.model import Model
     19 
     20 from tortoise.api import TextToSpeech, MODELS_DIR
     21 from tortoise.utils.audio import load_audio, load_voice, load_voices
     22 
     23 ser = serial.Serial('/dev/ttyUSB0', 9600) # Establish the connection on a specific port
     24 
     25 tts = TextToSpeech()
     26 voice_samples, conditioning_latents = load_voice('sylvie', ['/home/twinlizzie/tts_voices/voices'])
     27 
     28 user_name = "Denise Jaimes"
     29 robot_name = "Sylvie Deane"
     30 
     31 prompt_context = "Act as " + robot_name + ". " + robot_name + " is helpful, kind, honest, and never fails to answer " + user_name + "'s questions with detail. You were born in France and raised in French Polynesia, Rapa Iti."
     32 prompt_context += "\n"
     33 prompt_context += "\n" + user_name + ": Nice to meet you " + robot_name + "!"
     34 prompt_context += "\n" + robot_name + ": Welcome, " + user_name + "! I'm here to assist you with anything you need. What can I do for you today?"
     35 
     36 previous_user_ctx = ""
     37 previous_robot_ctx = ""
     38 
     39 prompt_prefix = "\n" + user_name + ": "
     40 prompt_suffix = "\n" + robot_name + ": "
     41 
     42 model = Model(model_path='/home/twinlizzie/llm_models/7B/ggml-model-q4_0.bin', n_ctx=1024)
     43 srmodel = srModel("/home/twinlizzie/sr_models/vosk-model-small-en-us-0.15")
     44 
     45 while True:
     46     recognizer = KaldiRecognizer(srmodel, 16000)
     47     
     48     mic = pyaudio.PyAudio()
     49     stream = mic.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8192)
     50     stream.start_stream()
     51     
     52     user_prompt = ""
     53     
     54     print('Say something to Sylvie: ')
     55     while True:
     56         srdata = stream.read(4096)
     57     
     58         if recognizer.AcceptWaveform(srdata):
     59             srtext = recognizer.Result()
     60 
     61             # convert the recognizerResult string into a dictionary  
     62             resultDict = json.loads(srtext)
     63             if not resultDict.get("text", "") == "":
     64                 print(resultDict["text"])
     65                 user_prompt += resultDict["text"]
     66                 user_prompt += "?"
     67                 break
     68             else:
     69                 print("no input sound")
     70 
     71     srand = randint(0,1)
     72     qrand = randint(0,8)
     73     
     74     if srand == 1:
     75         wave_obj = sa.WaveObject.from_wave_file("/home/twinlizzie/tts_voices/fillers/question" + str(qrand) + ".wav")
     76         play_obj = wave_obj.play()
     77     
     78         ser.write(str(33).encode())    
     79 
     80         play_obj.wait_done()   
     81     else:
     82         ser.write(str(34).encode())    
     83     
     84     gen_text = ""
     85 
     86     partial_prompt = prompt_prefix + user_prompt + prompt_suffix
     87     full_prompt = prompt_context + previous_user_ctx + previous_robot_ctx + partial_prompt
     88 
     89     for token in model.generate(full_prompt, n_predict=24, n_threads=12, repeat_penalty=1.2, repeat_last_n=1024):
     90         #print(token, end='', flush=True)
     91         gen_text += token
     92 
     93     # Check to see if there's a dot. And then only retrieve the text before the last dot.
     94     if gen_text.find(".")!=-1:
     95         last_dot_index = gen_text.rfind('.')
     96         gen_text = gen_text[:last_dot_index+1]
     97         
     98     # Check if there are multiple lines. And then only retrieve first line.    
     99     if '\n' in gen_text:
    100         lines = gen_text.split('\n')
    101         first_line = lines[0]
    102         gen_text = first_line     
    103         
    104     previous_user_ctx = partial_prompt
    105     previous_robot_ctx = gen_text
    106 
    107     ser.write(str(35).encode())
    108     
    109     print("\nPlaying back the generated text:")
    110     print(gen_text)
    111 
    112     gen_audio = tts.tts_with_preset(gen_text, voice_samples=voice_samples, conditioning_latents=conditioning_latents, preset='ultra_fast')
    113     torchaudio.save('/home/twinlizzie/tts_voices/results/sylvie_gen.wav', gen_audio.squeeze(0).cpu(), 24000, bits_per_sample=16)
    114 
    115     wave_obj = sa.WaveObject.from_wave_file("/home/twinlizzie/tts_voices/results/sylvie_gen.wav")
    116     play_obj = wave_obj.play()
    117     
    118     if len(gen_text) > 60:
    119         ser.write(str(29).encode())
    120     else:
    121         ser.write(str(25).encode())    
    122     
    123     play_obj.wait_done()