voice-and-motor.py (4616B)
"""Speech-in / speech-out chat loop that also sends commands over serial.

Each round: listen on the microphone until Vosk recognises a phrase, wrap it
in the Sylvie persona prompt, stream a short reply from Ollama, synthesise it
with Dimits, send a serial command chosen by reply length, and play the audio.
"""

import json
import time

import ollama
import pyaudio
import serial
import simpleaudio as sa
from dimits import Dimits
from vosk import KaldiRecognizer
from vosk import Model as srModel

SERIAL_PORT = "/dev/ttyUSB0"
SERIAL_BAUD = 115200
SAMPLE_RATE = 16000
VOSK_MODEL_PATH = "/home/khan/srmodels/vosk-model-small-en-us-0.15"
TTS_OUTPUT_DIR = "/home/khan/sylvie-2024/python/conversation/llama"
LLM_MODEL = "dolphin-phi2-usb"
MAX_REPLY_CHARS = 75  # stop streaming once the reply reaches this length

# Open serial port
ser = serial.Serial(SERIAL_PORT, SERIAL_BAUD, timeout=1)
dt_model = Dimits("en_US-amy-medium")

srmodel = srModel(VOSK_MODEL_PATH)
recognizer = KaldiRecognizer(srmodel, SAMPLE_RATE)

# Report exactly one outcome; previously the success message was printed
# even right after the failure message.
if ser.is_open:
    print("Serial port opened successfully.")
else:
    print("Failed to open serial port.")


def send_command(ser, command: str) -> None:
    """Write ``command`` to the serial port, followed by a newline.

    Args:
        ser: Open serial connection exposing ``write``.
        command: Command text; it is encoded with the default ``str.encode``.
    """
    ser.write(command.encode())
    ser.write(b'\n')  # Assuming commands are terminated with a newline character


def generate_and_save_tts(dt_model, text: str):
    """Synthesise ``text`` to a WAV file and return what Dimits reports.

    Args:
        dt_model: Dimits instance providing ``text_2_audio_file``.
        text: Text to speak.

    Returns:
        The value returned by ``text_2_audio_file``; ``main`` passes it to
        ``play_audio_file`` as the path of the generated file.
    """
    return dt_model.text_2_audio_file(text, "tts_output", TTS_OUTPUT_DIR, format="wav")


def play_audio_file(audio_path) -> None:
    """Play the WAV file at ``audio_path`` and block until playback ends."""
    wave_obj = sa.WaveObject.from_wave_file(audio_path)
    play_obj = wave_obj.play()
    play_obj.wait_done()  # Wait for the audio playback to finish


def _listen_for_question() -> str:
    """Record from the microphone until the recogniser yields non-empty text.

    The PyAudio instance and its stream are released before returning, so
    repeated rounds do not accumulate open audio handles.
    """
    mic = pyaudio.PyAudio()
    stream = None
    try:
        stream = mic.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=SAMPLE_RATE,
            input=True,
            frames_per_buffer=8192,
        )
        stream.start_stream()
        while True:
            chunk = stream.read(4096)
            if not recognizer.AcceptWaveform(chunk):
                continue
            # The recogniser result is a JSON string; "text" holds the phrase.
            heard = json.loads(recognizer.Result()).get("text", "")
            if heard:
                return heard
            print("No input sound")
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        mic.terminate()


def _build_prompt(admin_name: str, question: str) -> str:
    """Return the persona prompt with ``question`` quoted as the user's line."""
    return (
        "You are Sylvie Deane who is a cute librarian from French Polynesia. "
        "You are 33 years old. You were born on April 12th, in 1991. "
        "Your job is to chat with me, " + admin_name + ". "
        "We have only just met on Tinder, and we are not sure about each other yet. "
        "I asked you: \n" + admin_name + ": '" + question + "?'"
        " Now write a short and concise response in English. \n Sylvie Deane:"
    )


def _generate_reply(prompt: str, max_chars: int = MAX_REPLY_CHARS) -> str:
    """Stream a reply from Ollama, stopping once it reaches ``max_chars``."""
    reply = ""
    chunks = ollama.chat(
        model=LLM_MODEL,
        messages=[{'role': 'user', 'content': prompt}],
        stream=True,
    )
    for chunk in chunks:
        reply += chunk['message']['content']
        if len(reply) >= max_chars:
            break
    return reply


def _first_nonempty_line(text: str) -> str:
    """Return the first line of ``text`` that is non-blank, stripped.

    Earlier code looked only at the first two lines, so a reply that began
    with two blank lines produced an empty utterance.
    """
    for line in text.split('\n'):
        line = line.strip()
        if line:
            return line
    return ""


def _speech_command_for(text: str) -> str:
    """Pick the serial speech command from the number of words in ``text``."""
    word_count = len(text.split())
    if word_count > 10:
        return "1sm"  # Use long command for speech
    if word_count > 5:
        return "1so"  # Use medium command for speech
    return "1sn"


def main():
    """Run one listen -> generate -> speak round."""
    admin_name = "Princess Khan"

    print('Ask Sylvie a question: ')
    user_prompt = _build_prompt(admin_name, _listen_for_question())
    print(user_prompt)

    # Sent as soon as a question has been captured, before the reply exists.
    send_command(ser, "1sr")

    # Generate a response using the Ollama API
    generated_text = _generate_reply(user_prompt)
    print(generated_text)

    # Speak only a single line of the (possibly multi-line) reply.
    user_text = _first_nonempty_line(generated_text)

    # Generate the TTS audio file with the reply text
    audio_path = generate_and_save_tts(dt_model, user_text)

    # Send the command immediately before playback starts.
    send_command(ser, _speech_command_for(user_text))
    print("Sent motor command")
    play_audio_file(audio_path)
    time.sleep(3)


if __name__ == "__main__":
    try:
        # Runs continuously; add a break after main() for a single round.
        while True:
            main()
    finally:
        # Release the serial port however the loop ends (e.g. Ctrl+C).
        ser.close()