sylvie-2024

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

voice-and-motor.py (4616B)


      1 import serial
      2 import time
      3 import json
      4 import simpleaudio as sa
      5 import pyaudio
      6 from vosk import Model as srModel
      7 from vosk import KaldiRecognizer
      8 from dimits import Dimits
      9 import ollama
     10 #from ollama import generate
     11 
     12 # Open serial port
     13 ser = serial.Serial('/dev/ttyUSB0', 115200, timeout=1)
     14 dt_model = Dimits("en_US-amy-medium")
     15 
     16 srmodel = srModel("/home/khan/srmodels/vosk-model-small-en-us-0.15")
     17 recognizer = KaldiRecognizer(srmodel, 16000)
     18 
     19 #previous_prompt = ""
     20 
     21 if not ser.is_open:
     22     print("Failed to open serial port.")
     23 
     24 print("Serial port opened successfully.")
     25 
     26 def send_command(ser, command):
     27     ser.write(command.encode())
     28     ser.write(b'\n')  # Assuming commands are terminated with a newline character
     29 
     30 def generate_and_save_tts(dt_model, text):
     31     # Initialize Dimits with the desired voice model
     32     dt = dt_model
     33     # Convert text to audio and save it as a WAV file
     34     audio_path = dt.text_2_audio_file(text, "tts_output", "/home/khan/sylvie-2024/python/conversation/llama", format="wav")
     35     # Return None as the audio path (since it's saved to a file directly)
     36     return audio_path
     37 
     38 def play_audio_file(audio_path):
     39     # Play the saved WAV file using simpleaudio
     40     wave_obj = sa.WaveObject.from_wave_file(audio_path)
     41     play_obj = wave_obj.play()
     42     play_obj.wait_done()  # Wait for the audio playback to finish
     43 
     44 def main():
     45     mic = pyaudio.PyAudio()
     46     stream = mic.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8192)
     47     stream.start_stream()    
     48     
     49     admin_name = "Princess Khan"
     50     admin_pronoun = "She"
     51 
     52     user_prompt = "You are Sylvie Deane who is a cute librarian from French Polynesia. You are 33 years old. You were born on April 12th, in 1991. Your job is to chat with me, " + admin_name + ". We have only just met on Tinder, and we are not sure about each other yet. I asked you: \n" + admin_name + ": '"
     53     
     54     print('Ask Sylvie a question: ')
     55     while True:
     56         srdata = stream.read(4096)
     57     
     58         if recognizer.AcceptWaveform(srdata):
     59             srtext = recognizer.Result()
     60 
     61             # Convert the recognizer result string into a dictionary  
     62             resultDict = json.loads(srtext)
     63             if not resultDict.get("text", "") == "":
     64                 user_prompt += resultDict["text"]
     65                 user_prompt += "?'"
     66                 user_prompt += " Now write a short and concise response in English. \n Sylvie Deane:"
     67                 print(user_prompt)
     68                 break
     69             else:
     70                 print("No input sound")
     71 
     72     send_command(ser, "1sr")
     73 
     74     # Generate a response using the Ollama API
     75     ollama_stream = ollama.chat(
     76         model='dolphin-phi2-usb',
     77         messages=[{'role': 'user', 'content': user_prompt}],
     78         stream=True,
     79     )
     80 
     81     generated_text = ""
     82     user_text = ""
     83     for chunk in ollama_stream:
     84         generated_text += chunk['message']['content']
     85         if len(generated_text) >= 75:
     86             #print(generated_text)
     87             #user_text = generated_text
     88             break
     89     print(generated_text)
     90     user_text = generated_text
     91 
     92     # Check to see if there's a dot. And then only retrieve the text before the last dot.
     93     #if user_text.find(".")!=-1:
     94     #    last_dot_index = user_text.rfind('.')
     95     #    user_text = user_text[:last_dot_index+1]
     96 
     97     # Check if there are multiple lines. And then only retrieve first line.
     98     if '\n' in user_text:
     99         lines = user_text.split('\n')
    100         #first_line = lines[0].replace("\n", "").replace("\r", "")
    101         first_line = lines[0].strip()
    102         #second_line = lines[1].replace("\n", "").replace("\r", "")
    103         second_line = lines[1].strip()
    104         if first_line == "" or first_line == "\n":
    105             user_text = second_line
    106         else:
    107             user_text = first_line
    108     else:
    109         user_text = user_text.strip()
    110 
    111     # Determine the command type based on the length of the input
    112     if len(user_text.split()) > 10:
    113         command_type = "1sm"  # Use long command for speech
    114     elif len(user_text.split()) > 5:
    115         command_type = "1so"  # Use medium command for speech
    116     else:
    117         command_type = "1sn"
    118 
    119     # Generate the TTS audio file with the user's text
    120     audio_path = generate_and_save_tts(dt_model, user_text)
    121 
    122     # Play the TTS audio file using simpleaudio
    123     send_command(ser, command_type)  # Use variable command_type
    124     print("Sent motor command")
    125     play_audio_file(audio_path)
    126     time.sleep(3)
    127 
    128 if __name__ == "__main__":
    129     while True:
    130         main()
    131         #break  # Exit after one iteration, remove or modify for continuous operation