Piper TTS and Ollama Optimization - sylvie-2024 - Unnamed repository; edit this file 'description' to name the repository.

commit 0ce39d14146df3261bceed824ab36f29c1068902
parent d52d260f3003e3dbebccd9f64594d80f0114b805
Author: khanumballz <[email protected]>
Date:   Sat, 18 May 2024 07:10:27 +0100

Piper TTS and Ollama Optimization

Diffstat:
A python/conversation/llama/tts_output.wav  | 0 
A python/conversation/llama/voice-and-motor.py  | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

2 files changed, 104 insertions(+), 0 deletions(-)
diff --git a/python/conversation/llama/tts_output.wav b/python/conversation/llama/tts_output.wav
Binary files differ.
diff --git a/python/conversation/llama/voice-and-motor.py b/python/conversation/llama/voice-and-motor.py
@@ -0,0 +1,104 @@
+import serial
+import time
+import json
+import simpleaudio as sa
+import pyaudio
+from vosk import Model as srModel
+from vosk import KaldiRecognizer
+from dimits import Dimits
+import ollama
+#from ollama import generate
+
+# Open the serial link that send_command() writes motor commands to.
+ser = serial.Serial('/dev/ttyUSB0', 115200, timeout=1)
+
+# Dimits text-to-speech voice used by generate_and_save_tts().
+dt_model = Dimits("en_US-amy-medium")
+
+# Vosk speech-recognition model; the recognizer is created for 16000 Hz audio,
+# the same rate main() uses when it opens the microphone stream.
+srmodel = srModel("/home/khan/srmodels/vosk-model-small-en-us-0.15")
+recognizer = KaldiRecognizer(srmodel, 16000)
+
+# Report the actual port state. Previously the success message was printed
+# unconditionally, even right after the failure message.
+if ser.is_open:
+    print("Serial port opened successfully.")
+else:
+    print("Failed to open serial port.")
+
+def send_command(ser, command):
+    """Write ``command`` to ``ser`` followed by a newline terminator.
+
+    ``ser`` is any object with a ``write(bytes)`` method (here the pyserial
+    port); ``command`` is a ``str`` and is encoded with ``str.encode()``.
+    """
+    # Two separate writes: the encoded command first, then the terminator
+    # (commands are assumed to be newline-terminated on the receiving side).
+    for payload in (command.encode(), b'\n'):
+        ser.write(payload)
+
+def generate_and_save_tts(dt_model, text, filename="tts_output",
+                          directory="/home/khan/sylvie-2024/python/conversation/llama"):
+    """Synthesize ``text`` to a WAV file and return what Dimits reports back.
+
+    Args:
+        dt_model: An already-constructed Dimits instance (this function does
+            not create or load a voice itself).
+        text: The text to speak.
+        filename: Base name of the output file. Defaults to the value that
+            was previously hard-coded.
+        directory: Directory the file is written to. Defaults to the value
+            that was previously hard-coded.
+
+    Returns:
+        The value returned by ``text_2_audio_file`` -- expected to be the
+        path of the written WAV file, which the caller hands to
+        ``play_audio_file``.
+    """
+    return dt_model.text_2_audio_file(text, filename, directory, format="wav")
+
+def play_audio_file(audio_path):
+    """Play the WAV file at ``audio_path`` and block until playback ends."""
+    # Load the file, start playback, then wait for it to finish so the
+    # caller only continues once the audio is done.
+    sa.WaveObject.from_wave_file(audio_path).play().wait_done()
+
+def _listen_for_question(recognizer):
+    """Record from the microphone until a non-empty phrase is recognized.
+
+    Opens a 16000 Hz mono 16-bit input stream, feeds it to ``recognizer``
+    and returns the first non-empty ``"text"`` result. The stream and the
+    PyAudio instance are always released, because main() is called in a
+    loop and would otherwise leak one audio handle per cycle.
+    """
+    mic = pyaudio.PyAudio()
+    try:
+        stream = mic.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8192)
+        try:
+            stream.start_stream()
+            print('Ask Sylvie a question: ')
+            while True:
+                srdata = stream.read(4096)
+                if not recognizer.AcceptWaveform(srdata):
+                    continue
+                # Result() is a JSON string; the transcription is its "text" field.
+                heard = json.loads(recognizer.Result()).get("text", "")
+                if heard:
+                    return heard
+                print("No input sound")
+        finally:
+            stream.stop_stream()
+            stream.close()
+    finally:
+        mic.terminate()
+
+
+def _pick_speech_command(word_count):
+    """Return the motor command string for a reply of ``word_count`` words."""
+    if word_count > 10:
+        return "1sm"  # Use long command for speech
+    if word_count > 5:
+        return "1so"  # Use medium command for speech
+    return "1sn"
+
+
+def main():
+    """Run one cycle: listen for a question, ask the model, speak the reply.
+
+    Uses the module-level ``recognizer``, ``ser`` and ``dt_model``. The
+    reply is cut off once it reaches 20 words; shorter replies are spoken
+    in full (previously a reply under 20 words raised UnboundLocalError
+    because the spoken text was only assigned inside the cut-off branch).
+    """
+    question = _listen_for_question(recognizer)
+
+    user_prompt = "You are Sylvie Deane who is a cute librarian from French Polynesia, and somebody asked you: "
+    user_prompt += question
+    user_prompt += "?"
+    user_prompt += " So here is how you responded:"
+    print(user_prompt)
+
+    # Motor command sent before generation starts; its meaning is defined by
+    # the firmware on the other end of the serial link.
+    send_command(ser, "1sr")
+
+    # Generate a response using the Ollama API
+    ollama_stream = ollama.chat(
+        model='dolphin-phi2-usb',
+        messages=[{'role': 'user', 'content': user_prompt}],
+        stream=True,
+    )
+
+    generated_text = ""
+    for chunk in ollama_stream:
+        generated_text += chunk['message']['content']
+        # Stop consuming the stream as soon as there are 20 words to speak.
+        if len(generated_text.split()) >= 20:
+            break
+    print(generated_text)
+
+    word_count = len(generated_text.split())
+    if word_count == 0:
+        # Nothing to synthesize or play; skip straight to the next cycle.
+        print("No response generated")
+        return
+
+    # Determine the command type based on the length of the reply
+    command_type = _pick_speech_command(word_count)
+
+    # Generate the TTS audio file with the model's reply
+    audio_path = generate_and_save_tts(dt_model, generated_text)
+
+    # Send the motor command, then play the TTS audio file using simpleaudio
+    send_command(ser, command_type)
+    print("Sent motor command")
+    play_audio_file(audio_path)
+    time.sleep(3)
+
+if __name__ == "__main__":
+    try:
+        # Run listen/answer/speak cycles back to back until interrupted.
+        while True:
+            main()
+    except KeyboardInterrupt:
+        # Ctrl-C is the only way out of the loop; exit without a traceback.
+        print("Stopping.")
+    finally:
+        # Release the serial port opened at module import time.
+        ser.close()

	sylvie-2024 Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| README \| LICENSE

A	python/conversation/llama/tts_output.wav	\|	0
A	python/conversation/llama/voice-and-motor.py	\|	104	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++