commit d52d260f3003e3dbebccd9f64594d80f0114b805
parent 47c9681979a11b1e650818eeb81d0e3ea6431217
Author: khanumballz <[email protected]>
Date: Sat, 18 May 2024 07:09:32 +0100
Piper TTS and Ollama Optimization
Diffstat:
1 file changed, 0 insertions(+), 105 deletions(-)
diff --git a/python/animation/voice-and-motor.py b/python/animation/voice-and-motor.py
@@ -1,105 +0,0 @@
-import serial
-import threading
-import time
-from dimits import Dimits
-import ollama
-import pyaudio
-import json
-
-from vosk import Model as srModel
-from vosk import KaldiRecognizer
-
-def send_command(ser, command):
-    """Write `command` to `ser` plus the newline assumed to terminate commands."""
-    ser.write(command.encode() + b'\n')  # one write keeps command and terminator together
-
-def play_tts(text, command_type):
-    """Speak `text` through aplay using the "en_US-amy-medium" Dimits voice."""
-    # command_type is forwarded untouched as the `command` keyword argument.
-    speech_kwargs = {"engine": "aplay", "command": command_type}
-    Dimits("en_US-amy-medium").text_2_speech(text, **speech_kwargs)
-
-def main():
-    """Run one listen, think, speak-and-move cycle for Sylvie.
-
-    Opens the serial port, listens until Vosk returns a non-empty phrase, asks
-    the Ollama model with the Sylvie persona prompt, then speaks about the first
-    15 words of the reply on a worker thread while the animation commands are
-    sent. The serial port and the audio input are released on every exit path.
-    """
-    # serial.Serial opens the port in its constructor and raises on failure.
-    try:
-        ser = serial.Serial('/dev/ttyUSB0', 115200, timeout=1)
-    except serial.SerialException:
-        print("Failed to open serial port.")
-        return
-    print("Serial port opened successfully.")
-
-    with ser:  # closes the port however the cycle ends
-        srmodel = srModel("/home/khan/srmodels/vosk-model-small-en-us-0.15")
-        recognizer = KaldiRecognizer(srmodel, 16000)
-        mic = pyaudio.PyAudio()
-        try:
-            mic_stream = mic.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8192)
-            mic_stream.start_stream()
-            print('Ask Sylvie a question: ')
-            while True:
-                srdata = mic_stream.read(4096)
-                if not recognizer.AcceptWaveform(srdata):
-                    print("no input sound")
-                    continue
-                # Result() is a JSON string; the recognised phrase is under "text".
-                question = json.loads(recognizer.Result()).get("text", "")
-                if question:
-                    break
-        finally:
-            # terminate() also closes streams opened from this PyAudio instance.
-            mic.terminate()
-
-        user_prompt = (
-            "You are Sylvie Deane who is a cute librarian from French Polynesia, and somebody asked you: "
-            f"{question}? So here is how you responded:"
-        )
-        print(user_prompt)
-        send_command(ser, "1sr")
-
-        reply_stream = ollama.chat(
-            model='dolphin-phi2-usb',
-            messages=[{'role': 'user', 'content': user_prompt}],
-            stream=True,
-        )
-
-        # Accumulate straight into user_text so a reply that ends before 15 words
-        # is still spoken instead of leaving user_text undefined.
-        user_text = ""
-        for chunk in reply_stream:
-            user_text += chunk['message']['content']
-            if len(user_text.split()) >= 15:
-                break
-        print(user_text)
-
-        # Determine the command type based on the length of the reply
-        word_count = len(user_text.split())
-        if word_count > 7:
-            command_type = "1sm"  # Use long command for speech
-        elif word_count > 4:
-            command_type = "1so"  # Use short command for speech
-        else:
-            command_type = "1sn"
-
-        # Speak on a worker thread so the motor commands can go out meanwhile
-        tts_thread = threading.Thread(target=play_tts, args=(user_text, command_type))
-        tts_thread.start()
-
-        # Delay to ensure synchronization (adjust as needed)
-        time.sleep(3.5)
-        # Initiate animation sequence after the delay
-        send_command(ser, "1s3")
-        send_command(ser, command_type)
-        print("Sent motor command")
-        time.sleep(12)
-
-        # Wait for the TTS thread to finish before exiting
-        tts_thread.join()
-
-if __name__ == "__main__":  # run the cycle only when executed as a script, not on import
- main()