commit bfe355e794c16e103375877eb0ae3ef96e5977e9
parent 51fd4d9ce2f44702304116c8a4c07e1dbf7d0c80
Author: khanumballz <[email protected]>
Date: Thu, 16 May 2024 08:59:46 +0100
Piper TTS & Ollama Python Bindings
Diffstat:
1 file changed, 34 insertions(+), 2 deletions(-)
diff --git a/python/animation/voice-and-motor.py b/python/animation/voice-and-motor.py
@@ -3,6 +3,11 @@ import threading
import time
from dimits import Dimits
import ollama
+import pyaudio
+import json
+
+from vosk import Model as srModel
+from vosk import KaldiRecognizer
def send_command(ser, command):
ser.write(command.encode())
@@ -24,11 +29,38 @@ def main():
print("Serial port opened successfully.")
- prompt_text = input("Enter the prompt for the robot: ")
+ #prompt_text = input("Enter the prompt for the robot: ")
+
+ srmodel = srModel("/home/khan/srmodels/vosk-model-small-en-us-0.15")
+
+ recognizer = KaldiRecognizer(srmodel, 16000)
+
+ mic = pyaudio.PyAudio()
+ stream = mic.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8192)
+ stream.start_stream()
+ user_prompt = ""
+
+ print('Say something to Sylvie: ')
+ while True:
+ srdata = stream.read(4096)
+
+ if recognizer.AcceptWaveform(srdata):
+ srtext = recognizer.Result()
+
+ # recognizer.Result() returns a JSON string; parse it into a dict
+ resultDict = json.loads(srtext)
+ if not resultDict.get("text", "") == "":
+ #print(resultDict["text"])
+ user_prompt += resultDict["text"]
+ user_prompt += "?"
+ print(user_prompt)
+ break
+ else:
+ print("no input sound")
stream = ollama.chat(
model='dolphin-phi2-usb',
- messages=[{'role': 'user', 'content': prompt_text}],
+ messages=[{'role': 'user', 'content': user_prompt}],
stream=True,
)