Skip to content

Commit

Permalink
Public Preview: 🚀 Live Mode, a rough structure is prepared until fluent functionality is implemented #5!
Browse files Browse the repository at this point in the history
  • Loading branch information
omegaui committed Jan 1, 2023
1 parent 0866e43 commit 808c5b8
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 10 deletions.
1 change: 1 addition & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions live_mode_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import soundfile


threshold = []
trainingDataSet = []

Expand Down Expand Up @@ -51,3 +52,4 @@ def compare():
print(distance)

return distance <= threshold

4 changes: 0 additions & 4 deletions live_mode_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,6 @@ def compare(key1, key2):
for key in training_data_set:
frames = b''.join(training_data_set[key])

file = open(f'training-data/live_mode_training_data{key}.bin', 'wb')
file.write(frames)
file.close()

wf = wave.open(f'training-data/live_mode_training_audio{key}.wav', 'wb')
wf.setnchannels(2)
wf.setsampwidth(pyAudio.get_sample_size(pyaudio.paInt16))
Expand Down
1 change: 1 addition & 0 deletions lvc_gui_flutter/lib/main.dart
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Future<void> main() async {
appWindow.minSize = initialSize;
appWindow.size = initialSize;
appWindow.alignment = Alignment.center;
appWindow.position = Offset(200, 200);
appWindow.show();
});

Expand Down
67 changes: 61 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
print('unable to create training directory')
exit(1)

# initializing PyAudio ...
pyAudio = pyaudio.PyAudio()

def log(text, color=None, attrs=None):
if attrs is None:
Expand Down Expand Up @@ -70,9 +72,6 @@ def main(model='base', ui='false'):
WAVE_OUTPUT_FILENAME = "misc/last-mic-fetch.wav" # default file which will be overwritten in every RECORD_SECONDS
SPEECH_THRESHOLD = config_manager.config['speech-threshold'] # speech threshold default 4000 Hz

# initializing PyAudio ...
pyAudio = pyaudio.PyAudio()

# Opening Microphone Stream with above created configuration ...
stream = pyAudio.open(format=FORMAT,
channels=CHANNELS,
Expand Down Expand Up @@ -106,11 +105,13 @@ def main(model='base', ui='false'):
# well, this is under development,
# I don't recommend activating live mode until it is ready!
if config_manager.config['live-mode']:
voice_feedback.speak("initializing live mode ...", wait=True)
live_mode_manager.init()
voice_feedback.speak("say my name to trigger actions ...", wait=True)
while True:
frames = []
chunk_array = array('h')
log("listening ...", "blue", attrs=["bold"])
log("sleeping ...", "blue", attrs=["bold"])
for i in range(0, int(44100 / 1024 * 2)):
data = stream.read(1024)
frames.append(data) # stacking every audio frame into the list
Expand All @@ -134,9 +135,11 @@ def main(model='base', ui='false'):

log("comparing ...", "blue", attrs=["bold"])
if live_mode_manager.compare():
voice_feedback.speak('yes sir!', wait=True)
exit(0)
voice_feedback.speak('match test succeeded', wait=True)
log("listening ...", "blue", attrs=['bold'])
listen_for_live_mode(stream, audio_model, CHUNK, FORMAT, CHANNELS, RATE, RECORD_SECONDS, WAVE_OUTPUT_FILENAME, SPEECH_THRESHOLD)
else:
log('live mode: match test failed!', "red", attrs=['bold'])
voice_feedback.speak('match test failed!', wait=True)

frames.clear()
Expand Down Expand Up @@ -206,5 +209,57 @@ def analyze_text(text):
command_manager.launch_if_any(text)


def listen_for_live_mode(stream, audio_model, chunk, audio_format, channels, rate, record_seconds, wave_output_filename, speech_threshold):
    """Capture one clip from the mic stream, gate it, transcribe it, and act on the text.

    Reads ``record_seconds`` of audio from the already-open PyAudio ``stream``,
    discards the clip if it is silent or below ``speech_threshold``, optionally
    verifies the speaker when master mode is enabled, then transcribes the
    saved wav with ``audio_model`` and forwards the lowercased text to
    ``analyze_text``.

    Args:
        stream: open PyAudio input stream to read frames from.
        audio_model: model exposing ``transcribe()`` (whisper-style API).
        chunk: frames per ``stream.read`` call.
        audio_format: PyAudio sample-format constant used for sample width.
        channels: channel count written to the wav header.
        rate: sample rate in Hz.
        record_seconds: length of the clip to capture.
        wave_output_filename: wav path, overwritten on every capture.
        speech_threshold: minimum peak amplitude treated as speech.

    Returns:
        None. Side effects: writes ``wave_output_filename`` and triggers
        whatever command ``analyze_text`` matches.
    """
    frames = []
    chunk_array = array('h')
    log("listening ...", "blue", attrs=["bold"])
    for _ in range(0, int(rate / chunk * record_seconds)):
        data = stream.read(chunk)
        frames.append(data)  # stacking every audio frame into the list
        chunk_array.extend(array('h', data))

    chunk_array = trim(chunk_array)
    if len(chunk_array) == 0:  # clip is empty
        log('no voice')
        return
    elif max(chunk_array) < speech_threshold:  # no speech loud enough in clip
        log('no speech in clip')
        return
    print("saving audio ...")

    # writing the wave file; the context manager closes the handle even if a
    # setter or writeframes raises (the original leaked it on error)
    with wave.open(wave_output_filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(pyAudio.get_sample_size(audio_format))
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))

    # checking if master control mode is enabled
    # and performing speaker verification if enabled
    if config_manager.config['master-mode']:
        log('performing master mode analysis ...', "green", attrs=['bold'])
        if not isMasterSpeaking():
            log('performing master mode analysis ... failed', "red", attrs=['bold'])
            if config_manager.config['master-mode-barrier-speech-enabled']:
                voice_feedback.speak(config_manager.config['master-mode-barrier-speech'], wait=True)
            return
        log('performing master mode analysis ... succeeded', "green", attrs=['bold'])
    voice_feedback.give_transcription_feedback()
    log("transcribing audio data ...")
    # transcribing audio ...
    # fp16 isn't supported on every CPU, so using fp32 by default.
    result = audio_model.transcribe(wave_output_filename, fp16=False, language='english')

    log("analyzing results ...", "magenta", attrs=["bold"])
    # analyzing results ...
    analyze_text(result["text"].lower().strip())





# spawning the process
main()
Binary file modified misc/execution-feedback.mp3
Binary file not shown.
Binary file modified misc/exiting-feedback.mp3
Binary file not shown.
Binary file modified misc/greeting.mp3
Binary file not shown.

0 comments on commit 808c5b8

Please sign in to comment.