diff --git a/.idea/modules.xml b/.idea/modules.xml
index 80d5e66..57775d3 100644
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -4,6 +4,7 @@
+
\ No newline at end of file
diff --git a/live_mode_manager.py b/live_mode_manager.py
index 1b8b8a9..20ad82f 100644
--- a/live_mode_manager.py
+++ b/live_mode_manager.py
@@ -4,6 +4,7 @@
 import numpy as np
 import soundfile
+
 threshold = []
 trainingDataSet = []
@@ -51,3 +52,4 @@ def compare():
     print(distance)
     return distance <= threshold
+
diff --git a/live_mode_setup.py b/live_mode_setup.py
index bdeaed2..80ea501 100644
--- a/live_mode_setup.py
+++ b/live_mode_setup.py
@@ -129,10 +129,6 @@ def compare(key1, key2):
     for key in training_data_set:
         frames = b''.join(training_data_set[key])
-        file = open(f'training-data/live_mode_training_data{key}.bin', 'wb')
-        file.write(frames)
-        file.close()
-
         wf = wave.open(f'training-data/live_mode_training_audio{key}.wav', 'wb')
         wf.setnchannels(2)
         wf.setsampwidth(pyAudio.get_sample_size(pyaudio.paInt16))
diff --git a/lvc_gui_flutter/lib/main.dart b/lvc_gui_flutter/lib/main.dart
index c9dc93a..f6a8c8f 100644
--- a/lvc_gui_flutter/lib/main.dart
+++ b/lvc_gui_flutter/lib/main.dart
@@ -16,6 +16,7 @@ Future main() async {
     appWindow.minSize = initialSize;
     appWindow.size = initialSize;
     appWindow.alignment = Alignment.center;
+    appWindow.position = Offset(200, 200);
     appWindow.show();
   });
diff --git a/main.py b/main.py
index 2b05039..2e82f8e 100755
--- a/main.py
+++ b/main.py
@@ -28,6 +28,8 @@
     print('unable to create training directory')
     exit(1)
 
+# initializing PyAudio ...
+pyAudio = pyaudio.PyAudio()
 
 def log(text, color=None, attrs=None):
     if attrs is None:
@@ -70,9 +72,6 @@ def main(model='base', ui='false'):
     WAVE_OUTPUT_FILENAME = "misc/last-mic-fetch.wav"  # default file which will be overwritten in every RECORD_SECONDS
     SPEECH_THRESHOLD = config_manager.config['speech-threshold']  # speech threshold default 4000 Hz
 
-    # initializing PyAudio ...
-    pyAudio = pyaudio.PyAudio()
-
     # Opening Microphone Stream with above created configuration ...
     stream = pyAudio.open(format=FORMAT,
                           channels=CHANNELS,
@@ -106,11 +105,13 @@ def main(model='base', ui='false'):
     # well, this is under development,
     # I don't recommend activating live mode until it is ready!
     if config_manager.config['live-mode']:
+        voice_feedback.speak("initializing live mode ...", wait=True)
         live_mode_manager.init()
+        voice_feedback.speak("say my name to trigger actions ...", wait=True)
         while True:
             frames = []
             chunk_array = array('h')
-            log("listening ...", "blue", attrs=["bold"])
+            log("sleeping ...", "blue", attrs=["bold"])
             for i in range(0, int(44100 / 1024 * 2)):
                 data = stream.read(1024)
                 frames.append(data)  # stacking every audio frame into the list
@@ -134,9 +135,11 @@ def main(model='base', ui='false'):
             log("comparing ...", "blue", attrs=["bold"])
             if live_mode_manager.compare():
-                voice_feedback.speak('yes sir!', wait=True)
-                exit(0)
+                voice_feedback.speak('match test succeeded', wait=True)
+                log("listening ...", "blue", attrs=['bold'])
+                listen_for_live_mode(stream, audio_model, CHUNK, FORMAT, CHANNELS, RATE, RECORD_SECONDS, WAVE_OUTPUT_FILENAME, SPEECH_THRESHOLD)
             else:
+                log('live mode: match test failed!', "red", attrs=['bold'])
                 voice_feedback.speak('match test failed!', wait=True)
             frames.clear()
@@ -206,5 +209,57 @@ def analyze_text(text):
     command_manager.launch_if_any(text)
 
 
+def listen_for_live_mode(stream, audio_model, chunk, audio_format, channels, rate, record_seconds, wave_output_filename, speech_threshold):
+    frames = []
+    chunk_array = array('h')
+    log("listening ...", "blue", attrs=["bold"])
+    for i in range(0, int(rate / chunk * record_seconds)):
+        data = stream.read(chunk)
+        frames.append(data)  # stacking every audio frame into the list
+        chunk_array.extend(array('h', data))
+    chunk_array = trim(chunk_array)
+    if len(chunk_array) == 0:  # clip is empty
+        log('no voice')
+        return
+    elif max(chunk_array) < speech_threshold:  # no speech in clip
+        log('no speech in clip')
+        return
+    print("saving audio ...")
+
+    # writing the wave file
+    wf = wave.open(wave_output_filename, 'wb')
+    wf.setnchannels(channels)
+    wf.setsampwidth(pyAudio.get_sample_size(audio_format))
+    wf.setframerate(rate)
+    wf.writeframes(b''.join(frames))
+    wf.close()
+
+    # checking if master control mode is enabled
+    # and performing audio analysis if enabled
+    if config_manager.config['master-mode']:
+        log('performing master mode analysis ...', "green", attrs=['bold'])
+        if not isMasterSpeaking():
+            log('performing master mode analysis ... failed', "red", attrs=['bold'])
+            if config_manager.config['master-mode-barrier-speech-enabled']:
+                voice_feedback.speak(config_manager.config['master-mode-barrier-speech'], wait=True)
+            return
+        log('performing master mode analysis ... succeeded', "green", attrs=['bold'])
+    voice_feedback.give_transcription_feedback()
+    log("transcribing audio data ...")
+    # transcribing audio ...
+    # fp16 isn't supported on every CPU, using
+    # fp32 by default.
+    result = audio_model.transcribe(wave_output_filename, fp16=False, language='english')
+
+    log("analyzing results ...", "magenta", attrs=["bold"])
+    # analyzing results ...
+    analyze_text(result["text"].lower().strip())
+
+    frames.clear()
+
+
+
+
 # spawning the process
 main()
diff --git a/misc/execution-feedback.mp3 b/misc/execution-feedback.mp3
index 7c35885..4702514 100644
Binary files a/misc/execution-feedback.mp3 and b/misc/execution-feedback.mp3 differ
diff --git a/misc/exiting-feedback.mp3 b/misc/exiting-feedback.mp3
index dd39f6c..8953d06 100644
Binary files a/misc/exiting-feedback.mp3 and b/misc/exiting-feedback.mp3 differ
diff --git a/misc/greeting.mp3 b/misc/greeting.mp3
index 7bbcf42..be358f5 100644
Binary files a/misc/greeting.mp3 and b/misc/greeting.mp3 differ
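
Note: the live-mode loop above gates everything on live_mode_manager.compare(), which reduces a recorded clip to a single distance and checks it against threshold. The sketch below shows one common way to compute such a distance, using MFCC features and dynamic time warping via librosa; the library choice and the dtw_distance helper name are illustrative assumptions, not this project's actual implementation.

    import numpy as np
    import soundfile
    import librosa

    def dtw_distance(path_a, path_b):
        # load both clips; soundfile.read returns (samples, sample_rate)
        y_a, sr_a = soundfile.read(path_a)
        y_b, sr_b = soundfile.read(path_b)

        # collapse stereo to mono so the MFCCs are computed on one channel
        if y_a.ndim > 1:
            y_a = y_a.mean(axis=1)
        if y_b.ndim > 1:
            y_b = y_b.mean(axis=1)

        # 13 MFCCs per frame is a common compact representation of speech
        mfcc_a = librosa.feature.mfcc(y=y_a.astype(np.float32), sr=sr_a, n_mfcc=13)
        mfcc_b = librosa.feature.mfcc(y=y_b.astype(np.float32), sr=sr_b, n_mfcc=13)

        # DTW aligns the two MFCC sequences; the bottom-right cell of the
        # accumulated-cost matrix is the total alignment cost
        cost_matrix, _ = librosa.sequence.dtw(X=mfcc_a, Y=mfcc_b)
        return cost_matrix[-1, -1]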
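
Similarly, listen_for_live_mode relies on a trim() helper that is defined elsewhere in main.py and not shown in this patch. Below is a minimal sketch of the kind of silence trimming such a helper typically performs on array('h') microphone samples; the SILENCE_CUTOFF value and the trim_silence name are assumptions for illustration.

    from array import array

    SILENCE_CUTOFF = 500  # assumed amplitude below which a 16-bit sample counts as silence

    def trim_silence(samples: array) -> array:
        # indices of all samples loud enough to count as signal
        loud = [i for i, s in enumerate(samples) if abs(s) >= SILENCE_CUTOFF]
        if not loud:
            return array('h')  # nothing but silence: return an empty clip
        # keep everything between the first and last loud sample
        return samples[loud[0]:loud[-1] + 1]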