Merge branch 'main' into CN-language_support

chidiwilliams · Jan 9, 2025 · a0becc2 · a0becc2
2 parents 3973272 + e76c1bb
commit a0becc2
Show file tree

Hide file tree

Showing 56 changed files with 2,120 additions and 624 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -277,6 +277,7 @@ jobs:
           name: Buzz-${{ runner.os }}-${{ runner.arch }}
 
       - name: Rename .dmg files
+        if: runner.os == 'macOS'
         run: |
           for file in Buzz*.dmg; do
             mv "$file" "${file%.dmg}-${{ runner.arch }}.dmg"

diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,7 @@ build/
 .coverage*
 !.coveragerc
 .env
+.DS_Store
 htmlcov/
 coverage.xml
 .idea/

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -45,7 +45,7 @@ Linux versions get also pushed to the snap. To install latest development versio
 
 1. Clone the repository `git clone --recursive https://github.com/chidiwilliams/buzz.git`
 2. Enter repo folder `cd buzz`
-3. Install Poetry `sudo apt-get install python3-poetry`
+3. Install Poetry `pipx install poetry`
 4. Activate the virtual environment `poetry shell`
 5. Install the dependencies `poetry install`
 6. Install system dependencies you may be missing 

diff --git a/Makefile b/Makefile
@@ -55,7 +55,6 @@ benchmarks: buzz/whisper_cpp.py translation_mo
 
 dist/Buzz dist/Buzz.app: buzz/whisper_cpp.py translation_mo
 	pyinstaller --noconfirm Buzz.spec
-	./dist/Buzz/Buzz --version
 
 version:
 	poetry version ${version}

diff --git a/README.md b/README.md
@@ -56,3 +56,8 @@ sudo snap connect buzz:password-manager-service
 sudo snap connect buzz:pulseaudio
 sudo snap connect buzz:removable-media
 ```
+
+
+### Latest development version
+
+For information on how to get the latest development version, with the latest features and bug fixes, see the [FAQ](https://chidiwilliams.github.io/buzz/docs/faq#9-where-can-i-get-latest-development-version).
diff --git a/buzz/__version__.py b/buzz/__version__.py
@@ -1 +1 @@
-VERSION = "1.2.0"
+VERSION = "1.3.0"
diff --git a/buzz/assets/url.svg b/buzz/assets/url.svg
diff --git a/buzz/buzz.py b/buzz/buzz.py
@@ -65,4 +65,5 @@ def main():
 
     app = Application(sys.argv)
     parse_command_line(app)
+    app.show_main_window()
     sys.exit(app.exec())
diff --git a/buzz/cli.py b/buzz/cli.py
@@ -1,6 +1,7 @@
 import enum
 import sys
 import typing
+import urllib.parse
 
 from PyQt6.QtCore import QCommandLineParser, QCommandLineOption
 
@@ -44,6 +45,9 @@ def parse_command_line(app: Application):
         print(parser.helpText())
         sys.exit(1)
 
+def is_url(path: str) -> bool:
+    parsed = urllib.parse.urlparse(path)
+    return all([parsed.scheme, parsed.netloc])
 
 def parse(app: Application, parser: QCommandLineParser):
     parser.addPositionalArgument("<command>", "One of the following commands:\n- add")
@@ -109,6 +113,7 @@ def parse(app: Application, parser: QCommandLineParser):
         srt_option = QCommandLineOption(["srt"], "Output result in an SRT file.")
         vtt_option = QCommandLineOption(["vtt"], "Output result in a VTT file.")
         txt_option = QCommandLineOption("txt", "Output result in a TXT file.")
+        hide_gui_option = QCommandLineOption("hide-gui", "Hide the main application window.")
 
         parser.addOptions(
             [
@@ -124,6 +129,7 @@ def parse(app: Application, parser: QCommandLineParser):
                 srt_option,
                 vtt_option,
                 txt_option,
+                hide_gui_option,
             ]
         )
 
@@ -201,21 +207,29 @@ def parse(app: Application, parser: QCommandLineParser):
             word_level_timings=word_timestamps,
             openai_access_token=openai_access_token,
         )
-        file_transcription_options = FileTranscriptionOptions(
-            file_paths=file_paths,
-            output_formats=output_formats,
-        )
 
         for file_path in file_paths:
+            path_is_url = is_url(file_path)
+
+            file_transcription_options = FileTranscriptionOptions(
+                file_paths=[file_path] if not path_is_url else None,
+                url=file_path if path_is_url else None,
+                output_formats=output_formats,
+            )
+
             transcription_task = FileTranscriptionTask(
-                file_path=file_path,
+                file_path=file_path if not path_is_url else None,
+                url=file_path if path_is_url else None,
+                source=FileTranscriptionTask.Source.FILE_IMPORT if not path_is_url else FileTranscriptionTask.Source.URL_IMPORT,
                 model_path=model_path,
                 transcription_options=transcription_options,
                 file_transcription_options=file_transcription_options,
                 output_directory=output_directory if output_directory != "" else None,
             )
-            app.add_task(transcription_task)
+            app.add_task(transcription_task, quit_on_complete=True)
 
+        if parser.isSet(hide_gui_option):
+            app.hide_main_window = True
 
 T = typing.TypeVar("T", bound=enum.Enum)
 

diff --git a/buzz/db/dao/transcription_dao.py b/buzz/db/dao/transcription_dao.py
@@ -1,3 +1,4 @@
+import uuid
 from datetime import datetime
 from uuid import UUID
 
@@ -31,7 +32,9 @@ def create_transcription(self, task: FileTranscriptionTask):
                 time_queued,
                 url,
                 whisper_model_size,
-                hugging_face_model_id
+                hugging_face_model_id,
+                word_level_timings,
+                extract_speech
             ) VALUES (
                 :id,
                 :export_formats,
@@ -45,9 +48,11 @@ def create_transcription(self, task: FileTranscriptionTask):
                 :time_queued,
                 :url,
                 :whisper_model_size,
-                :hugging_face_model_id
+                :hugging_face_model_id,
+                :word_level_timings,
+                :extract_speech
             )
-        """
+            """
         )
         query.bindValue(":id", str(task.uid))
         query.bindValue(
@@ -82,9 +87,78 @@ def create_transcription(self, task: FileTranscriptionTask):
             if task.transcription_options.model.hugging_face_model_id
             else None,
         )
+        query.bindValue(
+            ":word_level_timings",
+            task.transcription_options.word_level_timings
+        )
+        query.bindValue(
+            ":extract_speech",
+            task.transcription_options.extract_speech
+        )
+        if not query.exec():
+            raise Exception(query.lastError().text())
+
+    def copy_transcription(self, id: UUID) -> UUID:
+        query = self._create_query()
+        query.prepare("SELECT * FROM transcription WHERE id = :id")
+        query.bindValue(":id", str(id))
+        if not query.exec():
+            raise Exception(query.lastError().text())
+        if not query.next():
+            raise Exception("Transcription not found")
+
+        transcription_data = {field.name: query.value(field.name) for field in
+                              self.entity.__dataclass_fields__.values()}
+
+        new_id = uuid.uuid4()
+        transcription_data["id"] = str(new_id)
+        transcription_data["time_queued"] = datetime.now().isoformat()
+        transcription_data["status"] = FileTranscriptionTask.Status.QUEUED.value
+
+        query.prepare(
+            """
+            INSERT INTO transcription (
+                id,
+                export_formats,
+                file,
+                output_folder,
+                language,
+                model_type,
+                source,
+                status,
+                task,
+                time_queued,
+                url,
+                whisper_model_size,
+                hugging_face_model_id,
+                word_level_timings,
+                extract_speech
+            ) VALUES (
+                :id,
+                :export_formats,
+                :file,
+                :output_folder,
+                :language,
+                :model_type,
+                :source,
+                :status,
+                :task,
+                :time_queued,
+                :url,
+                :whisper_model_size,
+                :hugging_face_model_id,
+                :word_level_timings,
+                :extract_speech
+            )
+            """
+        )
+        for key, value in transcription_data.items():
+            query.bindValue(f":{key}", value)
         if not query.exec():
             raise Exception(query.lastError().text())
 
+        return new_id
+
     def update_transcription_as_started(self, id: UUID):
         query = self._create_query()
         query.prepare(

diff --git a/buzz/db/entity/transcription.py b/buzz/db/entity/transcription.py
@@ -16,6 +16,8 @@ class Transcription(Entity):
     model_type: str = ModelType.WHISPER.value
     whisper_model_size: str | None = None
     hugging_face_model_id: str | None = None
+    word_level_timings: str | None = None
+    extract_speech: str | None = None
     language: str | None = None
     id: str = field(default_factory=lambda: str(uuid.uuid4()))
     error_message: str | None = None

diff --git a/buzz/db/service/transcription_service.py b/buzz/db/service/transcription_service.py
@@ -19,6 +19,9 @@ def __init__(
     def create_transcription(self, task):
         self.transcription_dao.create_transcription(task)
 
+    def copy_transcription(self, id: UUID) -> UUID:
+        return self.transcription_dao.copy_transcription(id)
+
     def update_transcription_as_started(self, id: UUID):
         self.transcription_dao.update_transcription_as_started(id)
 

diff --git a/buzz/file_transcriber_queue_worker.py b/buzz/file_transcriber_queue_worker.py
@@ -1,6 +1,8 @@
 import logging
 import multiprocessing
 import queue
+import demucs.api
+from pathlib import Path
 from typing import Optional, Tuple, List, Set
 from uuid import UUID
 
@@ -53,6 +55,22 @@ def run(self):
 
             break
 
+        if self.current_task.transcription_options.extract_speech:
+            def separator_progress_callback(progress):
+                self.task_progress.emit(self.current_task, int(progress["segment_offset"] * 100) / int(progress["audio_length"] * 100))
+
+            separator = demucs.api.Separator(
+                progress=True,
+                callback=separator_progress_callback,
+            )
+            _, separated = separator.separate_audio_file(Path(self.current_task.file_path))
+
+            task_file_path = Path(self.current_task.file_path)
+            speech_path = task_file_path.with_name(f"{task_file_path.stem}_speech.flac")
+            demucs.api.save_audio(separated["vocals"], speech_path, samplerate=separator.samplerate)
+
+            self.current_task.file_path = str(speech_path)
+
         logging.debug("Starting next transcription task")
 
         model_type = self.current_task.transcription_options.model.model_type

diff --git a/buzz/locale.py b/buzz/locale.py
@@ -1,3 +1,4 @@
+import os
 import logging
 import gettext
 
@@ -9,10 +10,14 @@
 locale_dir = get_path("locale")
 gettext.bindtextdomain("buzz", locale_dir)
 
-logging.debug(f"UI locales {QLocale().uiLanguages()}")
+custom_locale = os.getenv("BUZZ_LOCALE")
+
+languages = [custom_locale] if custom_locale else QLocale().uiLanguages()
+
+logging.debug(f"UI locales {languages}")
 
 translate = gettext.translation(
-    APP_NAME.lower(), locale_dir, languages=QLocale().uiLanguages(), fallback=True
+    APP_NAME.lower(), locale_dir, languages=languages, fallback=True
 )
 
 _ = translate.gettext