0.1 version of plugin

degiz · Dec 26, 2022 · 60fdef5 · 60fdef5
1 parent 786788b
commit 60fdef5
Show file tree

Hide file tree

Showing 14 changed files with 425 additions and 219 deletions.
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 88
+extend-ignore = D100, D104
diff --git a/.gitignore b/.gitignore
@@ -8,3 +8,5 @@ __pycache__
 
 .DS_Store
 **.sqlite*
+*.org
+*.zip
diff --git a/Makefile b/Makefile
@@ -0,0 +1,16 @@
+# These targets are commands, not files; declare them phony so that a file or
+# directory named "build", "run" or "test" cannot make them appear up to date.
+.PHONY: build run test
+
+# Package the plugin sources into the Kobo2Calibre.zip plugin archive.
+build:
+	zip Kobo2Calibre.zip \
+		converter.py \
+		db.py \
+		plugin.py \
+		__init__.py \
+		kobo2calibre.py \
+		plugin-import-name-kobo2calibre.txt \
+		images/icon.png
+
+# Register the working directory as a plugin with Calibre, then start Calibre.
+run:
+	calibre-customize -b $(shell pwd); calibre
+
+# Lint with flake8 and type-check with mypy (starting from a clean mypy cache).
+test:
+	flake8 .
+	rm -rf .mypy_cache && mypy . --explicit-package-bases --namespace-packages
diff --git a/README.md b/README.md
@@ -2,21 +2,25 @@
 
 Embed highlights from Kobo device in Calibre book. Tested on the books converted using [calibre-kobo-driver](https://github.com/jgoguen/calibre-kobo-driver).
 
-The script will:
+The plugin will:
 
 - import your highlights from the Kobo device DB
 - try to match the highlights with books from your Calibre library
 - insert highlights into the Calibre database so that you can further edit them using a fantastic Calibre book viewer
 
-# installation
+# Installation as Calibre plugin
+
+Check the releases section and download the latest `Kobo2Calibre.zip`. Install it like any other Calibre plugin. Make sure to add the plugin to the `toolbar when a device is connected`.
+
+# Installation for CLI usage
 
 The script itself doesn't have dependencies, but if you want to contribute, you can use `poetry install` to install all the dev dependencies:
 
 ```bash
 poetry update && poetry install
 ```
 
-# Usage
+## Usage
 
 **Warning: this script is in the alpha stage; please back up your Calibre library before using it!**
 

diff --git a/__init__.py b/__init__.py
@@ -1,24 +1,29 @@
+from typing import Optional
+
 from calibre.customize import InterfaceActionBase
+from PyQt6.QtWidgets import QWidget
 
 
 class Kobo2Calibre(InterfaceActionBase):
+    """Base class for Calibre plugin."""
+
     name = "Kobo2Calibre"
     description = "Embed highlights from Kobo device into matching books in Calibre"
     supported_platforms = ["windows", "osx", "linux"]
     author = "Alexander Khizov"
-    version = (0, 0, 1)
+    version = (0, 1, 0)
     minimum_calibre_version = (6, 10, 0)
 
-    actual_plugin = "calibre_plugins.kobo2calibre.ui:Kobo2CalibrePlugin"
+    actual_plugin = "calibre_plugins.kobo2calibre.plugin:Kobo2CalibrePlugin"
 
     def is_customizable(self) -> bool:
+        """Return True if the plugin has a configuration dialog."""
         return False
 
-    def config_widget(self):  # type: ignore
+    def config_widget(self) -> Optional[QWidget]:
+        """Return the configuration widget."""
         return None
 
-    def save_settings(self, config_widget) -> None:  # type: ignore
+    def save_settings(self, _: QWidget) -> None:
+        """Save the settings from the config widget."""
         pass
-
-    def load_icon(self):  # type: ignore
-        return None
diff --git a/calibre.py b/calibre.py
diff --git a/converter.py b/converter.py
@@ -1,23 +1,112 @@
-import pathlib
 import logging
+import pathlib
+import re
+import tempfile
+import time
 import uuid
-from typing import Optional
+import zipfile
 from datetime import datetime
-import time
-from sentence_tokenizer import REGEX_KTE
+from typing import Dict, List, Optional, Tuple
 
 import bs4
-
 from bs4 import BeautifulSoup
 
-import db
-from db import CalibreHighlight
-
+try:
+    # For calibre gui plugin
+    from calibre_plugins.kobo2calibre import db  # pyright: reportMissingImports=false
+except ImportError:
+    # For cli
+    import db  # type: ignore
 
 logger = logging.getLogger(__name__)
 
+# A regex that will most likely work on books converted with KTE plugin
+REGEX_KTE = re.compile(
+    r'(\s*.*?[\.\!\?\:][\'"\u201c\u201d\u2018\u2019\u2026]?\s*)',
+    re.UNICODE | re.MULTILINE,
+)
+
+
+def get_spine_index_map(
+    root_dir: pathlib.Path,
+) -> Tuple[Dict[str, int], Dict[str, str]]:
+    """Map the spine documents of an unpacked EPUB to their spine positions.
+
+    Args:
+        root_dir: Directory the EPUB archive was extracted into.
+
+    Returns:
+        A ``(spine_map, fixed_paths)`` tuple. ``spine_map`` maps the href of
+        every XHTML manifest item referenced by the spine to its zero-based
+        spine index. ``fixed_paths`` maps each manifest href that does not
+        exist relative to ``root_dir`` to the relative path of the first file
+        with the same name found anywhere below ``root_dir``.
+
+    Raises:
+        IndexError: If no ``content.opf`` exists below ``root_dir``, or a
+            missing manifest href cannot be located by file name either.
+    """
+    content_file = list(root_dir.rglob("content.opf"))[0]
+    # Pass raw bytes so BeautifulSoup detects the document encoding itself
+    # rather than relying on the platform default of a text-mode open().
+    soup = bs4.BeautifulSoup(content_file.read_bytes(), "html.parser")
+
+    # Read spine
+    spine_ids = [
+        s["idref"]
+        for s in soup.package.spine.children
+        if isinstance(s, bs4.element.Tag)
+    ]
+    spine_index = {idref: i for i, idref in enumerate(spine_ids)}
+
+    logger.debug(f"Spine index: {spine_index}")
+
+    # Read manifest: keep only XHTML items that the spine refers to. Items
+    # lacking an id or media-type are skipped instead of raising KeyError.
+    hrefs = [
+        s
+        for s in soup.package.manifest
+        if isinstance(s, bs4.element.Tag)
+        and "application/xhtml" in s.get("media-type", "")
+        and s.get("id") in spine_index
+    ]
+    logger.debug(f"Found {len(hrefs)} hrefs")
+
+    result = {}
+    fixed_paths = {}
+    for h in hrefs:
+        final_href = h["href"]
+        if not (root_dir / final_href).exists():
+            # The href does not resolve from the extraction root; fall back
+            # to the first file with the same name anywhere in the tree.
+            file_name = final_href.split("/")[-1]
+            path = list(root_dir.rglob(file_name))[0]
+            final_href = str(path.relative_to(root_dir))
+            fixed_paths[h["href"]] = final_href
+        result[final_href] = spine_index[h["id"]]
+
+    return result, fixed_paths
+
+
+def process_calibre_epub(
+    book_calibre_epub: pathlib.Path, book_id: int, highlights: List[db.KoboHighlight]
+) -> List[db.CalibreHighlight]:
+    """Convert the Kobo highlights of one book into Calibre highlights.
+
+    The EPUB is extracted into a temporary directory, its spine is indexed
+    with ``get_spine_index_map`` and every highlight is passed through
+    ``parse_kobo_highlights``.
+
+    Args:
+        book_calibre_epub: Path to the EPUB file to extract.
+        book_id: Book id forwarded to ``parse_kobo_highlights``.
+        highlights: Kobo highlights to convert. Entries whose ``content_path``
+            has a corrected location are replaced in this list in place.
+
+    Returns:
+        The highlights that could be converted. A failure during conversion
+        is logged and whatever was converted up to that point is returned.
+    """
+    result = []
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # The archive is only needed for extraction; close it before parsing.
+        with zipfile.ZipFile(book_calibre_epub, "r") as zip_ref:
+            zip_ref.extractall(tmpdirname)
+
+        try:
+            spine_index_map, fixed_path = get_spine_index_map(
+                pathlib.Path(tmpdirname)
+            )
+
+            logger.debug(f"Spine index map: {spine_index_map}")
+
+            for i, h in enumerate(highlights):
+                if h.content_path in fixed_path:
+                    # Rebind ``h`` as well as the list slot so the corrected
+                    # path is the one handed to parse_kobo_highlights below.
+                    h = h._replace(content_path=fixed_path[h.content_path])
+                    highlights[i] = h
+                calibre_highlight = parse_kobo_highlights(
+                    tmpdirname, h, book_id, spine_index_map
+                )
+                if calibre_highlight:
+                    result.append(calibre_highlight)
+                    logger.debug(f"Found highlight: {calibre_highlight}")
+            logger.debug(f"..found {len(result)} highlights")
+        except Exception as e:
+            # Best effort: one broken book must not abort the whole import,
+            # but keep the traceback in the log so the failure is diagnosable.
+            logger.exception(
+                f"..failed to convert the highlights: {e} "
+                f"book: {book_calibre_epub}"
+            )
+    return result
+
 
 def get_prev_sentences_offset(node, n_sentences_offset) -> int:
+    """Get the offset of the previous n sentences."""
     logger.debug(
         "Getting prev sentences offset, node: %s, offset: %d", node, n_sentences_offset
     )
@@ -34,6 +123,7 @@ def get_prev_sentences_offset(node, n_sentences_offset) -> int:
 
 
 def encode_cfi(target_node, target_offset) -> str:
+    """Encode a CFI for calibre."""
     logger.debug(
         "Encoding CFI, target_node: %s, target_offset: %s", target_node, target_offset
     )
@@ -73,7 +163,8 @@ def encode_cfi(target_node, target_offset) -> str:
 
 def parse_kobo_highlights(
     book_prefix, highlight, book_id, spine_index_map
-) -> Optional[CalibreHighlight]:
+) -> Optional[db.CalibreHighlight]:
+    """Parse a kobo highlight and return a calibre highlight."""
     kobo_n_tag_start, kobo_n_sentence_start = [
         int(i) for i in highlight.start_path.split("\\.")[1:]
     ]
@@ -109,19 +200,15 @@ def parse_kobo_highlights(
 
             if (
                 not isinstance(child, bs4.element.NavigableString)
+                or isinstance(child, bs4.element.Comment)
                 or str(child) == "\n"
                 or str(child) == " "
                 or str(child) == "\u00A0"  # non-breaking space, used in the tables
                 or str(child).strip() == ""
             ):
                 continue
 
-            if "30_rm_draft-3-4" in str(input_filename):
-                logger.debug(
-                    f"input_filename: {input_filename}\n"
-                    f"n_tag: {n_tag}\n"
-                    f"child: {child}"
-                )
+            logger.debug(f"Including tag #{n_tag}: {child}")
 
             if n_tag == kobo_n_tag_start:
                 target_start_node = (str(child), child)
-Original file line number
+Diff line change
@@ Expand Up / @@ -8,3 +8,5 @@ __pycache__ @@
     .DS_Store
     **.sqlite*
+    *.org
+    *.zip