Skip to content

Commit

Permalink
0.1 version of plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
degiz committed Dec 26, 2022
1 parent 786788b commit 60fdef5
Show file tree
Hide file tree
Showing 14 changed files with 425 additions and 219 deletions.
3 changes: 3 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[flake8]
max-line-length = 88
extend-ignore = D100, D104
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ __pycache__

.DS_Store
**.sqlite*
*.org
*.zip
16 changes: 16 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# None of these targets produce a file with the target's name, so mark them
# phony: make then always runs them, even if a file or directory called
# "build", "run" or "test" exists in the working tree.
.PHONY: build run test

# Package the plugin sources and assets into the installable Calibre plugin zip.
build:
	zip Kobo2Calibre.zip \
		converter.py \
		db.py \
		plugin.py \
		__init__.py \
		kobo2calibre.py \
		plugin-import-name-kobo2calibre.txt \
		images/icon.png

# Register the current directory as a plugin with calibre-customize, then start Calibre.
run:
	calibre-customize -b $(shell pwd); calibre

# Lint with flake8, then type-check with mypy starting from a clean mypy cache.
test:
	flake8 .
	rm -rf .mypy_cache && mypy . --explicit-package-bases --namespace-packages
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,25 @@

Embed highlights from a Kobo device into Calibre books. Tested on books converted using [calibre-kobo-driver](https://github.com/jgoguen/calibre-kobo-driver).

The script will:
The plugin will:

- import your highlights from the Kobo device DB
- try to match the highlights with books from your Calibre library
- insert highlights into the Calibre database so that you can further edit them using Calibre's fantastic book viewer

# installation
# Installation as Calibre plugin

Check the releases section and download the latest `Kobo2Calibre.zip`. Install it like any other Calibre plugin. Make sure to add the plugin to the `toolbar when a device is connected`.

# Installation for CLI usage

The script itself doesn't have dependencies, but if you want to contribute, you can use `poetry install` to install all the dev dependencies:

```bash
poetry update && poetry install
```

# Usage
## Usage

**Warning: this script is in the alpha stage; please back up your Calibre library before using it!**

Expand Down
19 changes: 12 additions & 7 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
from typing import Optional

from calibre.customize import InterfaceActionBase
from PyQt6.QtWidgets import QWidget


class Kobo2Calibre(InterfaceActionBase):
    """Base class for Calibre plugin.

    Holds the plugin metadata (name, description, supported platforms, author,
    version, minimum Calibre version) and names the implementing class through
    ``actual_plugin``. The plugin exposes no configuration dialog.
    """

    name = "Kobo2Calibre"
    description = "Embed highlights from Kobo device into matching books in Calibre"
    supported_platforms = ["windows", "osx", "linux"]
    author = "Alexander Khizov"
    version = (0, 1, 0)
    minimum_calibre_version = (6, 10, 0)

    # "<module>:<class>" location of the class that implements the plugin.
    actual_plugin = "calibre_plugins.kobo2calibre.plugin:Kobo2CalibrePlugin"

    def is_customizable(self) -> bool:
        """Return True if the plugin has a configuration dialog."""
        return False

    def config_widget(self) -> Optional[QWidget]:
        """Return the configuration widget."""
        return None

    def save_settings(self, _: QWidget) -> None:
        """Save the settings from the config widget."""
        pass

    # NOTE(review): this method looks like it may have been dropped by the same
    # change that bumped the version; kept so existing callers keep working - confirm.
    def load_icon(self):  # type: ignore
        """Return the plugin icon; this plugin does not supply one here."""
        return None
58 changes: 0 additions & 58 deletions calibre.py

This file was deleted.

117 changes: 102 additions & 15 deletions converter.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,112 @@
import pathlib
import logging
import pathlib
import re
import tempfile
import time
import uuid
from typing import Optional
import zipfile
from datetime import datetime
import time
from sentence_tokenizer import REGEX_KTE
from typing import Dict, List, Optional, Tuple

import bs4

from bs4 import BeautifulSoup

import db
from db import CalibreHighlight

try:
# For calibre gui plugin
from calibre_plugins.kobo2calibre import db # pyright: reportMissingImports=false
except ImportError:
# For cli
import db # type: ignore

logger = logging.getLogger(__name__)

# A regex that will most likely work on books converted with KTE plugin.
# Each match is a single capture group made of: optional leading whitespace, the
# shortest run of non-newline characters that ends in a terminator (. ! ? :), an
# optional closing quote or ellipsis character, and any trailing whitespace.
REGEX_KTE = re.compile(
    r'(\s*.*?[\.\!\?\:][\'"\u201c\u201d\u2018\u2019\u2026]?\s*)',
    re.UNICODE | re.MULTILINE,
)


def get_spine_index_map(
    root_dir: pathlib.Path,
) -> Tuple[Dict[str, int], Dict[str, str]]:
    """Get the spine index map from the content.opf file.

    Args:
        root_dir: Directory containing the unpacked EPUB.

    Returns:
        A pair ``(spine_map, fixed_paths)``:

        * ``spine_map`` maps the path of each XHTML manifest item that is part
          of the spine (relative to ``root_dir``) to its position in the spine.
        * ``fixed_paths`` maps a manifest ``href`` that does not exist at the
          declared location to the relative path where a file with the same
          name was actually found.

    Raises:
        FileNotFoundError: If no ``content.opf`` exists under ``root_dir``, or a
            manifest item cannot be located anywhere under ``root_dir``.
    """
    content_file = next(root_dir.rglob("content.opf"), None)
    if content_file is None:
        raise FileNotFoundError(f"content.opf not found under {root_dir}")

    # Hand raw bytes to BeautifulSoup so it detects the document encoding itself
    # instead of relying on the platform's default text encoding.
    soup = bs4.BeautifulSoup(content_file.read_bytes(), "html.parser")

    # Read spine
    spine_ids = [
        s["idref"]
        for s in soup.package.spine.children
        if isinstance(s, bs4.element.Tag)
    ]
    spine_index = {idref: i for i, idref in enumerate(spine_ids)}

    logger.debug(f"Spine index: {spine_index}")

    # Read manifest: keep only XHTML documents that are referenced by the spine.
    hrefs = [
        s
        for s in soup.package.manifest
        if isinstance(s, bs4.element.Tag)
        and "application/xhtml" in s.get("media-type", "")
        and s.get("id") in spine_index
    ]
    logger.debug(f"Found {len(hrefs)} hrefs")

    result: Dict[str, int] = {}
    fixed_paths: Dict[str, str] = {}
    for h in hrefs:
        href = h["href"]
        final_href = href
        if not (root_dir / href).exists():
            # The declared path is wrong for this archive layout; fall back to
            # the first file anywhere under root_dir with the same file name.
            file_name = href.split("/")[-1]
            match = next(root_dir.rglob(file_name), None)
            if match is None:
                raise FileNotFoundError(
                    f"manifest item {href!r} not found under {root_dir}"
                )
            final_href = str(match.relative_to(root_dir))
            fixed_paths[href] = final_href
        result[final_href] = spine_index[h["id"]]

    return result, fixed_paths


def process_calibre_epub(
    book_calibre_epub: pathlib.Path, book_id: int, highlights: List[db.KoboHighlight]
) -> List[db.CalibreHighlight]:
    """Process a calibre epub file and return a list of highlights.

    The EPUB is unpacked into a temporary directory, its spine is read, and each
    Kobo highlight is converted with ``parse_kobo_highlights``.

    Args:
        book_calibre_epub: Path to the EPUB file to unpack.
        book_id: Book identifier passed through to ``parse_kobo_highlights``.
        highlights: Kobo highlights for this book. Entries whose
            ``content_path`` had to be corrected are replaced in this list
            in place.

    Returns:
        The highlights that were converted. If conversion fails part-way, the
        error is logged and the highlights converted so far are returned.
    """
    result: List[db.CalibreHighlight] = []
    with tempfile.TemporaryDirectory() as tmpdirname:
        with zipfile.ZipFile(book_calibre_epub, "r") as zip_ref:
            zip_ref.extractall(tmpdirname)

        try:
            spine_index_map, fixed_path = get_spine_index_map(
                pathlib.Path(tmpdirname)
            )

            logger.debug(f"Spine index map: {spine_index_map}")

            for i, h in enumerate(highlights):
                if h.content_path in fixed_path:
                    # Rebind `h` as well as the list slot: the corrected path
                    # must be the one handed to parse_kobo_highlights below.
                    h = h._replace(content_path=fixed_path[h.content_path])
                    highlights[i] = h
                calibre_highlight = parse_kobo_highlights(
                    tmpdirname, h, book_id, spine_index_map
                )
                if calibre_highlight:
                    result.append(calibre_highlight)
                    logger.debug(f"Found highlight: {calibre_highlight}")
            logger.debug(f"..found {len(result)} highlights")
        except Exception as e:
            # Best-effort boundary: one broken book must not abort the caller,
            # but keep the traceback so the failure can be diagnosed.
            logger.exception(
                f"..failed to convert the highlights: {e} "
                f"book: {book_calibre_epub}"
            )
    return result


def get_prev_sentences_offset(node, n_sentences_offset) -> int:
"""Get the offset of the previous n sentences."""
logger.debug(
"Getting prev sentences offset, node: %s, offset: %d", node, n_sentences_offset
)
Expand All @@ -34,6 +123,7 @@ def get_prev_sentences_offset(node, n_sentences_offset) -> int:


def encode_cfi(target_node, target_offset) -> str:
"""Encode a CFI for calibre."""
logger.debug(
"Encoding CFI, target_node: %s, target_offset: %s", target_node, target_offset
)
Expand Down Expand Up @@ -73,7 +163,8 @@ def encode_cfi(target_node, target_offset) -> str:

def parse_kobo_highlights(
book_prefix, highlight, book_id, spine_index_map
) -> Optional[CalibreHighlight]:
) -> Optional[db.CalibreHighlight]:
"""Parse a kobo highlight and return a calibre highlight."""
kobo_n_tag_start, kobo_n_sentence_start = [
int(i) for i in highlight.start_path.split("\\.")[1:]
]
Expand Down Expand Up @@ -109,19 +200,15 @@ def parse_kobo_highlights(

if (
not isinstance(child, bs4.element.NavigableString)
or isinstance(child, bs4.element.Comment)
or str(child) == "\n"
or str(child) == " "
or str(child) == "\u00A0" # non-breaking space, used in the tables
or str(child).strip() == ""
):
continue

if "30_rm_draft-3-4" in str(input_filename):
logger.debug(
f"input_filename: {input_filename}\n"
f"n_tag: {n_tag}\n"
f"child: {child}"
)
logger.debug(f"Including tag #{n_tag}: {child}")

if n_tag == kobo_n_tag_start:
target_start_node = (str(child), child)
Expand Down
Loading

0 comments on commit 60fdef5

Please sign in to comment.