Skip to content

Commit

Permalink
strip headings by default
Browse files Browse the repository at this point in the history
  • Loading branch information
marph91 committed Feb 2, 2025
1 parent ca22ddc commit 7672fca
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 19 deletions.
5 changes: 4 additions & 1 deletion src/formats/cacher.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import common
import converter
import intermediate_format as imf
import markdown_lib.common


class Converter(converter.BaseConverter):
Expand All @@ -23,9 +24,11 @@ def convert_note(self, file_: dict, notebook: imf.Notebook, tags: imf.Tags):
return
title = Path(file_["filename"]).stem
self.logger.debug(f'Converting note "{title}"')

_, body = markdown_lib.common.split_title_from_body(file_["content"])
note_imf = imf.Note(
title,
file_["content"],
body,
created=dt.datetime.fromisoformat(file_["createdAt"]),
updated=dt.datetime.fromisoformat(file_["updatedAt"]),
source_application=self.format,
Expand Down
2 changes: 1 addition & 1 deletion src/formats/joplin.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class Converter(converter.BaseConverter):

@common.catch_all_exceptions
def convert_note(self, markdown: str, metadata_json: dict, parent_id_note_map):
title, body = markdown_lib.common.split_h1_title_from_body(markdown)
title, body = markdown_lib.common.split_title_from_body(markdown, h1=False)
self.logger.debug(f'Converting note "{title}"')
note_imf = imf.Note(
title.strip(),
Expand Down
7 changes: 3 additions & 4 deletions src/formats/notion.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,12 @@ def convert_note(

self.logger.debug(f'Converting note "{title}"')
body = item.read_text(encoding="utf-8")
if item.suffix.lower() == ".md":
# first line is title, second is whitespace
body = "\n".join(item.read_text(encoding="utf-8").split("\n")[2:])
else: # html
if item.suffix.lower() == ".html":
# html, else markdown
body = markdown_lib.common.markup_to_markdown(
body, custom_filter=[markdown_lib.html_filter.notion_streamline_lists]
)
_, body = markdown_lib.common.split_title_from_body(body)

# find links
resources, note_links = self.handle_markdown_links(body, item)
Expand Down
4 changes: 2 additions & 2 deletions src/formats/simplenote.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ class Converter(converter.BaseConverter):
@common.catch_all_exceptions
def convert_note(self, note_simplenote):
# title is the first line
title, body = markdown_lib.common.split_h1_title_from_body(
note_simplenote["content"]
title, body = markdown_lib.common.split_title_from_body(
note_simplenote["content"], h1=False
)
self.logger.debug(f'Converting note "{title}"')

Expand Down
6 changes: 4 additions & 2 deletions src/formats/textbundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ def convert_note(self, file_: Path, parent_notebook: imf.Notebook):
title = file_.parent.stem
self.logger.debug(f'Converting note "{title}"')

note_imf = imf.Note(
title, file_.read_text(encoding="utf-8"), source_application=self.format
# title = first line header
_, body = markdown_lib.common.split_title_from_body(
file_.read_text(encoding="utf-8")
)
note_imf = imf.Note(title, body, source_application=self.format)
note_imf.tags = [
imf.Tag(tag)
for tag in markdown_lib.common.get_inline_tags(note_imf.body, ["#"])
Expand Down
31 changes: 23 additions & 8 deletions src/markdown_lib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,29 @@
LOGGER = logging.getLogger("jimmy")


def split_h1_title_from_body(markdown_: str) -> tuple[str, str]:
    r"""Split the first line off as the title, heading or not.

    Backward-compatible alias for ``split_title_from_body(markdown_, h1=False)``.

    >>> split_h1_title_from_body("# heading\n\n b")
    ('heading', 'b')
    >>> split_h1_title_from_body("heading")
    ('heading', '')
    """
    return split_title_from_body(markdown_, h1=False)


def split_title_from_body(markdown_: str, h1: bool = True) -> tuple[str, str]:
    r"""Split a Markdown document into its title line and the remaining body.

    Args:
        markdown_: The complete Markdown text.
        h1: If true (default), the first line is only treated as the title
            when the text starts with ``"# "``; otherwise the title is empty
            and the text is returned unchanged as body. If false, the first
            line is always taken as the title.

    Returns:
        A ``(title, body)`` tuple. Leading ``#`` and space characters are
        removed from the title, leading whitespace is removed from the body.

    >>> split_title_from_body("# heading\n\n b")
    ('heading', 'b')
    >>> split_title_from_body("heading\n\n b")
    ('', 'heading\n\n b')
    >>> split_title_from_body("heading\n\n b", h1=False)
    ('heading', 'b')
    >>> split_title_from_body("😄\n\n# heading")
    ('', '😄\n\n# heading')
    >>> split_title_from_body("# heading")
    ('heading', '')
    >>> split_title_from_body("")
    ('', '')
    """
    if h1 and not markdown_.startswith("# "):
        # No level-1 heading in the first line: nothing to split off.
        return "", markdown_

    # partition() never raises, so a document consisting of a single line
    # (no newline at all) goes through the same normalisation as any other.
    # Previously that case returned the title with its "# " marker intact.
    title, _, body = markdown_.partition("\n")
    # NOTE: lstrip("# ") removes any leading run of "#" and " " characters
    # (not just the exact "# " prefix); kept for compatibility with callers.
    return title.lstrip("# "), body.lstrip()


@dataclass
Expand Down
2 changes: 1 addition & 1 deletion test/data
Submodule data updated 39 files
+0 −1 reference_data/bear/test_1/2023-10-11T081102Z.md
+0 −2 reference_data/bear/test_1/Archived File.md
+0 −2 reference_data/bear/test_1/File with asset, content, and a tag.md
+0 −1 reference_data/bear/test_1/File with heading only, no content.md
+0 −1 reference_data/bear/test_1/Trashed file.md
+0 −1 reference_data/bear/test_2/2023-10-11T081102Z.md
+0 −2 reference_data/bear/test_2/Archived File.md
+0 −2 reference_data/bear/test_2/File with asset, content, and a tag.md
+0 −1 reference_data/bear/test_2/File with heading only, no content.md
+0 −2 reference_data/bear/test_2/File with two assets.md
+0 −1 reference_data/bear/test_2/Trashed file.md
+0 −2 reference_data/cacher/test_1/0. Welcome to Cacher/welcome.md
+0 −2 reference_data/cacher/test_1/1. Snippets/snippets.md
+0 −2 reference_data/cacher/test_1/2. Labels/labels.md
+0 −2 reference_data/cacher/test_1/3. Teams/teams.md
+0 −2 reference_data/cacher/test_1/4. Integrations/integrations.md
+0 −2 reference_data/cacher/test_1/5. Sharing/sharing.md
+0 −2 reference_data/notion/test_3/Untitled.md
+0 −2 reference_data/notion/test_4/Testing.md
+0 −2 reference_data/notion/test_4/Testing/sub page.md
+0 −2 reference_data/notion/test_5/Testseite.md
+0 −2 reference_data/notion/test_5/Testseite/subpage.md
+0 −2 reference_data/notion/test_5/Testseite/subpage/another subpage.md
+0 −2 reference_data/notion/test_5/Zu Beginn.md
+0 −2 reference_data/notion/test_5/another test note.md
+0 −2 reference_data/notion/test_5/note with image and some other stuff.md
+0 −2 reference_data/notion/test_5/page with link.md
+0 −2 reference_data/notion/test_6/Notion-Testspace.md
+0 −2 reference_data/notion/test_6/Notion-Testspace/Attachment Tests.md
+0 −2 reference_data/notion/test_6/Notion-Testspace/Block Reference.md
+0 −2 reference_data/notion/test_6/Notion-Testspace/Formatting Tests.md
+0 −2 reference_data/textbundle/test_1/Textbundle Example.md
+0 −2 reference_data/textbundle/test_2/Bug report in tables_bear.md
+0 −3 reference_data/textbundle/test_3/Python CHP NOTES.md
+0 −2 reference_data/textbundle/test_4/Textbundle Example v1.md
+0 −2 reference_data/textbundle/test_5/Textbundle Example v2.md
+0 −2 reference_data/textbundle/test_6/Textbundle Example v1/Textbundle Example v1.md
+0 −2 reference_data/textbundle/test_6/Textbundle Example v2/Textbundle Example v2.md
+0 −2 reference_data/textbundle/test_6/example/Textbundle Example.md

0 comments on commit 7672fca

Please sign in to comment.