Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
pawamoy committed Feb 4, 2025
1 parent dcc7868 commit e0dc894
Show file tree
Hide file tree
Showing 5 changed files with 243 additions and 93 deletions.
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ theme:
- content.tooltips
- navigation.footer
- navigation.indexes
- navigation.instant.preview
- navigation.sections
- navigation.tabs
- navigation.tabs.sticky
Expand Down
146 changes: 107 additions & 39 deletions src/mkdocs_autorefs/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,21 @@
import contextlib
import functools
import logging
from collections import defaultdict
from pathlib import PurePosixPath as URL # noqa: N814
from typing import TYPE_CHECKING, Any, Callable
from urllib.parse import urlsplit
from warnings import warn

from mkdocs.config.base import Config
from mkdocs.config.config_options import Type
from mkdocs.plugins import BasePlugin
from mkdocs.plugins import BasePlugin, event_priority
from mkdocs.structure.pages import Page
from mkdocs.structure.files import Files
from mkdocs.structure.nav import Section
from jinja2.environment import Environment

from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url
from mkdocs_autorefs.references import AutorefsExtension, URLAndTitle, _find_backlinks, fix_refs, relative_url

if TYPE_CHECKING:
from collections.abc import Sequence
Expand All @@ -43,6 +47,15 @@
log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment]


# TODO: BACKLINKS: Record URLs directly. It's wrong to record ids and use them later
# to fetch all associated URLs: not all of these URLs link to the cross-ref'd object.
# Also, don't store URLs + titles together: only store URLs in maps, and store titles in a separate dict.
# Finally, backlinks should be fetched for all aliases of a given identifier,
# not just for this specific identifier. For example, mkdocstrings-python will create
# an autoref for a parameter default value with `used-by` type and `object.canonical.path` as id,
# but if we don't render the object with this canonical path and use `object.path` instead,
# then we won't find the backlinks for it.

class AutorefsConfig(Config):
"""Configuration options for the `autorefs` plugin."""

Expand Down Expand Up @@ -76,7 +89,7 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
"""

scan_toc: bool = True
current_page: str | None = None
current_page: Page | None = None
# YORE: Bump 2: Remove line.
legacy_refs: bool = True

Expand Down Expand Up @@ -111,7 +124,9 @@ def __init__(self) -> None:
# This logic unfolds in `_get_item_url`.
self._primary_url_map: dict[str, list[str]] = {}
self._secondary_url_map: dict[str, list[str]] = {}
self._title_map: dict[str, str] = {}
self._abs_url_map: dict[str, str] = {}
self._backlinks: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set))
# YORE: Bump 2: Remove line.
self._get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None

Expand All @@ -133,22 +148,69 @@ def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) ->
stacklevel=2,
)

def register_anchor(self, page: str, identifier: str, anchor: str | None = None, *, primary: bool = True) -> None:
def _record_backlink(self, identifier: str, backlink_type: str, backlink_anchor: str, page_url: str) -> None:
"""Record a backlink.
Arguments:
identifier: The target identifier.
backlink_type: The type of backlink.
backlink_anchor: The backlink target anchor.
page_url: The URL of the page containing the backlink.
"""
if identifier in self._primary_url_map or identifier in self._secondary_url_map:
self._backlinks[identifier][backlink_type].add(f"{page_url}#{backlink_anchor}")

def get_backlinks(self, *identifiers: str, from_url: str) -> dict[str, set[URLAndTitle]]:
    """Return the backlinks to the given identifiers, relative to the given URL.

    Arguments:
        *identifiers: The identifiers to get backlinks for.
        from_url: The URL of the page where backlinks are rendered.

    Returns:
        A dictionary of backlinks, with the type of reference as key
        and a set of `(relative URL, title)` pairs as value.
        The title is `None` when no title was registered for the backlink URL.
    """
    relative_backlinks: dict[str, set[URLAndTitle]] = defaultdict(set)
    for identifier in identifiers:
        # `.get()` avoids creating empty entries in the `_backlinks` defaultdict.
        for backlink_type, backlink_urls in self._backlinks.get(identifier, {}).items():
            for backlink_url in backlink_urls:
                # Not every backlink anchor has a registered title:
                # look it up leniently instead of raising `KeyError`.
                title = self._title_map.get(backlink_url)
                relative_backlinks[backlink_type].add((relative_url(from_url, backlink_url), title))
    return relative_backlinks

def _breadcrumbs(self, page: Page | Section, title: str) -> str:
breadcrumbs = [title, page.title]
while page.parent:
page = page.parent
breadcrumbs.append(page.title)
return " ❭ ".join(reversed(breadcrumbs))

def register_anchor(
    self,
    identifier: str,
    anchor: str | None = None,
    *,
    title: str | None = None,
    primary: bool = True,
) -> None:
    """Register that an anchor corresponding to an identifier was encountered when rendering the page.

    The anchor is registered against the page currently stored in `self.current_page`.

    Arguments:
        identifier: The identifier to register.
        anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
        title: The title of the anchor (optional).
        primary: Whether this anchor is the primary one for the identifier.

    Raises:
        RuntimeError: When no page is currently being processed.
    """
    page = self.current_page
    if page is None:
        # Without a current page there is no URL to attach the anchor to.
        raise RuntimeError(f"Cannot register anchor for '{identifier}': no page is currently being processed")

    page_anchor = f"{page.url}#{anchor or identifier}"
    url_map = self._primary_url_map if primary else self._secondary_url_map
    urls = url_map.setdefault(identifier, [])
    if page_anchor not in urls:
        urls.append(page_anchor)

    # The first title registered for a given URL wins.
    if title and page_anchor not in self._title_map:
        self._title_map[page_anchor] = self._breadcrumbs(page, title)

def register_url(self, identifier: str, url: str) -> None:
"""Register that the identifier should be turned into a link to this URL.
Expand Down Expand Up @@ -240,7 +302,7 @@ def get_item_url(
from_url: str | None = None,
# YORE: Bump 2: Remove line.
fallback: Callable[[str], Sequence[str]] | None = None,
) -> str:
) -> URLAndTitle:
"""Return a site-relative URL with anchor to the identifier, if it's present anywhere.
Arguments:
Expand All @@ -252,11 +314,12 @@ def get_item_url(
"""
# YORE: Bump 2: Replace `, fallback` with `` within line.
url = self._get_item_url(identifier, from_url, fallback)
title = self._title_map.get(url) or None
if from_url is not None:
parsed = urlsplit(url)
if not parsed.scheme and not parsed.netloc:
return relative_url(from_url, url)
return url
url = relative_url(from_url, url)
return url, title

def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
"""Instantiate our Markdown extension.
Expand Down Expand Up @@ -287,7 +350,7 @@ def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: #
The same Markdown. We only use this hook to keep a reference to the current page,
used during Markdown conversion by the anchor scanner tree processor.
"""
self.current_page = page.url
self.current_page = page
return markdown

def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002
Expand All @@ -306,56 +369,61 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa:
Returns:
The same HTML. We only use this hook to map anchors to URLs.
"""
self.current_page = page
# Collect `std`-domain URLs.
if self.scan_toc:
log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
for item in page.toc.items:
self.map_urls(page.url, item)
self.map_urls(item)
return html

def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
def map_urls(self, anchor: AnchorLink) -> None:
    """Recurse on every anchor to map its ID to its URL on the current page.

    This method populates `self._primary_url_map` by side-effect,
    through `register_anchor`, which uses the current page as base URL.

    Arguments:
        anchor: The anchor to process and to recurse on.
    """
    self.register_anchor(anchor.id, title=anchor.title, primary=True)
    for child in anchor.children:
        self.map_urls(child)

def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002
"""Fix cross-references.
@event_priority(-50)  # Late, after mkdocstrings has finished loading inventories.
def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) -> Environment:
    """Apply cross-references and collect backlinks.

    Hook for the [`on_env` event](https://www.mkdocs.org/user-guide/plugins/#on_env).
    In this hook, we try to fix unresolved references of the form `[title][identifier]` or `[identifier][]`.
    Doing that allows the user of `autorefs` to cross-reference objects in their documentation strings.
    It uses the native Markdown syntax so it's easy to remember and use.

    We log a warning for each reference that we couldn't map to an URL.

    We also collect backlinks at the same time. We fix cross-refs and collect backlinks in a single pass
    for performance reasons (we don't want to run the regular expression on each page twice).

    Arguments:
        env: The MkDocs environment.
        config: The MkDocs config object.
        files: The list of files in the MkDocs project.

    Returns:
        The unmodified environment.
    """
    for file in files:
        page = file.page
        # Only pages that have rendered content can contain cross-references.
        if not (page and page.content):
            continue

        src_path = page.file.src_path
        log.debug("Applying cross-refs in page %s", src_path)

        # YORE: Bump 2: Replace `, fallback=self.get_fallback_anchor` with `` within line.
        url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
        backlink_recorder = functools.partial(self._record_backlink, page_url=page.url)
        # YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.
        page.content, unmapped = fix_refs(page.content, url_mapper, record_backlink=backlink_recorder, _legacy_refs=self.legacy_refs)

        if unmapped and log.isEnabledFor(logging.WARNING):
            for ref, context in unmapped:
                message = f"from {context.filepath}:{context.lineno}: ({context.origin}) " if context else ""
                log.warning("%s: %sCould not find cross-reference target '%s'", src_path, message, ref)

    return env
Loading

0 comments on commit e0dc894

Please sign in to comment.