wip

mkdocstrings · Feb 4, 2025 · e0dc894 · e0dc894
1 parent dcc7868
commit e0dc894
Show file tree

Hide file tree

Showing 5 changed files with 243 additions and 93 deletions.
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -40,6 +40,7 @@ theme:
   - content.tooltips
   - navigation.footer
   - navigation.indexes
+  - navigation.instant.preview
   - navigation.sections
   - navigation.tabs
   - navigation.tabs.sticky

diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py
@@ -15,17 +15,21 @@
 import contextlib
 import functools
 import logging
+from collections import defaultdict
 from pathlib import PurePosixPath as URL  # noqa: N814
 from typing import TYPE_CHECKING, Any, Callable
 from urllib.parse import urlsplit
 from warnings import warn
 
 from mkdocs.config.base import Config
 from mkdocs.config.config_options import Type
-from mkdocs.plugins import BasePlugin
+from mkdocs.plugins import BasePlugin, event_priority
 from mkdocs.structure.pages import Page
+from mkdocs.structure.files import Files
+from mkdocs.structure.nav import Section
+from jinja2.environment import Environment
 
-from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url
+from mkdocs_autorefs.references import AutorefsExtension, URLAndTitle, _find_backlinks, fix_refs, relative_url
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -43,6 +47,15 @@
     log = logging.getLogger(f"mkdocs.plugins.{__name__}")  # type: ignore[assignment]
 
 
+# TODO: BACKLINKS: Record URLs directly. It's wrong to record ids and use them later
+# to fetch all associated URLs: not all these URLs link to the cross-ref'd object.
+# Also, don't store URLs + titles, only store URLs in maps, and store titles in a separate dict.
+# Also also, backlinks should be fetched for all aliases of a given identifier,
+# not just for this specific identifier. For example, mkdocstrings-python will create
+# an autoref for a parameter default value with `used-by` type and `object.canonical.path` as id,
+# But if we don't render the object with this canonical path but instead `object.path`,
+# then we won't find the backlinks for it.
+
 class AutorefsConfig(Config):
     """Configuration options for the `autorefs` plugin."""
 
@@ -76,7 +89,7 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
     """
 
     scan_toc: bool = True
-    current_page: str | None = None
+    current_page: Page | None = None
     # YORE: Bump 2: Remove line.
     legacy_refs: bool = True
 
@@ -111,7 +124,9 @@ def __init__(self) -> None:
         # This logic unfolds in `_get_item_url`.
         self._primary_url_map: dict[str, list[str]] = {}
         self._secondary_url_map: dict[str, list[str]] = {}
+        self._title_map: dict[str, str] = {}
         self._abs_url_map: dict[str, str] = {}
+        self._backlinks: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set))
         # YORE: Bump 2: Remove line.
         self._get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None
 
@@ -133,22 +148,69 @@ def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) ->
                 stacklevel=2,
             )
 
-    def register_anchor(self, page: str, identifier: str, anchor: str | None = None, *, primary: bool = True) -> None:
+    def _record_backlink(self, identifier: str, backlink_type: str, backlink_anchor: str, page_url: str) -> None:
+        """Record a backlink.
+
+        Arguments:
+            identifier: The target identifier.
+            backlink_type: The type of backlink.
+            backlink_anchor: The backlink target anchor.
+            page_url: The URL of the page containing the backlink.
+        """
+        if identifier in self._primary_url_map or identifier in self._secondary_url_map:
+            self._backlinks[identifier][backlink_type].add(f"{page_url}#{backlink_anchor}")
+
+    def get_backlinks(self, *identifiers: str, from_url: str) -> dict[str, set[URLAndTitle]]:
+        """Return the backlinks to an identifier relative to the given URL.
+
+        Arguments:
+            *identifiers: The identifiers to get backlinks for.
+            from_url: The URL of the page where backlinks are rendered.
+
+        Returns:
+            A dictionary of backlinks, with the type of reference as key and a list of URLs as value.
+        """
+        relative_backlinks: dict[str, set[URLAndTitle]] = defaultdict(set)
+        for identifier in identifiers:
+            backlinks = self._backlinks.get(identifier, {})
+            for backlink_type, backlink_urls in backlinks.items():
+                for backlink_url in backlink_urls:
+                    relative_backlinks[backlink_type].add((relative_url(from_url, backlink_url), self._title_map[backlink_url]))
+        return relative_backlinks
+
+    def _breadcrumbs(self, page: Page | Section, title: str) -> str:
+        breadcrumbs = [title, page.title]
+        while page.parent:
+            page = page.parent
+            breadcrumbs.append(page.title)
+        return " ❭ ".join(reversed(breadcrumbs))
+
+    def register_anchor(
+        self,
+        identifier: str,
+        anchor: str | None = None,
+        *,
+        title: str | None = None,
+        primary: bool = True,
+    ) -> None:
         """Register that an anchor corresponding to an identifier was encountered when rendering the page.
 
         Arguments:
-            page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
             identifier: The identifier to register.
             anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
+            title: The title of the anchor (optional).
             primary: Whether this anchor is the primary one for the identifier.
         """
-        page_anchor = f"{page}#{anchor or identifier}"
+        page_anchor = f"{self.current_page.url}#{anchor or identifier}"
         url_map = self._primary_url_map if primary else self._secondary_url_map
         if identifier in url_map:
             if page_anchor not in url_map[identifier]:
                 url_map[identifier].append(page_anchor)
         else:
             url_map[identifier] = [page_anchor]
+        if title and page_anchor not in self._title_map:
+            title = self._breadcrumbs(self.current_page, title) if self.current_page else title
+            self._title_map[page_anchor] = title
 
     def register_url(self, identifier: str, url: str) -> None:
         """Register that the identifier should be turned into a link to this URL.
@@ -240,7 +302,7 @@ def get_item_url(
         from_url: str | None = None,
         # YORE: Bump 2: Remove line.
         fallback: Callable[[str], Sequence[str]] | None = None,
-    ) -> str:
+    ) -> URLAndTitle:
         """Return a site-relative URL with anchor to the identifier, if it's present anywhere.
 
         Arguments:
@@ -252,11 +314,12 @@ def get_item_url(
         """
         # YORE: Bump 2: Replace `, fallback` with `` within line.
         url = self._get_item_url(identifier, from_url, fallback)
+        title = self._title_map.get(url) or None
         if from_url is not None:
             parsed = urlsplit(url)
             if not parsed.scheme and not parsed.netloc:
-                return relative_url(from_url, url)
-        return url
+                url = relative_url(from_url, url)
+        return url, title
 
     def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
         """Instantiate our Markdown extension.
@@ -287,7 +350,7 @@ def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str:  #
             The same Markdown. We only use this hook to keep a reference to the current page URL,
                 used during Markdown conversion by the anchor scanner tree processor.
         """
-        self.current_page = page.url
+        self.current_page = page
         return markdown
 
     def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
@@ -306,56 +369,61 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str:  # noqa:
         Returns:
             The same HTML. We only use this hook to map anchors to URLs.
         """
+        self.current_page = page
+        # Collect `std`-domain URLs.
         if self.scan_toc:
             log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
             for item in page.toc.items:
-                self.map_urls(page.url, item)
+                self.map_urls(item)
         return html
 
-    def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
+    def map_urls(self, anchor: AnchorLink) -> None:
         """Recurse on every anchor to map its ID to its absolute URL.
 
         This method populates `self._primary_url_map` by side-effect.
 
         Arguments:
-            base_url: The base URL to use as a prefix for each anchor's relative URL.
             anchor: The anchor to process and to recurse on.
         """
-        self.register_anchor(base_url, anchor.id, primary=True)
+        self.register_anchor(anchor.id, title=anchor.title, primary=True)
         for child in anchor.children:
-            self.map_urls(base_url, child)
+            self.map_urls(child)
 
-    def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
-        """Fix cross-references.
+    @event_priority(-50)  # Late, after mkdocstrings has finished loading inventories.
+    def on_env(self, env:  Environment, /, *, config: MkDocsConfig, files: Files) -> Environment:
+        """Apply cross-references and collect backlinks.
 
-        Hook for the [`on_post_page` event](https://www.mkdocs.org/user-guide/plugins/#on_post_page).
+        Hook for the [`on_env` event](https://www.mkdocs.org/user-guide/plugins/#on_env).
         In this hook, we try to fix unresolved references of the form `[title][identifier]` or `[identifier][]`.
         Doing that allows the user of `autorefs` to cross-reference objects in their documentation strings.
         It uses the native Markdown syntax so it's easy to remember and use.
 
-        We log a warning for each reference that we couldn't map to an URL, but try to be smart and ignore identifiers
-        that do not look legitimate (sometimes documentation can contain strings matching
-        our [`AUTO_REF_RE`][mkdocs_autorefs.references.AUTO_REF_RE] regular expression that did not intend to reference anything).
-        We currently ignore references when their identifier contains a space or a slash.
+        We log a warning for each reference that we couldn't map to an URL.
+
+        We also collect backlinks at the same time. We fix cross-refs and collect backlinks in a single pass
+        for performance reasons (we don't want to run the regular expression on each page twice).
 
         Arguments:
-            output: HTML converted from Markdown.
-            page: The related MkDocs page instance.
-            kwargs: Additional arguments passed by MkDocs.
+            env: The MkDocs environment.
+            config: The MkDocs config object.
+            files: The list of files in the MkDocs project.
 
         Returns:
-            Modified HTML.
+            The unmodified environment.
         """
-        log.debug("Fixing references in page %s", page.file.src_path)
-
-        # YORE: Bump 2: Replace `, fallback=self.get_fallback_anchor` with `` within line.
-        url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
-        # YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.
-        fixed_output, unmapped = fix_refs(output, url_mapper, _legacy_refs=self.legacy_refs)
-
-        if unmapped and log.isEnabledFor(logging.WARNING):
-            for ref, context in unmapped:
-                message = f"from {context.filepath}:{context.lineno}: ({context.origin}) " if context else ""
-                log.warning(f"{page.file.src_path}: {message}Could not find cross-reference target '{ref}'")
-
-        return fixed_output
+        for file in files:
+            if file.page and file.page.content:
+                log.debug("Applying cross-refs in page %s", file.page.file.src_path)
+
+                # YORE: Bump 2: Replace `, fallback=self.get_fallback_anchor` with `` within line.
+                url_mapper = functools.partial(self.get_item_url, from_url=file.page.url, fallback=self.get_fallback_anchor)
+                backlink_recorder = functools.partial(self._record_backlink, page_url=file.page.url)
+                # YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.
+                file.page.content, unmapped = fix_refs(file.page.content, url_mapper, record_backlink=backlink_recorder, _legacy_refs=self.legacy_refs)
+
+                if unmapped and log.isEnabledFor(logging.WARNING):
+                    for ref, context in unmapped:
+                        message = f"from {context.filepath}:{context.lineno}: ({context.origin}) " if context else ""
+                        log.warning(f"{file.page.file.src_path}: {message}Could not find cross-reference target '{ref}'")
+
+        return env