Add suggestions for malformed links in Markdown

When a link cannot be resolved, guess which existing page was most likely intended, based on the provided link, and mention that suggestion in the log message.
2026-03-27 09:58:31 +07:00 · 2023-07-01 15:24:17 +02:00
parent 4150d2b8ce
commit ca5160af27
2 changed files with 111 additions and 26 deletions
--- a/mkdocs/structure/pages.py
+++ b/mkdocs/structure/pages.py
@@ -4,7 +4,7 @@ import copy
 import logging
 import posixpath
 import warnings
-from typing import TYPE_CHECKING, Any, Callable, MutableMapping
+from typing import TYPE_CHECKING, Any, Callable, Iterator, MutableMapping
 from urllib.parse import unquote as urlunquote
 from urllib.parse import urljoin, urlsplit, urlunsplit

@@ -14,6 +14,7 @@ import markdown.postprocessors
 import markdown.treeprocessors
 from markdown.util import AMP_SUBSTITUTE

+from mkdocs import utils
 from mkdocs.structure import StructureItem
 from mkdocs.structure.toc import get_toc
 from mkdocs.utils import get_build_date, get_markdown_title, meta, weak_property
@@ -306,52 +307,109 @@ class _RelativePathTreeprocessor(markdown.treeprocessors.Treeprocessor):

        return root

+    @classmethod
+    def _target_uri(cls, src_path: str, dest_path: str):
+        return posixpath.normpath(posixpath.join(src_path, dest_path).lstrip('/'))
+
+    @classmethod
+    def _target_uris(cls, use_directory_urls: bool, file: File, path: str) -> Iterator[str]:
+        """First yields the resolved file uri for the link, then proceeds to yield guesses for possible mistakes."""
+        target_uri = cls._target_uri(posixpath.dirname(file.src_uri), path)
+        yield target_uri
+
+        tried = {target_uri}
+        prefixes = [target_uri]
+        if use_directory_urls and file.name != 'index':
+            # User might have added an extra '../' because that's how to make an invalid link work with use_directory_urls.
+            prefixes.insert(0, cls._target_uri(file.src_uri, path))
+        suffixes = ['/index.md', '/README.md']
+        if not target_uri.endswith('/') and '.' in posixpath.basename(path):
+            suffixes.insert(0, '')
+        if use_directory_urls and path and '.' not in posixpath.basename(path):
+            suffixes.insert(0, '.md')
+
+        for pref in prefixes:
+            for suf in suffixes:
+                if pref == '.' and not suf.startswith('/'):
+                    continue
+                guess = posixpath.normpath(pref + suf)
+                if guess not in tried:
+                    yield guess
+                    tried.add(guess)
+
    def path_to_url(self, url: str) -> str:
        scheme, netloc, path, query, fragment = urlsplit(url)

+        warning_level, warning = 0, ''
+
        # Ignore URLs unless they are a relative link to a source file.
        if scheme or netloc:  # External link.
            return url
        elif url.startswith('/') or url.startswith('\\'):  # Absolute link.
-            log.log(
-                self.config.validation.links.absolute_links,
-                f"Doc file '{self.file.src_uri}' contains an absolute link '{url}', it was left as is.",
-            )
-            return url
+            warning_level = self.config.validation.links.absolute_links
+            warning = f"Doc file '{self.file.src_uri}' contains an absolute link '{url}', it was left as is."
        elif AMP_SUBSTITUTE in url:  # AMP_SUBSTITUTE is used internally by Markdown only for email.
            return url
        elif not path:  # Self-link containing only query or fragment.
            return url

+        path = urlunquote(path)
        # Determine the filepath of the target.
-        target_uri = posixpath.join(posixpath.dirname(self.file.src_uri), urlunquote(path))
-        target_uri = posixpath.normpath(target_uri).lstrip('/')
+        possible_target_uris = self._target_uris(self.config.use_directory_urls, self.file, path)

-        # Validate that the target exists in files collection.
-        target_file = self.files.get_file_from_path(target_uri)
-        if target_file is None:
-            if '.' not in posixpath.split(path)[-1]:
+        if warning:
+            # For absolute path (already has a warning), the primary lookup path should be preserved as a tip option.
+            target_uri = url
+            target_file = None
+        else:
+            # Validate that the target exists in files collection.
+            target_uri = next(possible_target_uris)
+            target_file = self.files.get_file_from_path(target_uri)
+
+        if target_file is None and not warning:
+            # Primary lookup path had no match, definitely produce a warning, just choose which one.
+            if '.' not in posixpath.basename(path):
                # No '.' in the last part of a path indicates path does not point to a file.
-                log.log(
-                    self.config.validation.links.unrecognized_links,
+                warning_level = self.config.validation.links.unrecognized_links
+                warning = (
                    f"Doc file '{self.file.src_uri}' contains an unrecognized relative link '{url}', "
-                    f"it was left as is.",
+                    f"it was left as is."
                )
            else:
                target = f" '{target_uri}'" if target_uri != url else ""
-                log.log(
-                    self.config.validation.links.not_found,
+                warning_level = self.config.validation.links.not_found
+                warning = (
                    f"Doc file '{self.file.src_uri}' contains a relative link '{url}', "
-                    f"but the target{target} is not found among documentation files.",
+                    f"but the target{target} is not found among documentation files."
                )
+
+        if warning:
+            # There was no match, so try to guess what other file could've been intended.
+            if warning_level > logging.DEBUG:
+                suggest_url = ''
+                for path in possible_target_uris:
+                    if self.files.get_file_from_path(path) is not None:
+                        path = utils.get_relative_url(path, self.file.url)
+                        suggest_url = urlunsplit(('', '', path, query, fragment))
+                        break
+                else:
+                    if '@' in url and '.' in url and '/' not in url:
+                        suggest_url = f'mailto:{url}'
+                if suggest_url:
+                    warning += f" Did you mean '{suggest_url}'?"
+            log.log(warning_level, warning)
            return url
+
+        assert target_uri is not None
+        assert target_file is not None
        if target_file.inclusion.is_excluded():
-            log.log(
-                min(logging.INFO, self.config.validation.links.not_found),
+            warning_level = min(logging.INFO, self.config.validation.links.not_found)
+            warning = (
                f"Doc file '{self.file.src_uri}' contains a link to "
-                f"'{target_uri}' which is excluded from the built site.",
+                f"'{target_uri}' which is excluded from the built site."
            )
-        path = target_file.url_relative_to(self.file)
+            log.log(warning_level, warning)
+        path = utils.get_relative_url(target_file.url, self.file.url)
        return urlunsplit(('', '', path, query, fragment))

    def _register(self, md: markdown.Markdown) -> None:
--- a/mkdocs/tests/structure/page_tests.py
+++ b/mkdocs/tests/structure/page_tests.py
@@ -964,15 +964,34 @@ class RelativePathExtensionTests(unittest.TestCase):
            '<a href="http://example.com/index.md">external</a>',
        )

-    def test_absolute_link(self):
+    def test_absolute_link_with_suggestion(self):
        self.assertEqual(
            self.get_rendered_result(
                content='[absolute link](/path/to/file.md)',
-                files=['index.md'],
-                logs="INFO:Doc file 'index.md' contains an absolute link '/path/to/file.md', it was left as is.",
+                files=['index.md', 'path/to/file.md'],
+                logs="INFO:Doc file 'index.md' contains an absolute link '/path/to/file.md', it was left as is. Did you mean 'path/to/file.md'?",
            ),
            '<a href="/path/to/file.md">absolute link</a>',
        )
+        self.assertEqual(
+            self.get_rendered_result(
+                use_directory_urls=False,
+                content='[absolute link](/path/to/file/)',
+                files=['path/index.md', 'path/to/file.md'],
+                logs="INFO:Doc file 'path/index.md' contains an absolute link '/path/to/file/', it was left as is.",
+            ),
+            '<a href="/path/to/file/">absolute link</a>',
+        )
+        self.assertEqual(
+            self.get_rendered_result(
+                content='[absolute link](/path/to/file)',
+                files=['path/index.md', 'path/to/file.md'],
+                logs="INFO:Doc file 'path/index.md' contains an absolute link '/path/to/file', it was left as is. Did you mean 'to/file.md'?",
+            ),
+            '<a href="/path/to/file">absolute link</a>',
+        )
+
+    def test_absolute_link(self):
        self.assertEqual(
            self.get_rendered_result(
                validation=dict(links=dict(absolute_links='warn')),
@@ -982,6 +1001,14 @@ class RelativePathExtensionTests(unittest.TestCase):
            ),
            '<a href="/path/to/file.md">absolute link</a>',
        )
+        self.assertEqual(
+            self.get_rendered_result(
+                validation=dict(links=dict(absolute_links='ignore')),
+                content='[absolute link](/path/to/file.md)',
+                files=['index.md'],
+            ),
+            '<a href="/path/to/file.md">absolute link</a>',
+        )

    def test_absolute_win_local_path(self):
        for use_directory_urls in True, False:
@@ -1010,7 +1037,7 @@ class RelativePathExtensionTests(unittest.TestCase):
            self.get_rendered_result(
                content='[contact](mail@example.com)',
                files=['index.md'],
-                logs="WARNING:Doc file 'index.md' contains a relative link 'mail@example.com', but the target is not found among documentation files.",
+                logs="WARNING:Doc file 'index.md' contains a relative link 'mail@example.com', but the target is not found among documentation files. Did you mean 'mailto:mail@example.com'?",
            ),
            '<a href="mail@example.com">contact</a>',
        )