diff --git a/mkdocs/structure/pages.py b/mkdocs/structure/pages.py index ef13e512..8b0a642d 100644 --- a/mkdocs/structure/pages.py +++ b/mkdocs/structure/pages.py @@ -10,6 +10,7 @@ from urllib.parse import unquote as urlunquote from urllib.parse import urljoin, urlsplit, urlunsplit import markdown +import markdown.extensions.toc import markdown.htmlparser # type: ignore import markdown.postprocessors import markdown.treeprocessors @@ -549,7 +550,7 @@ class _HTMLHandler(markdown.htmlparser.htmlparser.HTMLParser): # type: ignore[n class _ExtractTitleTreeprocessor(markdown.treeprocessors.Treeprocessor): title: str | None = None - postprocessors: Sequence[markdown.postprocessors.Postprocessor] = () + md: markdown.Markdown def run(self, root: etree.Element) -> etree.Element: for el in root: @@ -561,14 +562,15 @@ class _ExtractTitleTreeprocessor(markdown.treeprocessors.Treeprocessor): # Extract the text only, recursively. title = ''.join(el.itertext()) # Unescape per Markdown implementation details. - for pp in self.postprocessors: - title = pp.run(title) - self.title = title + title = markdown.extensions.toc.stashedHTML2text( + title, self.md, strip_entities=False + ) + self.title = title.strip() break return root def _register(self, md: markdown.Markdown) -> None: - self.postprocessors = tuple(md.postprocessors) + self.md = md md.treeprocessors.register(self, "mkdocs_extract_title", priority=-1) # After the end. diff --git a/mkdocs/structure/toc.py b/mkdocs/structure/toc.py index 6d09867b..e1df40be 100644 --- a/mkdocs/structure/toc.py +++ b/mkdocs/structure/toc.py @@ -33,7 +33,7 @@ class AnchorLink: self.children = [] title: str - """The text of the item.""" + """The text of the item, as HTML.""" @property def url(self) -> str: diff --git a/mkdocs/tests/structure/page_tests.py b/mkdocs/tests/structure/page_tests.py index ee6aa159..77d8542b 100644 --- a/mkdocs/tests/structure/page_tests.py +++ b/mkdocs/tests/structure/page_tests.py @@ -6,9 +6,11 @@ import textwrap import unittest from unittest import mock +import markdown + from mkdocs.config.defaults import MkDocsConfig from mkdocs.structure.files import File, Files -from mkdocs.structure.pages import Page, _RelativePathTreeprocessor +from mkdocs.structure.pages import Page, _ExtractTitleTreeprocessor, _RelativePathTreeprocessor from mkdocs.tests.base import dedent, tempdir DOCS_DIR = os.path.join( @@ -315,9 +317,16 @@ class PageTests(unittest.TestCase): self.assertEqual(pg.parent, None) self.assertEqual(pg.previous_page, None) self.assertEqual(pg.title, 'Welcome to MkDocs') - pg.render(cfg, fl) + pg.render(cfg, Files([fl])) self.assertEqual(pg.title, 'Welcome to MkDocs') + def _test_extract_title(self, content, expected, extensions={}): + md = markdown.Markdown(extensions=list(extensions.keys()), extension_configs=extensions) + extract_title_ext = _ExtractTitleTreeprocessor() + extract_title_ext._register(md) + md.convert(content) + self.assertEqual(extract_title_ext.title, expected) + _SETEXT_CONTENT = dedent( ''' Welcome to MkDocs Setext @@ -327,46 +336,37 @@ class PageTests(unittest.TestCase): ''' ) - @tempdir(files={'testing_setext_title.md': _SETEXT_CONTENT}) - def test_page_title_from_setext_markdown(self, docs_dir): - cfg = load_config() - fl = File('testing_setext_title.md', docs_dir, docs_dir, use_directory_urls=True) - pg = Page(None, fl, cfg) - self.assertIsNone(pg.title) - pg.read_source(cfg) - self.assertEqual(pg.title, 'Testing setext title') - pg.render(cfg, fl) - self.assertEqual(pg.title, 'Welcome to MkDocs Setext') + def test_page_title_from_setext_markdown(self): + self._test_extract_title( + self._SETEXT_CONTENT, + expected='Welcome to MkDocs Setext', + ) - @tempdir(files={'testing_setext_title.md': _SETEXT_CONTENT}) - def test_page_title_from_markdown_stripped_anchorlinks(self, docs_dir): - cfg = MkDocsConfig() - cfg.site_name = 'example' - cfg.markdown_extensions = {'toc': {'permalink': '&'}} - self.assertEqual(cfg.validate(), ([], [])) - fl = File('testing_setext_title.md', docs_dir, docs_dir, use_directory_urls=True) - pg = Page(None, fl, cfg) - pg.read_source(cfg) - pg.render(cfg, fl) - self.assertEqual(pg.title, 'Welcome to MkDocs Setext') + def test_page_title_from_markdown_stripped_anchorlinks(self): + self._test_extract_title( + self._SETEXT_CONTENT, + extensions={'toc': {'permalink': '&'}}, + expected='Welcome to MkDocs Setext', + ) - _FORMATTING_CONTENT = dedent( - ''' - # \\*Hello --- *beautiful* `world` + def test_page_title_from_markdown_strip_formatting(self): + self._test_extract_title( + '''# \\*Hello --- *beautiful* `wor