From d62eae64fecab4ee50a595b5ed77275f73a94b55 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Sat, 24 Feb 2024 13:31:40 +0100 Subject: [PATCH] When getting the title, extract alt content of img tags --- mkdocs/tests/structure/page_tests.py | 6 +-- mkdocs/utils/rendering.py | 79 ++++++++++++++++++---------- 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/mkdocs/tests/structure/page_tests.py b/mkdocs/tests/structure/page_tests.py index 617264d1..14bb85da 100644 --- a/mkdocs/tests/structure/page_tests.py +++ b/mkdocs/tests/structure/page_tests.py @@ -391,10 +391,8 @@ class PageTests(unittest.TestCase): self._test_extract_title('''# foo bar''', expected='foo bar') def test_page_title_from_markdown_strip_image(self): - self._test_extract_title( - '''# Hi ![😄](hah.png)''', - expected='Hi', # TODO: Should the alt text of the image be extracted? - ) + self._test_extract_title('''# Hi ![😄](hah.png)''', expected='Hi 😄') + self._test_extract_title('''# Hi *-![😄](hah.png)-*''', expected='Hi -😄-') _ATTRLIST_CONTENT = dedent( ''' diff --git a/mkdocs/utils/rendering.py b/mkdocs/utils/rendering.py index 7361b253..545e1efb 100644 --- a/mkdocs/utils/rendering.py +++ b/mkdocs/utils/rendering.py @@ -1,10 +1,14 @@ +from __future__ import annotations + import copy -from typing import Callable -from xml.etree import ElementTree as etree +from typing import TYPE_CHECKING, Callable import markdown import markdown.treeprocessors +if TYPE_CHECKING: + from xml.etree import ElementTree as etree + # TODO: This will become unnecessary after min-versions have Markdown >=3.4 _unescape: Callable[[str], str] try: @@ -16,7 +20,10 @@ except AttributeError: def get_heading_text(el: etree.Element, md: markdown.Markdown) -> str: - el = _remove_fnrefs(_remove_anchorlink(el)) + el = copy.deepcopy(el) + _remove_anchorlink(el) + _remove_fnrefs(el) + _extract_alt_texts(el) return _strip_tags(_render_inner_html(el, md)) @@ -49,31 +56,49 @@ def _render_inner_html(el: etree.Element, md: markdown.Markdown) -> str: return text -def _remove_anchorlink(el: etree.Element) -> etree.Element: - """Drop anchorlink from a copy of the element, if present.""" +def _remove_anchorlink(el: etree.Element) -> None: + """Drop anchorlink from the element, if present.""" if len(el) > 0 and el[-1].tag == 'a' and el[-1].get('class') == 'headerlink': - el = copy.copy(el) del el[-1] - return el -def _remove_fnrefs(root: etree.Element) -> etree.Element: - """Remove footnote references from a copy of the element, if any are present.""" - # If there are no `sup` elements, then nothing to do. - if next(root.iter('sup'), None) is None: - return root - root = copy.deepcopy(root) - # Find parent elements that contain `sup` elements. - for parent in root.iterfind('.//sup/..'): - carry_text = "" - for child in reversed(parent): # Reversed for the ability to mutate during iteration. - # Remove matching footnote references but carry any `tail` text to preceding elements. - if child.tag == 'sup' and child.get('id', '').startswith('fnref'): - carry_text = (child.tail or "") + carry_text - parent.remove(child) - elif carry_text: - child.tail = (child.tail or "") + carry_text - carry_text = "" - if carry_text: - parent.text = (parent.text or "") + carry_text - return root +def _remove_fnrefs(root: etree.Element) -> None: + """Remove footnote references from the element, if any are present.""" + for parent in root.findall('.//sup[@id]/..'): + _replace_elements_with_text(parent, _predicate_for_fnrefs) + + +def _predicate_for_fnrefs(el: etree.Element) -> str | None: + if el.tag == 'sup' and el.get('id', '').startswith('fnref'): + return '' + return None + + +def _extract_alt_texts(root: etree.Element) -> None: + """For images that have an `alt` attribute, replace them with this content.""" + for parent in root.findall('.//img[@alt]/..'): + _replace_elements_with_text(parent, _predicate_for_alt_texts) + + +def _predicate_for_alt_texts(el: etree.Element) -> str | None: + if el.tag == 'img' and (alt := el.get('alt')): + return alt + return None + + +def _replace_elements_with_text( + parent: etree.Element, predicate: Callable[[etree.Element], str | None] +) -> None: + """For each child element, if matched, replace it with the text returned from the predicate.""" + carry_text = "" + for child in reversed(parent): # Reversed for the ability to mutate during iteration. + # Remove matching elements but carry any `tail` text to preceding elements. + new_text = predicate(child) + if new_text is not None: + carry_text = new_text + (child.tail or "") + carry_text + parent.remove(child) + elif carry_text: + child.tail = (child.tail or "") + carry_text + carry_text = "" + if carry_text: + parent.text = (parent.text or "") + carry_text