From d62eae64fecab4ee50a595b5ed77275f73a94b55 Mon Sep 17 00:00:00 2001
From: Oleh Prypin <oleh@pryp.in>
Date: Sat, 24 Feb 2024 13:31:40 +0100
Subject: [PATCH] When getting the title, extract alt content of img tags

---
 mkdocs/tests/structure/page_tests.py |  6 +--
 mkdocs/utils/rendering.py            | 79 ++++++++++++++++++----------
 2 files changed, 54 insertions(+), 31 deletions(-)
diff --git a/mkdocs/tests/structure/page_tests.py b/mkdocs/tests/structure/page_tests.py
index 617264d1..14bb85da 100644
--- a/mkdocs/tests/structure/page_tests.py
+++ b/mkdocs/tests/structure/page_tests.py
@@ -391,10 +391,8 @@ class PageTests(unittest.TestCase):
         self._test_extract_title('''# foo <!-- comment with <em> --> bar''', expected='foo bar')
 
     def test_page_title_from_markdown_strip_image(self):
-        self._test_extract_title(
-            '''# Hi ![😄](hah.png)''',
-            expected='Hi',  # TODO: Should the alt text of the image be extracted?
-        )
+        self._test_extract_title('''# Hi ![😄](hah.png)''', expected='Hi 😄')  # alt text kept
+        self._test_extract_title('''# Hi *-![😄](hah.png)-*''', expected='Hi -😄-')  # also inside em
 
     _ATTRLIST_CONTENT = dedent(
         '''
diff --git a/mkdocs/utils/rendering.py b/mkdocs/utils/rendering.py
index 7361b253..545e1efb 100644
--- a/mkdocs/utils/rendering.py
+++ b/mkdocs/utils/rendering.py
@@ -1,10 +1,14 @@
+from __future__ import annotations
+
 import copy
-from typing import Callable
-from xml.etree import ElementTree as etree
+from typing import TYPE_CHECKING, Callable
 
 import markdown
 import markdown.treeprocessors
 
+if TYPE_CHECKING:
+    from xml.etree import ElementTree as etree
+
 # TODO: This will become unnecessary after min-versions have Markdown >=3.4
 _unescape: Callable[[str], str]
 try:
@@ -16,7 +20,10 @@ except AttributeError:
 
 
 def get_heading_text(el: etree.Element, md: markdown.Markdown) -> str:
-    el = _remove_fnrefs(_remove_anchorlink(el))
+    el = copy.deepcopy(el)  # The helpers below mutate in place; keep the caller's tree intact.
+    _remove_anchorlink(el)
+    _remove_fnrefs(el)
+    _extract_alt_texts(el)  # Images with a non-empty `alt` become that text.
     return _strip_tags(_render_inner_html(el, md))
 
 
@@ -49,31 +56,49 @@ def _render_inner_html(el: etree.Element, md: markdown.Markdown) -> str:
     return text
 
 
-def _remove_anchorlink(el: etree.Element) -> etree.Element:
-    """Drop anchorlink from a copy of the element, if present."""
+def _remove_anchorlink(el: etree.Element) -> None:
+    """Drop the trailing `headerlink` anchor from the element in place, if present."""
     if len(el) > 0 and el[-1].tag == 'a' and el[-1].get('class') == 'headerlink':
-        el = copy.copy(el)
         del el[-1]
-    return el
 
 
-def _remove_fnrefs(root: etree.Element) -> etree.Element:
-    """Remove footnote references from a copy of the element, if any are present."""
-    # If there are no `sup` elements, then nothing to do.
-    if next(root.iter('sup'), None) is None:
-        return root
-    root = copy.deepcopy(root)
-    # Find parent elements that contain `sup` elements.
-    for parent in root.iterfind('.//sup/..'):
-        carry_text = ""
-        for child in reversed(parent):  # Reversed for the ability to mutate during iteration.
-            # Remove matching footnote references but carry any `tail` text to preceding elements.
-            if child.tag == 'sup' and child.get('id', '').startswith('fnref'):
-                carry_text = (child.tail or "") + carry_text
-                parent.remove(child)
-            elif carry_text:
-                child.tail = (child.tail or "") + carry_text
-                carry_text = ""
-        if carry_text:
-            parent.text = (parent.text or "") + carry_text
-    return root
+def _remove_fnrefs(root: etree.Element) -> None:
+    """Remove footnote references from the element in place, keeping their tail text."""
+    for parent in root.findall('.//sup[@id]/..'):  # Every parent of a `sup` that has an id.
+        _replace_elements_with_text(parent, _predicate_for_fnrefs)
+
+
+def _predicate_for_fnrefs(el: etree.Element) -> str | None:
+    if el.tag == 'sup' and el.get('id', '').startswith('fnref'):
+        return ''  # A footnote reference: replace it with nothing.
+    return None  # Anything else is left untouched.
+
+
+def _extract_alt_texts(root: etree.Element) -> None:
+    """Replace each `img` that has a non-empty `alt` attribute with its alt text, in place."""
+    for parent in root.findall('.//img[@alt]/..'):
+        _replace_elements_with_text(parent, _predicate_for_alt_texts)
+
+
+def _predicate_for_alt_texts(el: etree.Element) -> str | None:
+    if el.tag == 'img' and (alt := el.get('alt')):  # An empty `alt` does not match.
+        return alt
+    return None  # Not an image with alt text; leave the element alone.
+
+
+def _replace_elements_with_text(
+    parent: etree.Element, predicate: Callable[[etree.Element], str | None]
+) -> None:
+    """For each child element, if matched, replace it with the text returned from the predicate."""
+    carry_text = ""  # Text not yet attached to a surviving earlier sibling or the parent.
+    for child in reversed(parent):  # Reversed for the ability to mutate during iteration.
+        # Remove matching elements but carry any `tail` text to preceding elements.
+        new_text = predicate(child)
+        if new_text is not None:  # None means no match; '' still removes the child.
+            carry_text = new_text + (child.tail or "") + carry_text
+            parent.remove(child)
+        elif carry_text:
+            child.tail = (child.tail or "") + carry_text
+            carry_text = ""
+    if carry_text:  # No earlier sibling survived, so the text goes on the parent itself.
+        parent.text = (parent.text or "") + carry_text