mirror of
https://github.com/mkdocs/mkdocs.git
synced 2026-03-27 09:58:31 +07:00
When getting the title, extract alt content of img tags
This commit is contained in:
@@ -391,10 +391,8 @@ class PageTests(unittest.TestCase):
|
||||
self._test_extract_title('''# foo <!-- comment with <em> --> bar''', expected='foo bar')
|
||||
|
||||
def test_page_title_from_markdown_strip_image(self):
    """An image inside a heading contributes its alt text to the extracted title."""
    # NOTE(review): the image markup had been lost from these literals. The alt text
    # is taken from the expected titles; the `image.png` target is a placeholder --
    # confirm against upstream.
    self._test_extract_title('''# Hi ![😄](image.png)''', expected='Hi 😄')
    # The image sits inside emphasis here, so the alt text must be spliced between
    # the surrounding text of a nested element.
    self._test_extract_title('''# Hi *-![😄](image.png)-*''', expected='Hi -😄-')
|
||||
|
||||
_ATTRLIST_CONTENT = dedent(
|
||||
'''
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Callable
|
||||
from xml.etree import ElementTree as etree
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
import markdown
|
||||
import markdown.treeprocessors
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from xml.etree import ElementTree as etree
|
||||
|
||||
# TODO: This will become unnecessary after min-versions have Markdown >=3.4
|
||||
_unescape: Callable[[str], str]
|
||||
try:
|
||||
@@ -16,7 +20,10 @@ except AttributeError:
|
||||
|
||||
|
||||
def get_heading_text(el: etree.Element, md: markdown.Markdown) -> str:
    """Return the text of a heading element with markup removed.

    The work is done on a deep copy, so `el` itself is never modified. On that copy
    the anchorlink and footnote references are dropped and images are replaced by
    their alt text; the inner HTML is then rendered with `md` and its tags stripped.
    """
    # The helpers below edit the tree in place, so isolate them from the caller's element.
    heading = copy.deepcopy(el)
    _remove_anchorlink(heading)
    _remove_fnrefs(heading)
    _extract_alt_texts(heading)
    return _strip_tags(_render_inner_html(heading, md))
|
||||
|
||||
|
||||
@@ -49,31 +56,49 @@ def _render_inner_html(el: etree.Element, md: markdown.Markdown) -> str:
|
||||
return text
|
||||
|
||||
|
||||
def _remove_anchorlink(el: etree.Element) -> None:
    """Drop anchorlink from the element, if present.

    The element is modified in place. Only a trailing `<a class="headerlink">`
    child is removed; any other element is left untouched.
    """
    # `len(el)` counts child elements; an element without children has nothing to drop.
    if len(el) > 0 and el[-1].tag == 'a' and el[-1].get('class') == 'headerlink':
        del el[-1]
|
||||
|
||||
|
||||
def _remove_fnrefs(root: etree.Element) -> etree.Element:
    """Remove footnote references from a copy of the element, if any are present.

    Returns `root` itself when it holds no `sup` elements; otherwise returns a deep
    copy from which every `sup` whose `id` starts with `fnref` has been removed.
    The `tail` text of a removed element is kept by moving it onto the preceding
    sibling, or onto the parent's `text` when there is no preceding sibling.
    """
    # If there are no `sup` elements, then nothing to do.
    if next(root.iter('sup'), None) is None:
        return root
    root = copy.deepcopy(root)
    # Find parent elements that contain `sup` elements. `findall` collects them into a
    # list up front, so the tree is not edited while the search is still walking it.
    for parent in root.findall('.//sup/..'):
        carry_text = ""
        for child in reversed(parent):  # Reversed for the ability to mutate during iteration.
            # Remove matching footnote references but carry any `tail` text to preceding elements.
            if child.tag == 'sup' and child.get('id', '').startswith('fnref'):
                carry_text = (child.tail or "") + carry_text
                parent.remove(child)
            elif carry_text:
                child.tail = (child.tail or "") + carry_text
                carry_text = ""
        # No preceding sibling was left to take the text, so it belongs to the parent.
        if carry_text:
            parent.text = (parent.text or "") + carry_text
    return root
|
||||
def _remove_fnrefs(root: etree.Element) -> None:
    """Remove footnote references from the element, if any are present.

    The element is modified in place. Every parent of a `sup` element carrying an
    `id` attribute is handed to `_replace_elements_with_text`, with
    `_predicate_for_fnrefs` deciding which children are actually removed.
    """
    # `findall` returns a list, so the parents can be edited while looping over them.
    for parent in root.findall('.//sup[@id]/..'):
        _replace_elements_with_text(parent, _predicate_for_fnrefs)
|
||||
|
||||
|
||||
def _predicate_for_fnrefs(el: etree.Element) -> str | None:
    """Return the replacement text for a footnote reference, or None for anything else.

    A footnote reference is a `sup` element whose `id` starts with `fnref`. Its
    replacement is the empty string, i.e. it is removed without leaving text behind.
    """
    if el.tag == 'sup' and el.get('id', '').startswith('fnref'):
        return ''
    return None
|
||||
|
||||
|
||||
def _extract_alt_texts(root: etree.Element) -> None:
    """For images that have an `alt` attribute, replace them with this content.

    The element is modified in place. Every parent of an `img` element carrying an
    `alt` attribute is handed to `_replace_elements_with_text`, with
    `_predicate_for_alt_texts` supplying the text that takes the image's place.
    """
    # `findall` returns a list, so the parents can be edited while looping over them.
    for parent in root.findall('.//img[@alt]/..'):
        _replace_elements_with_text(parent, _predicate_for_alt_texts)
|
||||
|
||||
|
||||
def _predicate_for_alt_texts(el: etree.Element) -> str | None:
    """Return the alt text of an image element, or None for anything else.

    An `img` whose `alt` attribute is missing or empty also yields None, so such an
    image is not treated as a match.
    """
    if el.tag == 'img' and (alt := el.get('alt')):
        return alt
    return None
|
||||
|
||||
|
||||
def _replace_elements_with_text(
    parent: etree.Element, predicate: Callable[[etree.Element], str | None]
) -> None:
    """For each child element, if matched, replace it with the text returned from the predicate.

    `predicate` is called once per direct child of `parent`. A return value of None
    leaves the child alone; a string (possibly empty) removes the child and puts that
    string, followed by the child's `tail`, where the child used to be. `parent` is
    modified in place.
    """
    # Text waiting to be attached to the nearest surviving node on the left.
    carry_text = ""
    for child in reversed(parent):  # Reversed for the ability to mutate during iteration.
        # Remove matching elements but carry any `tail` text to preceding elements.
        new_text = predicate(child)
        if new_text is not None:
            carry_text = new_text + (child.tail or "") + carry_text
            parent.remove(child)
        elif carry_text:
            child.tail = (child.tail or "") + carry_text
            carry_text = ""
    # No surviving sibling precedes the removed elements, so the text joins the parent's own.
    if carry_text:
        parent.text = (parent.text or "") + carry_text
|
||||
|
||||
Reference in New Issue
Block a user