mirror of
https://github.com/mkdocs/mkdocs.git
synced 2026-03-27 09:58:31 +07:00
Merge pull request #402 from d0ugal/toc_refactor
Refactor the TOC parsing code
This commit is contained in:
@@ -13,6 +13,7 @@ if PY2:
|
||||
httpserver = httpserver
|
||||
import SocketServer
|
||||
socketserver = SocketServer
|
||||
from HTMLParser import HTMLParser
|
||||
|
||||
import itertools
|
||||
zip = itertools.izip
|
||||
@@ -30,6 +31,7 @@ else: # PY3
|
||||
httpserver = httpserver
|
||||
import socketserver
|
||||
socketserver = socketserver
|
||||
from html.parser import HTMLParser
|
||||
|
||||
zip = zip
|
||||
|
||||
|
||||
@@ -29,6 +29,20 @@ class TableOfContentsTests(unittest.TestCase):
|
||||
toc = self.markdown_to_toc(md)
|
||||
self.assertEqual(str(toc).strip(), expected)
|
||||
|
||||
def test_indented_toc_html(self):
# A heading containing inline HTML (`<code>Heading</code> 2`) must appear in the TOC with the markup stripped from its title ("Heading 2") and its anchor intact.
|
||||
md = dedent("""
|
||||
# Heading 1
|
||||
## <code>Heading</code> 2
|
||||
## Heading 3
|
||||
""")
|
||||
expected = dedent("""
|
||||
Heading 1 - #heading-1
|
||||
Heading 2 - #heading-2
|
||||
Heading 3 - #heading-3
|
||||
""")
|
||||
toc = self.markdown_to_toc(md)
|
||||
self.assertEqual(str(toc).strip(), expected)
|
||||
|
||||
def test_flat_toc(self):
|
||||
md = dedent("""
|
||||
# Heading 1
|
||||
|
||||
@@ -14,9 +14,7 @@ The steps we take to generate a table of contents are:
|
||||
* Parse table of contents HTML into the underlying data structure.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
TOC_LINK_REGEX = re.compile('<a href=["]([^"]*)["]>([^<]*)</a>')
|
||||
from mkdocs.compat import HTMLParser
|
||||
|
||||
|
||||
class TableOfContents(object):
|
||||
@@ -52,6 +50,32 @@ class AnchorLink(object):
|
||||
return ret
|
||||
|
||||
|
||||
class TOCParser(HTMLParser):
    """Extract the link target and visible text of an ``<a>`` element.

    Feed the parser a fragment of table-of-contents HTML. Afterwards:

    * ``attrs`` holds the attributes of the most recently opened ``<a>``
      tag as a dict (``None`` if no anchor was seen).
    * ``title`` holds the concatenated character data found inside
      ``<a>...</a>``; markup nested inside the anchor (e.g. ``<code>``)
      is dropped while its text is kept.
    * ``links`` is initialised to an empty list and is not populated by
      this class.
    """

    def __init__(self):
        # Explicit base-class call (rather than super()) keeps this working
        # with whichever HTMLParser implementation the file imports.
        HTMLParser.__init__(self)
        self.links = []

        # Start *outside* an anchor: text that precedes the first <a>
        # (leading whitespace, stray characters) must not leak into title.
        self.in_anchor = False
        self.attrs = None
        self.title = ''

    def handle_starttag(self, tag, attrs):
        """Enter anchor mode and remember the anchor's attributes."""
        if tag == 'a':
            self.in_anchor = True
            self.attrs = dict(attrs)

    def handle_endtag(self, tag):
        """Leave anchor mode when the closing ``</a>`` is reached."""
        if tag == 'a':
            self.in_anchor = False

    def handle_data(self, data):
        """Accumulate character data, but only while inside an anchor."""
        if self.in_anchor:
            self.title += data
|
||||
|
||||
|
||||
def _parse_html_table_of_contents(html):
|
||||
"""
|
||||
Given a table of contents string that has been automatically generated by
|
||||
@@ -63,9 +87,11 @@ def _parse_html_table_of_contents(html):
|
||||
parents = []
|
||||
ret = []
|
||||
for line in lines:
|
||||
match = TOC_LINK_REGEX.search(line)
|
||||
if match:
|
||||
href, title = match.groups()
|
||||
parser = TOCParser()
|
||||
parser.feed(line)
|
||||
if parser.title:
|
||||
href = parser.attrs['href']
|
||||
title = parser.title
|
||||
nav = AnchorLink(title, href)
|
||||
# Add the item to its parent if required. If it is a topmost
|
||||
# item then instead append it to our return value.
|
||||
|
||||
Reference in New Issue
Block a user