Merge pull request #402 from d0ugal/toc_refactor

Refactor the TOC parsing code
This commit is contained in:
Dougal Matthews
2015-04-03 10:00:53 +01:00
3 changed files with 48 additions and 6 deletions

View File

@@ -13,6 +13,7 @@ if PY2:
httpserver = httpserver
import SocketServer
socketserver = SocketServer
from HTMLParser import HTMLParser
import itertools
zip = itertools.izip
@@ -30,6 +31,7 @@ else: # PY3
httpserver = httpserver
import socketserver
socketserver = socketserver
from html.parser import HTMLParser
zip = zip

View File

@@ -29,6 +29,20 @@ class TableOfContentsTests(unittest.TestCase):
toc = self.markdown_to_toc(md)
self.assertEqual(str(toc).strip(), expected)
def test_indented_toc_html(self):
# A heading may contain inline HTML (here a <code> element); the expected
# TOC entry for that heading carries only its text, "Heading 2".
md = dedent("""
# Heading 1
## <code>Heading</code> 2
## Heading 3
""")
expected = dedent("""
Heading 1 - #heading-1
Heading 2 - #heading-2
Heading 3 - #heading-3
""")
toc = self.markdown_to_toc(md)
self.assertEqual(str(toc).strip(), expected)
def test_flat_toc(self):
md = dedent("""
# Heading 1

View File

@@ -14,9 +14,7 @@ The steps we take to generate a table of contents are:
* Parse table of contents HTML into the underlying data structure.
"""
import re
TOC_LINK_REGEX = re.compile('<a href=["]([^"]*)["]>([^<]*)</a>')
from mkdocs.compat import HTMLParser
class TableOfContents(object):
@@ -52,6 +50,32 @@ class AnchorLink(object):
return ret
class TOCParser(HTMLParser):
    """Collect the link attributes and plain-text title from a TOC fragment.

    Feed the parser a piece of HTML (typically one line of the generated
    table of contents). Afterwards:

    * ``attrs`` is a dict of the attributes of the most recent ``<a>`` tag
      seen, or ``None`` if no ``<a>`` tag was encountered.
    * ``title`` is the concatenation of all text found between ``<a>`` and
      ``</a>``, with any nested markup (e.g. ``<code>``) dropped. It is
      the empty string when the input contains no anchor text.
    """

    def __init__(self):
        # Explicit base-class call instead of super(): this module also
        # runs on Python 2, where HTMLParser is an old-style class.
        HTMLParser.__init__(self)
        self.links = []

        # Start *outside* an anchor. Starting inside one would make any
        # text that precedes the first <a> tag (leading whitespace, stray
        # text) leak into ``title`` while ``attrs`` is still None.
        self.in_anchor = False
        self.attrs = None
        self.title = ''

    def handle_starttag(self, tag, attrs):
        """Enter anchor mode on ``<a>`` and remember its attributes."""
        if tag == 'a':
            self.in_anchor = True
            self.attrs = dict(attrs)

    def handle_endtag(self, tag):
        """Leave anchor mode on ``</a>``."""
        if tag == 'a':
            self.in_anchor = False

    def handle_data(self, data):
        """Append text to the title, but only while inside an anchor."""
        if self.in_anchor:
            self.title += data
def _parse_html_table_of_contents(html):
"""
Given a table of contents string that has been automatically generated by
@@ -63,9 +87,11 @@ def _parse_html_table_of_contents(html):
parents = []
ret = []
for line in lines:
match = TOC_LINK_REGEX.search(line)
if match:
href, title = match.groups()
parser = TOCParser()
parser.feed(line)
if parser.title:
href = parser.attrs['href']
title = parser.title
nav = AnchorLink(title, href)
# Add the item to its parent if required. If it is a topmost
# item then instead append it to our return value.