From 6b6d56e81de43a5208a4f2c9b5bd3e2c6daf7055 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 22 Jan 2014 19:46:37 +0000 Subject: [PATCH] Split out toc module, adding tests. --- .gitignore | 1 + mkdocs/build.py | 60 ++++++--------------------- mkdocs/serve.py | 15 +++---- mkdocs/test.py | 108 +++++++++++++++++++++++++++++++++++++++++++----- mkdocs/toc.py | 104 ++++++++++++++++++++++++++++++++++++++++++++++ mkdocs/utils.py | 22 ++++++++++ runtests | 8 ++++ 7 files changed, 253 insertions(+), 65 deletions(-) create mode 100644 mkdocs/toc.py create mode 100755 runtests diff --git a/.gitignore b/.gitignore index 158399fb..65a3d8ab 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ env/ dist/ *.egg-info/ *.pyc +.coverage # The 'React' theme that we use for the docs is purchased from the # wrapbootstrap.com site, and is not licensed for re-use. diff --git a/mkdocs/build.py b/mkdocs/build.py index 365d6eca..58ddff20 100644 --- a/mkdocs/build.py +++ b/mkdocs/build.py @@ -1,17 +1,16 @@ #coding: utf-8 -from mkdocs.utils import copy_media_files, write_file, get_html_path -import collections +from mkdocs import toc, utils import jinja2 import markdown import os import re -TOC_LINK_REGEX = re.compile('([^<]*)') - - class NavItem(object): + """ + Used to represent table of contents and navbar items. 
+ """ def __init__(self, title, url, children=None): self.title, self.url = title, url self.children = children or [] @@ -43,13 +42,13 @@ def build_pages(config): previous_url, next_url = path_to_previous_and_next_urls(path, config) source_path = os.path.join(config['docs_dir'], path) - output_path = os.path.join(config['build_dir'], get_html_path(path)) + output_path = os.path.join(config['build_dir'], utils.get_html_path(path)) # Get the markdown text source_content = open(source_path, 'r').read().decode('utf-8') # Prepend a table of contents marker for the TOC extension - source_content = source_content + '\n\n[TOC]' + source_content = toc.pre_process(source_content) # Generate the HTML from the markdown source md = markdown.Markdown(extensions=['meta', 'toc']) @@ -57,10 +56,10 @@ def build_pages(config): meta = md.Meta # Strip out the generated table of contents - (content, toc_html) = content.split('', 1) + (content, toc_html) = toc.post_process(content) # Post process the generated table of contents into a data structure - toc = generate_toc(toc_html) + table_of_contents = toc.TableOfContents(toc_html) # Allow 'template:' override in md source files. if 'template' in meta: @@ -77,7 +76,7 @@ def build_pages(config): 'page_title': active_nav.title, 'content': content, - 'toc': toc, + 'toc': table_of_contents, 'nav': nav, 'meta': meta, 'config': config, @@ -90,40 +89,7 @@ def build_pages(config): } output_content = template.render(context) - write_file(output_content.encode('utf-8'), output_path) - - -def generate_toc(toc_html): - """ - Given a table of contents string that has been automatically generated by - the markdown library, parse it into a tree of NavItem instances. - """ - depth = 0 - lines = toc_html.splitlines()[2:-2] - parents = [] - ret = [] - for line in lines: - match = TOC_LINK_REGEX.search(line) - if match: - href, title = match.groups() - nav = NavItem(title, href) - # Add the item to its parent if required. 
If it is a topmost - # item then instead append it to our return value. - if parents: - parents[-1].children.append(nav) - else: - ret.append(nav) - # If this item has children, store it as the current parent - if line.endswith(''): - parents.pop() - - # For the table of contents, always mark the first element as active - if ret: - ret[0].active = True - - return ret + utils.write_file(output_content.encode('utf-8'), output_path) def generate_nav(config): @@ -184,7 +150,7 @@ def path_to_url(path, config): Given a relative path, determine its corresponding absolute URL. """ if config['local_files']: - path = get_html_path(path) + path = utils.get_html_path(path) url = path.replace(os.path.pathsep, '/') return config['base_url'] + '/' + url @@ -220,6 +186,6 @@ def build(config): """ Perform a full site build. """ - copy_media_files(config['theme_dir'], config['build_dir']) - copy_media_files(config['docs_dir'], config['build_dir']) + utils.copy_media_files(config['theme_dir'], config['build_dir']) + utils.copy_media_files(config['docs_dir'], config['build_dir']) build_pages(config) diff --git a/mkdocs/serve.py b/mkdocs/serve.py index d8443d52..118a282d 100644 --- a/mkdocs/serve.py +++ b/mkdocs/serve.py @@ -6,6 +6,7 @@ import os import posixpath import SimpleHTTPServer import SocketServer +import shutil import sys import tempfile import urllib @@ -45,8 +46,8 @@ class FixedDirectoryHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): def translate_path(self, path): # abandon query parameters - path = path.split('?',1)[0] - path = path.split('#',1)[0] + path = path.split('?', 1)[0] + path = path.split('#', 1)[0] path = posixpath.normpath(urllib.unquote(path)) words = path.split('/') words = filter(None, words) @@ -54,14 +55,14 @@ class FixedDirectoryHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): for word in words: drive, word = os.path.splitdrive(word) head, word = os.path.split(word) - if word in (os.curdir, os.pardir): continue + if word in (os.curdir, os.pardir): 
+ continue path = os.path.join(path, word) return path def log_message(self, format, *args): - sys.stderr.write("[%s] %s\n" % - (self.log_date_time_string(), - format%args)) + date_str = self.log_date_time_string() + sys.stderr.write('[%s] %s\n' % (date_str, format % args)) def serve(config, options=None): @@ -96,7 +97,7 @@ def serve(config, options=None): host, port = config['dev_addr'].split(':', 1) server = TCPServer((host, int(port)), DocsDirectoryHandler) - print "Running at: http://%s:%s/" % (host, port) + print 'Running at: http://%s:%s/' % (host, port) server.serve_forever() # Clean up diff --git a/mkdocs/test.py b/mkdocs/test.py index a518487c..c061d50e 100755 --- a/mkdocs/test.py +++ b/mkdocs/test.py @@ -1,29 +1,46 @@ #!/usr/bin/env python # coding: utf-8 -from mkdocs import utils +from mkdocs import toc, utils +import markdown +import textwrap import unittest +def dedent(text): + return textwrap.dedent(text).strip() + + class UtilsTests(unittest.TestCase): def test_html_path(self): expected_results = { 'index.md': 'index.html', - 'api-guide.md': 'api-guide/index.html', - 'api-guide/index.md': 'api-guide/index.html', - 'api-guide/testing.md': 'api-guide/testing/index.html', + 'api-guide.md': 'api-guide/index.html', + 'api-guide/index.md': 'api-guide/index.html', + 'api-guide/testing.md': 'api-guide/testing/index.html', } for file_path, expected_html_path in expected_results.items(): html_path = utils.get_html_path(file_path) self.assertEqual(html_path, expected_html_path) + def test_url_path(self): + expected_results = { + 'index.md': '', + 'api-guide.md': 'api-guide/', + 'api-guide/index.md': 'api-guide/', + 'api-guide/testing.md': 'api-guide/testing/', + } + for file_path, expected_html_path in expected_results.items(): + html_path = utils.get_url_path(file_path) + self.assertEqual(html_path, expected_html_path) + def test_is_markdown_file(self): expected_results = { 'index.md': True, - 'index.MARKDOWN': True, + 'index.MARKDOWN': True, 'index.txt': 
False, - 'indexmd': False - } + 'indexmd': False + } for path, expected_result in expected_results.items(): is_markdown = utils.is_markdown_file(path) self.assertEqual(is_markdown, expected_result) @@ -31,14 +48,83 @@ class UtilsTests(unittest.TestCase): def test_is_html_file(self): expected_results = { 'index.htm': True, - 'index.HTML': True, + 'index.HTML': True, 'index.txt': False, - 'indexhtml': False - } + 'indexhtml': False + } for path, expected_result in expected_results.items(): is_html = utils.is_html_file(path) self.assertEqual(is_html, expected_result) +class TableOfContentsTests(unittest.TestCase): + def markdown_to_toc(self, markdown_source): + markdown_source = toc.pre_process(markdown_source) + md = markdown.Markdown(extensions=['toc']) + html_output = md.convert(markdown_source) + html_output, toc_output = toc.post_process(html_output) + return toc.TableOfContents(toc_output) + + def test_indented_toc(self): + md = dedent(""" + # Heading 1 + ## Heading 2 + ### Heading 3 + """) + expected = dedent(""" + Heading 1 - #heading-1 + Heading 2 - #heading-2 + Heading 3 - #heading-3 + """) + toc = self.markdown_to_toc(md) + self.assertEqual(str(toc).strip(), expected) + + def test_flat_toc(self): + md = dedent(""" + # Heading 1 + # Heading 2 + # Heading 3 + """) + expected = dedent(""" + Heading 1 - #heading-1 + Heading 2 - #heading-2 + Heading 3 - #heading-3 + """) + toc = self.markdown_to_toc(md) + self.assertEqual(str(toc).strip(), expected) + + def test_flat_h2_toc(self): + md = dedent(""" + ## Heading 1 + ## Heading 2 + ## Heading 3 + """) + expected = dedent(""" + Heading 1 - #heading-1 + Heading 2 - #heading-2 + Heading 3 - #heading-3 + """) + toc = self.markdown_to_toc(md) + self.assertEqual(str(toc).strip(), expected) + + def test_mixed_toc(self): + md = dedent(""" + # Heading 1 + ## Heading 2 + # Heading 3 + ### Heading 4 + ### Heading 5 + """) + expected = dedent(""" + Heading 1 - #heading-1 + Heading 2 - #heading-2 + Heading 3 - #heading-3 + 
Heading 4 - #heading-4 + Heading 5 - #heading-5 + """) + toc = self.markdown_to_toc(md) + self.assertEqual(str(toc).strip(), expected) + + if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/mkdocs/toc.py b/mkdocs/toc.py new file mode 100644 index 00000000..3ca04c6b --- /dev/null +++ b/mkdocs/toc.py @@ -0,0 +1,104 @@ +#coding: utf-8 + +""" +Deals with generating the per-page table of contents. + +For the sake of simplicity we use an existing markdown extension to generate +an HTML table of contents, and then parse that into the underlying data. + +The steps we take to generate a table of contents are: + +* Pre-process the markdown, injecting a [TOC] marker. +* Generate HTML from markdown. +* Post-process the HTML, splitting the content and the table of contents. +* Parse table of contents HTML into the underlying data structure. +""" + +import re + +TOC_DELIMITER = '' +TOC_LINK_REGEX = re.compile('([^<]*)') + + +def pre_process(markdown_content): + """ + Append a `[TOC]` marker to the markdown. + The `toc` extension injects the HTML table of contents here. + """ + return markdown_content + '\n\n' + TOC_DELIMITER + '\n[TOC]' + + +def post_process(html_content): + """ + Strip the generated HTML table of contents from the HTML output. + + Returns a two-tuple of `(content, table_of_contents)` + """ + return html_content.rsplit(TOC_DELIMITER, 1) + + +class TableOfContents(object): + """ + Represents the table of contents for a given page. + """ + def __init__(self, html): + self.items = _parse_html_table_of_contents(html) + + def __iter__(self): + return iter(self.items) + + def __str__(self): + return ''.join([str(item) for item in self]) + + +class TableOfContentsItem(object): + """ + A single entry in the table of contents. 
+ """ + def __init__(self, title, url): + self.title, self.url = title, url + self.children = [] + + def __str__(self): + return self._indent_print() + + def _indent_print(self, depth=0): + indent = ' ' * depth + ret = '%s%s - %s\n' % (indent, self.title, self.url) + for item in self.children: + ret += item._indent_print(depth + 1) + return ret + + +def _parse_html_table_of_contents(html): + """ + Given a table of contents string that has been automatically generated by + the markdown library, parse it into a tree of TableOfContentsItem instances. + + Returns a list of all the parent TableOfContentsItem instances. + """ + lines = html.splitlines()[2:-2] + parents = [] + ret = [] + for line in lines: + match = TOC_LINK_REGEX.search(line) + if match: + href, title = match.groups() + nav = TableOfContentsItem(title, href) + # Add the item to its parent if required. If it is a topmost + # item then instead append it to our return value. + if parents: + parents[-1].children.append(nav) + else: + ret.append(nav) + # If this item has children, store it as the current parent + if line.endswith(''): + parents.pop() + + # For the table of contents, always mark the first element as active + if ret: + ret[0].active = True + + return ret diff --git a/mkdocs/utils.py b/mkdocs/utils.py index af8cb302..0c8ea3f2 100644 --- a/mkdocs/utils.py +++ b/mkdocs/utils.py @@ -1,5 +1,12 @@ #coding: utf-8 +""" +Standalone file utils. + +Nothing in this module should have any knowledge of config or the layout +and structure of the site and pages in the site. +""" + import os import shutil @@ -53,6 +60,21 @@ def get_html_path(path): return os.path.join(path, 'index.html') +def get_url_path(path, local_file_urls=False): + """ + Map a source file path to an output html path. 
+ + Paths like 'index.md' will be converted to '' + Paths like 'about.md' will be converted to 'about/' + Paths like 'api-guide/core.md' will be converted to 'api-guide/core/' + """ + path = get_html_path(path) + url = path.replace(os.path.pathsep, '/') + if not local_file_urls: + return url[:-len('index.html')] + return url + + def is_markdown_file(path): """ Return True if the given file path is a Markdown file. diff --git a/runtests b/runtests new file mode 100755 index 00000000..ec0caef2 --- /dev/null +++ b/runtests @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -e + +coverage run --source=mkdocs mkdocs/test.py $@ +flake8 mkdocs --ignore=E128,E501 --exclude=__init__.py +echo +coverage report