From 6b6d56e81de43a5208a4f2c9b5bd3e2c6daf7055 Mon Sep 17 00:00:00 2001
From: Tom Christie <tom@tomchristie.com>
Date: Wed, 22 Jan 2014 19:46:37 +0000
Subject: [PATCH] Split out toc module, adding tests.

---
 .gitignore      |   1 +
 mkdocs/build.py |  60 ++++++---------------------
 mkdocs/serve.py |  15 +++----
 mkdocs/test.py  | 108 +++++++++++++++++++++++++++++++++++++++++++-----
 mkdocs/toc.py   | 104 ++++++++++++++++++++++++++++++++++++++++++++++
 mkdocs/utils.py |  22 ++++++++++
 runtests        |   8 ++++
 7 files changed, 253 insertions(+), 65 deletions(-)
 create mode 100644 mkdocs/toc.py
 create mode 100755 runtests
diff --git a/.gitignore b/.gitignore
index 158399fb..65a3d8ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ env/
 dist/
 *.egg-info/
 *.pyc
+.coverage
 
 # The 'React' theme that we use for the docs is purchased from the
 # wrapbootstrap.com site, and is not licensed for re-use.
diff --git a/mkdocs/build.py b/mkdocs/build.py
index 365d6eca..58ddff20 100644
--- a/mkdocs/build.py
+++ b/mkdocs/build.py
@@ -1,17 +1,16 @@
 #coding: utf-8
 
-from mkdocs.utils import copy_media_files, write_file, get_html_path
-import collections
+from mkdocs import toc, utils
 import jinja2
 import markdown
 import os
 import re
 
 
-TOC_LINK_REGEX = re.compile('<a href=["]([^"]*)["]>([^<]*)</a>')
-
-
 class NavItem(object):
+    """
+    Used to represent table of contents and navbar items.
+    """
     def __init__(self, title, url, children=None):
         self.title, self.url = title, url
         self.children = children or []
@@ -43,13 +42,13 @@ def build_pages(config):
         previous_url, next_url = path_to_previous_and_next_urls(path, config)
 
         source_path = os.path.join(config['docs_dir'], path)
-        output_path = os.path.join(config['build_dir'], get_html_path(path))
+        output_path = os.path.join(config['build_dir'], utils.get_html_path(path))
 
         # Get the markdown text
         source_content = open(source_path, 'r').read().decode('utf-8')
 
         # Prepend a table of contents marker for the TOC extension
-        source_content = source_content + '<!-- STARTTOC -->\n\n[TOC]'
+        source_content = toc.pre_process(source_content)
 
         # Generate the HTML from the markdown source
         md = markdown.Markdown(extensions=['meta', 'toc'])
@@ -57,10 +56,10 @@ def build_pages(config):
         meta = md.Meta
 
         # Strip out the generated table of contents
-        (content, toc_html) = content.split('<!-- STARTTOC -->', 1)
+        (content, toc_html) = toc.post_process(content)
 
         # Post process the generated table of contents into a data structure
-        toc = generate_toc(toc_html)
+        table_of_contents = toc.TableOfContents(toc_html)
 
         # Allow 'template:' override in md source files.
         if 'template' in meta:
@@ -77,7 +76,7 @@ def build_pages(config):
             'page_title': active_nav.title,
             'content': content,
 
-            'toc': toc,
+            'toc': table_of_contents,
             'nav': nav,
             'meta': meta,
             'config': config,
@@ -90,40 +89,7 @@ def build_pages(config):
         }
         output_content = template.render(context)
 
-        write_file(output_content.encode('utf-8'), output_path)
-
-
-def generate_toc(toc_html):
-    """
-    Given a table of contents string that has been automatically generated by
-    the markdown library, parse it into a tree of NavItem instances.
-    """
-    depth = 0
-    lines = toc_html.splitlines()[2:-2]
-    parents = []
-    ret = []
-    for line in lines:
-        match = TOC_LINK_REGEX.search(line)
-        if match:
-            href, title = match.groups()
-            nav = NavItem(title, href)
-            # Add the item to its parent if required.  If it is a topmost
-            # item then instead append it to our return value.
-            if parents:
-                parents[-1].children.append(nav)
-            else:
-                ret.append(nav)
-            # If this item has children, store it as the current parent
-            if line.endswith('<ul>'):
-                parents.append(nav)
-        elif line.startswith('</ul>'):
-            parents.pop()
-
-    # For the table of contents, always mark the first element as active
-    if ret:
-        ret[0].active = True
-
-    return ret
+        utils.write_file(output_content.encode('utf-8'), output_path)
 
 
 def generate_nav(config):
@@ -184,7 +150,7 @@ def path_to_url(path, config):
     Given a relative path, determine its corresponding absolute URL.
     """
     if config['local_files']:
-        path = get_html_path(path)
+        path = utils.get_html_path(path)
         url = path.replace(os.path.pathsep, '/')
         return config['base_url'] + '/' + url
 
@@ -220,6 +186,6 @@ def build(config):
     """
     Perform a full site build.
     """
-    copy_media_files(config['theme_dir'], config['build_dir'])
-    copy_media_files(config['docs_dir'], config['build_dir'])
+    utils.copy_media_files(config['theme_dir'], config['build_dir'])
+    utils.copy_media_files(config['docs_dir'], config['build_dir'])
     build_pages(config)
diff --git a/mkdocs/serve.py b/mkdocs/serve.py
index d8443d52..118a282d 100644
--- a/mkdocs/serve.py
+++ b/mkdocs/serve.py
@@ -6,6 +6,7 @@ import os
 import posixpath
 import SimpleHTTPServer
 import SocketServer
+import shutil
 import sys
 import tempfile
 import urllib
@@ -45,8 +46,8 @@ class FixedDirectoryHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
 
     def translate_path(self, path):
         # abandon query parameters
-        path = path.split('?',1)[0]
-        path = path.split('#',1)[0]
+        path = path.split('?', 1)[0]
+        path = path.split('#', 1)[0]
         path = posixpath.normpath(urllib.unquote(path))
         words = path.split('/')
         words = filter(None, words)
@@ -54,14 +55,14 @@ class FixedDirectoryHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
         for word in words:
             drive, word = os.path.splitdrive(word)
             head, word = os.path.split(word)
-            if word in (os.curdir, os.pardir): continue
+            if word in (os.curdir, os.pardir):
+                continue
             path = os.path.join(path, word)
         return path
 
     def log_message(self, format, *args):
-        sys.stderr.write("[%s] %s\n" %
-                          (self.log_date_time_string(),
-                          format%args))
+        date_str = self.log_date_time_string()
+        sys.stderr.write('[%s] %s\n' % (date_str, format % args))
 
 
 def serve(config, options=None):
@@ -96,7 +97,7 @@ def serve(config, options=None):
     host, port = config['dev_addr'].split(':', 1)
     server = TCPServer((host, int(port)), DocsDirectoryHandler)
 
-    print "Running at: http://%s:%s/" % (host, port)
+    print 'Running at: http://%s:%s/' % (host, port)
     server.serve_forever()
 
     # Clean up
diff --git a/mkdocs/test.py b/mkdocs/test.py
index a518487c..c061d50e 100755
--- a/mkdocs/test.py
+++ b/mkdocs/test.py
@@ -1,29 +1,46 @@
 #!/usr/bin/env python
 # coding: utf-8
 
-from mkdocs import utils
+from mkdocs import toc, utils
+import markdown
+import textwrap
 import unittest
 
 
+def dedent(text):
+    return textwrap.dedent(text).strip()  # Drop common leading indentation, then surrounding whitespace.
+
+
 class UtilsTests(unittest.TestCase):
     def test_html_path(self):
         expected_results = {
             'index.md': 'index.html',
-            'api-guide.md': 'api-guide/index.html', 
-            'api-guide/index.md': 'api-guide/index.html', 
-            'api-guide/testing.md': 'api-guide/testing/index.html', 
+            'api-guide.md': 'api-guide/index.html',
+            'api-guide/index.md': 'api-guide/index.html',
+            'api-guide/testing.md': 'api-guide/testing/index.html',
         }
         for file_path, expected_html_path in expected_results.items():
             html_path = utils.get_html_path(file_path)
             self.assertEqual(html_path, expected_html_path)
 
+    def test_url_path(self):
+        cases = [
+            ('index.md', ''),
+            ('api-guide.md', 'api-guide/'),
+            ('api-guide/index.md', 'api-guide/'),
+            ('api-guide/testing.md', 'api-guide/testing/'),
+        ]
+        for source_path, expected_url in cases:
+            url = utils.get_url_path(source_path)  # Default (non-local) URLs drop the trailing 'index.html'.
+            self.assertEqual(url, expected_url)
+
     def test_is_markdown_file(self):
         expected_results = {
             'index.md': True,
-            'index.MARKDOWN': True, 
+            'index.MARKDOWN': True,
             'index.txt': False,
-            'indexmd': False 
-        } 
+            'indexmd': False
+        }
         for path, expected_result in expected_results.items():
             is_markdown = utils.is_markdown_file(path)
             self.assertEqual(is_markdown, expected_result)
@@ -31,14 +48,83 @@ class UtilsTests(unittest.TestCase):
     def test_is_html_file(self):
         expected_results = {
             'index.htm': True,
-            'index.HTML': True, 
+            'index.HTML': True,
             'index.txt': False,
-            'indexhtml': False 
-        } 
+            'indexhtml': False
+        }
         for path, expected_result in expected_results.items():
             is_html = utils.is_html_file(path)
             self.assertEqual(is_html, expected_result)
 
 
+class TableOfContentsTests(unittest.TestCase):
+    def markdown_to_toc(self, markdown_source):  # Helper: run the full pre-process / convert / post-process pipeline.
+        markdown_source = toc.pre_process(markdown_source)
+        md = markdown.Markdown(extensions=['toc'])
+        html_output = md.convert(markdown_source)
+        html_output, toc_output = toc.post_process(html_output)  # Only the TOC half is used below.
+        return toc.TableOfContents(toc_output)
+
+    def test_indented_toc(self):  # h1 > h2 > h3 should nest one level per heading level.
+        md = dedent("""
+        # Heading 1
+        ## Heading 2
+        ### Heading 3
+        """)
+        expected = dedent("""
+        Heading 1 - #heading-1
+          Heading 2 - #heading-2
+            Heading 3 - #heading-3
+        """)
+        toc = self.markdown_to_toc(md)
+        self.assertEqual(str(toc).strip(), expected)
+
+    def test_flat_toc(self):  # Sibling h1 headings should all be top-level entries.
+        md = dedent("""
+        # Heading 1
+        # Heading 2
+        # Heading 3
+        """)
+        expected = dedent("""
+        Heading 1 - #heading-1
+        Heading 2 - #heading-2
+        Heading 3 - #heading-3
+        """)
+        toc = self.markdown_to_toc(md)
+        self.assertEqual(str(toc).strip(), expected)
+
+    def test_flat_h2_toc(self):  # h2-only documents should still yield unindented top-level entries.
+        md = dedent("""
+        ## Heading 1
+        ## Heading 2
+        ## Heading 3
+        """)
+        expected = dedent("""
+        Heading 1 - #heading-1
+        Heading 2 - #heading-2
+        Heading 3 - #heading-3
+        """)
+        toc = self.markdown_to_toc(md)
+        self.assertEqual(str(toc).strip(), expected)
+
+    def test_mixed_toc(self):  # An h3 directly under an h1 is expected one level deep, not two.
+        md = dedent("""
+        # Heading 1
+        ## Heading 2
+        # Heading 3
+        ### Heading 4
+        ### Heading 5
+        """)
+        expected = dedent("""
+        Heading 1 - #heading-1
+          Heading 2 - #heading-2
+        Heading 3 - #heading-3
+          Heading 4 - #heading-4
+          Heading 5 - #heading-5
+        """)
+        toc = self.markdown_to_toc(md)
+        self.assertEqual(str(toc).strip(), expected)
+
+
 if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+    unittest.main()
diff --git a/mkdocs/toc.py b/mkdocs/toc.py
new file mode 100644
index 00000000..3ca04c6b
--- /dev/null
+++ b/mkdocs/toc.py
@@ -0,0 +1,104 @@
+#coding: utf-8
+
+"""
+Deals with generating the per-page table of contents.
+
+For the sake of simplicity we use an existing markdown extension to generate
+an HTML table of contents, and then parse that into the underlying data.
+
+The steps we take to generate a table of contents are:
+
+* Pre-process the markdown, injecting a [TOC] marker.
+* Generate HTML from markdown.
+* Post-process the HTML, splitting the content and the table of contents.
+* Parse table of contents HTML into the underlying data structure.
+"""
+
+import re
+
+TOC_DELIMITER = '<!-- STARTTOC -->'  # HTML comment appended before [TOC]; used to split content from the TOC.
+TOC_LINK_REGEX = re.compile('<a href=["]([^"]*)["]>([^<]*)</a>')  # Captures (href, title) from a TOC anchor tag.
+
+
+def pre_process(markdown_content):
+    """
+    Append the TOC delimiter comment and a `[TOC]` marker to the markdown.
+    The `toc` extension injects the HTML table of contents at the marker.
+    """
+    return markdown_content + '\n\n' + TOC_DELIMITER + '\n[TOC]'
+
+
+def post_process(html_content):
+    """
+    Split the generated HTML table of contents from the HTML output.
+    Always returns a two-tuple of `(content, table_of_contents)`;
+    `table_of_contents` is '' when the delimiter is not present.
+    """
+    return tuple((html_content.rsplit(TOC_DELIMITER, 1) + [''])[:2])  # Pad so two-name unpacking never fails.
+
+
+class TableOfContents(object):
+    """
+    The table of contents for a page; iterating yields its top-level items.
+    """
+    def __init__(self, html):
+        self.items = _parse_html_table_of_contents(html)  # List of top-level TableOfContentsItem instances.
+
+    def __iter__(self):
+        return iter(self.items)
+
+    def __str__(self):
+        return ''.join([str(item) for item in self])  # Indented text outline, one line per entry.
+
+
+class TableOfContentsItem(object):
+    """
+    A single entry in the table of contents, with any nested child entries.
+    """
+    def __init__(self, title, url):
+        self.title, self.url = title, url
+        self.children, self.active = [], False  # `active` always exists; the parser sets it True on the first item.
+
+    def __str__(self):
+        return self._indent_print()
+
+    def _indent_print(self, depth=0):
+        indent = '  ' * depth  # Two spaces per nesting level.
+        ret = '%s%s - %s\n' % (indent, self.title, self.url)
+        for item in self.children:
+            ret += item._indent_print(depth + 1)
+        return ret
+
+
+def _parse_html_table_of_contents(html):
+    """
+    Given a table of contents string that has been automatically generated by
+    the markdown library, parse it into a tree of TableOfContentsItem instances.
+
+    Returns a list of the top-level TableOfContentsItem instances.
+    """
+    lines = html.splitlines()[2:-2]  # NOTE(review): presumably drops wrapper lines emitted by the toc extension -- confirm.
+    parents = []  # Stack of items whose nested <ul> is currently open.
+    ret = []  # Top-level items, in document order.
+    for line in lines:
+        match = TOC_LINK_REGEX.search(line)
+        if match:
+            href, title = match.groups()
+            nav = TableOfContentsItem(title, href)
+            # Add the item to its parent if required.  If it is a topmost
+            # item then instead append it to our return value.
+            if parents:
+                parents[-1].children.append(nav)
+            else:
+                ret.append(nav)
+            # If this item has children, store it as the current parent
+            if line.endswith('<ul>'):
+                parents.append(nav)
+        elif line.startswith('</ul>'):
+            parents.pop()  # Closes the innermost open list; raises IndexError if none is open.
+
+    # For the table of contents, always mark the first element as active
+    if ret:
+        ret[0].active = True
+
+    return ret
diff --git a/mkdocs/utils.py b/mkdocs/utils.py
index af8cb302..0c8ea3f2 100644
--- a/mkdocs/utils.py
+++ b/mkdocs/utils.py
@@ -1,5 +1,12 @@
 #coding: utf-8
 
+"""
+Standalone file utils.
+
+Nothing in this module should have any knowledge of config or the layout
+and structure of the site and pages in the site.
+"""
+
 import os
 import shutil
 
@@ -53,6 +60,21 @@ def get_html_path(path):
     return os.path.join(path, 'index.html')
 
 
+def get_url_path(path, local_file_urls=False):
+    """
+    Map a source file path to a URL path ('index.html' is kept if `local_file_urls`).
+
+    Paths like 'index.md' will be converted to ''
+    Paths like 'about.md' will be converted to 'about/'
+    Paths like 'api-guide/core.md' will be converted to 'api-guide/core/'
+    """
+    path = get_html_path(path)
+    url = path.replace(os.path.sep, '/')  # Directory separator, not os.path.pathsep (the PATH-list separator).
+    if not local_file_urls:
+        return url[:-len('index.html')]
+    return url
+
+
 def is_markdown_file(path):
     """
     Return True if the given file path is a Markdown file.
diff --git a/runtests b/runtests
new file mode 100755
index 00000000..ec0caef2
--- /dev/null
+++ b/runtests
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+set -e  # Abort on the first failing command.
+
+coverage run --source=mkdocs mkdocs/test.py "$@"  # Quoted so forwarded arguments are not word-split or globbed.
+flake8 mkdocs --ignore=E128,E501 --exclude=__init__.py
+echo
+coverage report