Open files with utf-8-sig to account for BOM.

When decoding with `utf-8-sig`, Python simply discards a leading BOM if one is present. This way, files saved by Microsoft text editors (which prepend a BOM to UTF-8 files) are parsed properly. In all other respects, this behaves the same as reading files with the `utf-8` encoding. For more info see: https://docs.python.org/2/library/codecs.html#encodings-and-unicode Fixes #1186.
2026-03-27 09:58:31 +07:00 · 2017-06-05 19:49:59 -04:00
parent 31e7c29784
commit 555659cdc3
2 changed files with 31 additions and 1 deletions
--- a/mkdocs/nav.py
+++ b/mkdocs/nav.py
@@ -248,7 +248,7 @@ class Page(object):
            'page_read_source', None, config=config, page=self)
        if source is None:
            try:
-                with io.open(self.abs_input_path, 'r', encoding='utf-8') as f:
+                with io.open(self.abs_input_path, 'r', encoding='utf-8-sig') as f:
                    source = f.read()
            except IOError:
                log.error('File not found: %s', self.abs_input_path)
--- a/mkdocs/tests/build_tests.py
+++ b/mkdocs/tests/build_tests.py
@@ -7,6 +7,7 @@ import shutil
 import tempfile
 import unittest
 import mock
+import io

 try:
    from itertools import izip as zip
@@ -464,3 +465,32 @@ class BuildTests(unittest.TestCase):
        context = build.get_context(mock.Mock(), cfg)

        self.assertEqual(context['config']['extra']['a'], 1)
+
+    def test_BOM(self):
+        docs_dir = tempfile.mkdtemp()
+        site_dir = tempfile.mkdtemp()
+        try:
+            # Create a UTF-8 encoded file with a BOM (as Microsoft editors do). See #1186.
+            f = io.open(os.path.join(docs_dir, 'index.md'), 'w', encoding='utf-8-sig')
+            f.write('# An UTF-8 encoded file with a BOM')
+            f.close()
+
+            cfg = load_config(
+                docs_dir=docs_dir,
+                site_dir=site_dir
+            )
+            build.build(cfg)
+
+            # Verify that the file was generated properly.
+            # If the BOM is not removed, Markdown will return:
+            # `<p>\ufeff# An UTF-8 encoded file with a BOM</p>`.
+            f = io.open(os.path.join(site_dir, 'index.html'), 'r', encoding='utf-8')
+            output = f.read()
+            f.close()
+            self.assertTrue(
+                '<h1 id="an-utf-8-encoded-file-with-a-bom">An UTF-8 encoded file with a BOM</h1>' in output
+            )
+
+        finally:
+            shutil.rmtree(docs_dir)
+            shutil.rmtree(site_dir)