mirror of
https://github.com/mkdocs/mkdocs.git
synced 2026-03-27 09:58:31 +07:00
Open files with utf-8-sig to account for BOM.
Python simply discards the BOM when a file is read with `utf-8-sig`. This way, users of Microsoft text editors can have their files parsed properly. In all other respects it behaves the same as reading files with the `utf-8` encoding. For more info see: https://docs.python.org/2/library/codecs.html#encodings-and-unicode Fixes #1186.
This commit is contained in:
@@ -248,7 +248,7 @@ class Page(object):
|
||||
'page_read_source', None, config=config, page=self)
|
||||
if source is None:
|
||||
try:
|
||||
with io.open(self.abs_input_path, 'r', encoding='utf-8') as f:
|
||||
with io.open(self.abs_input_path, 'r', encoding='utf-8-sig') as f:
|
||||
source = f.read()
|
||||
except IOError:
|
||||
log.error('File not found: %s', self.abs_input_path)
|
||||
|
||||
@@ -7,6 +7,7 @@ import shutil
|
||||
import tempfile
|
||||
import unittest
|
||||
import mock
|
||||
import io
|
||||
|
||||
try:
|
||||
from itertools import izip as zip
|
||||
@@ -464,3 +465,32 @@ class BuildTests(unittest.TestCase):
|
||||
context = build.get_context(mock.Mock(), cfg)
|
||||
|
||||
self.assertEqual(context['config']['extra']['a'], 1)
|
||||
|
||||
def test_BOM(self):
|
||||
docs_dir = tempfile.mkdtemp()
|
||||
site_dir = tempfile.mkdtemp()
|
||||
try:
|
||||
# Create a UTF-8 encoded file with a BOM (as Microsoft editors do). See #1186.
|
||||
f = io.open(os.path.join(docs_dir, 'index.md'), 'w', encoding='utf-8-sig')
|
||||
f.write('# An UTF-8 encoded file with a BOM')
|
||||
f.close()
|
||||
|
||||
cfg = load_config(
|
||||
docs_dir=docs_dir,
|
||||
site_dir=site_dir
|
||||
)
|
||||
build.build(cfg)
|
||||
|
||||
# Verify that the file was generated properly.
|
||||
# If the BOM is not removed, Markdown will return:
|
||||
# `<p>\ufeff# An UTF-8 encoded file with a BOM</p>`.
|
||||
f = io.open(os.path.join(site_dir, 'index.html'), 'r', encoding='utf-8')
|
||||
output = f.read()
|
||||
f.close()
|
||||
self.assertTrue(
|
||||
'<h1 id="an-utf-8-encoded-file-with-a-bom">An UTF-8 encoded file with a BOM</h1>' in output
|
||||
)
|
||||
|
||||
finally:
|
||||
shutil.rmtree(docs_dir)
|
||||
shutil.rmtree(site_dir)
|
||||
|
||||
Reference in New Issue
Block a user