diff --git a/docs/user-guide/configuration.md b/docs/user-guide/configuration.md index e084ee64..561ed849 100644 --- a/docs/user-guide/configuration.md +++ b/docs/user-guide/configuration.md @@ -291,6 +291,42 @@ server root and effectively points to `https://example.com/bugs/`. Of course, th list of all the Markdown files found within the `docs_dir` and its sub-directories. Index files will always be listed first within a sub-section. +### exclude_docs + +NEW: **New in version 1.5.** + +This config defines patterns of files (under [`docs_dir`](#docs_dir)) that are excluded from the built site. + +Example: + +```yaml +exclude_docs: | + api-config.json # A file with this name anywhere. + drafts/ # A "drafts" directory anywhere. + /requirements.txt # Top-level "docs/requirements.txt". + *.py # Any file with this extension anywhere. + !/foo/example.py # But keep this particular file. +``` + +This follows the [.gitignore pattern format](https://git-scm.com/docs/gitignore#_pattern_format). + +The following defaults are always implicitly prepended - to exclude dot-files (and directories) as well as the top-level `templates` directory: + +```yaml +exclude_docs: | + .* + /templates/ +``` + +So, to truly start this config from scratch, you would first need to specify negated versions of these entries. 
+ +Otherwise you could for example opt only certain dot-files back into the site: + +```yaml +exclude_docs: | + !.assets # Don't exclude '.assets' although all other '.*' are excluded +``` + ## Build directories ### theme diff --git a/mkdocs/config/config_options.py b/mkdocs/config/config_options.py index 23722a11..d4a617eb 100644 --- a/mkdocs/config/config_options.py +++ b/mkdocs/config/config_options.py @@ -28,6 +28,8 @@ from urllib.parse import quote as urlquote from urllib.parse import urlsplit, urlunsplit import markdown +import pathspec +import pathspec.gitignore from mkdocs import plugins, theme, utils from mkdocs.config.base import ( @@ -1061,3 +1063,15 @@ class Hooks(BaseConfigOption[List[types.ModuleType]]): plugins = config[self.plugins_key] for name, hook in config[key_name].items(): plugins[name] = hook + + +class PathSpec(BaseConfigOption[pathspec.gitignore.GitIgnoreSpec]): + """A path pattern based on gitignore-like syntax.""" + + def run_validation(self, value: object) -> pathspec.gitignore.GitIgnoreSpec: + if not isinstance(value, str): + raise ValidationError(f'Expected a multiline string, but a {type(value)} was given.') + try: + return pathspec.gitignore.GitIgnoreSpec.from_lines(lines=value.splitlines()) + except ValueError as e: + raise ValidationError(str(e)) diff --git a/mkdocs/config/defaults.py b/mkdocs/config/defaults.py index ba6681d3..5561379a 100644 --- a/mkdocs/config/defaults.py +++ b/mkdocs/config/defaults.py @@ -26,6 +26,9 @@ class MkDocsConfig(base.Config): """Defines the structure of the navigation.""" pages = c.Deprecated(removed=True, moved_to='nav') + exclude_docs = c.Optional(c.PathSpec()) + """Gitignore-like patterns of files (relative to docs dir) to exclude from the site.""" + site_url = c.Optional(c.URL(is_dir=True)) """The full URL to where the documentation will be hosted.""" diff --git a/mkdocs/structure/files.py b/mkdocs/structure/files.py index 9debf0ac..50d445e7 100644 --- a/mkdocs/structure/files.py +++ 
b/mkdocs/structure/files.py @@ -5,11 +5,15 @@ import logging import os import posixpath import shutil +import warnings from pathlib import PurePath from typing import TYPE_CHECKING, Any, Iterable, Iterator, Mapping, Sequence from urllib.parse import quote as urlquote import jinja2.environment +import pathspec +import pathspec.gitignore +import pathspec.util from mkdocs import utils @@ -273,52 +277,56 @@ class File: return self.src_uri.endswith('.css') +_default_exclude = pathspec.gitignore.GitIgnoreSpec.from_lines(['.*', '/templates/']) + + def get_files(config: MkDocsConfig | Mapping[str, Any]) -> Files: """Walk the `docs_dir` and return a Files collection.""" + exclude = config.get('exclude_docs') + exclude = _default_exclude + exclude if exclude else _default_exclude files = [] - exclude = ['.*', '/templates'] - for source_dir, dirnames, filenames in os.walk(config['docs_dir'], followlinks=True): relative_dir = os.path.relpath(source_dir, config['docs_dir']) - - for dirname in list(dirnames): - path = os.path.normpath(os.path.join(relative_dir, dirname)) - # Skip any excluded directories - if _filter_paths(basename=dirname, path=path, is_dir=True, exclude=exclude): - dirnames.remove(dirname) dirnames.sort() + filenames.sort(key=_file_sort_key) - for filename in _sort_files(filenames): - path = os.path.normpath(os.path.join(relative_dir, filename)) + for filename in filenames: + file = File( + os.path.join(relative_dir, filename), + config['docs_dir'], + config['site_dir'], + config['use_directory_urls'], + ) # Skip any excluded files - if _filter_paths(basename=filename, path=path, is_dir=False, exclude=exclude): + if exclude.match_file(file.src_uri): continue # Skip README.md if an index file also exists in dir if filename == 'README.md' and 'index.md' in filenames: log.warning( - f"Both index.md and README.md found. 
Skipping README.md from {source_dir}" + f"Excluding '{file.src_uri}' from the site because " + f"it conflicts with 'index.md' in the same directory." ) continue - files.append( - File(path, config['docs_dir'], config['site_dir'], config['use_directory_urls']) - ) + files.append(file) return Files(files) -def _sort_files(filenames: Iterable[str]) -> list[str]: +def _file_sort_key(f: str): """Always sort `index` or `README` as first filename in list.""" + if os.path.splitext(f)[0] in ('index', 'README'): + return (0,) + return (1, f) - def key(f): - if os.path.splitext(f)[0] in ['index', 'README']: - return (0,) - return (1, f) - return sorted(filenames, key=key) +def _sort_files(filenames: Iterable[str]) -> list[str]: + return sorted(filenames, key=_file_sort_key) def _filter_paths(basename: str, path: str, is_dir: bool, exclude: Iterable[str]) -> bool: - """.gitignore style file filtering.""" + warnings.warn( + "_filter_paths is not used since MkDocs 1.5 and will be removed soon.", DeprecationWarning + ) for item in exclude: # Items ending in '/' apply only to directories. 
if item.endswith('/') and not is_dir: diff --git a/mkdocs/tests/structure/file_tests.py b/mkdocs/tests/structure/file_tests.py index 2316ab81..e5c01af2 100644 --- a/mkdocs/tests/structure/file_tests.py +++ b/mkdocs/tests/structure/file_tests.py @@ -3,7 +3,7 @@ import sys import unittest from unittest import mock -from mkdocs.structure.files import File, Files, _filter_paths, _sort_files, get_files +from mkdocs.structure.files import File, Files, _sort_files, get_files from mkdocs.tests.base import PathAssertionMixin, load_config, tempdir @@ -427,34 +427,6 @@ class TestFiles(PathAssertionMixin, unittest.TestCase): os.path.normpath(os.path.join(ddir, 'favicon.ico')), ) - def test_filter_paths(self): - # Root level file - self.assertFalse(_filter_paths('foo.md', 'foo.md', False, ['bar.md'])) - self.assertTrue(_filter_paths('foo.md', 'foo.md', False, ['foo.md'])) - - # Nested file - self.assertFalse(_filter_paths('foo.md', 'baz/foo.md', False, ['bar.md'])) - self.assertTrue(_filter_paths('foo.md', 'baz/foo.md', False, ['foo.md'])) - - # Wildcard - self.assertFalse(_filter_paths('foo.md', 'foo.md', False, ['*.txt'])) - self.assertTrue(_filter_paths('foo.md', 'foo.md', False, ['*.md'])) - - # Root level dir - self.assertFalse(_filter_paths('bar', 'bar', True, ['/baz'])) - self.assertFalse(_filter_paths('bar', 'bar', True, ['/baz/'])) - self.assertTrue(_filter_paths('bar', 'bar', True, ['/bar'])) - self.assertTrue(_filter_paths('bar', 'bar', True, ['/bar/'])) - - # Nested dir - self.assertFalse(_filter_paths('bar', 'foo/bar', True, ['/bar'])) - self.assertFalse(_filter_paths('bar', 'foo/bar', True, ['/bar/'])) - self.assertTrue(_filter_paths('bar', 'foo/bar', True, ['bar/'])) - - # Files that look like dirs (no extension). Note that `is_dir` is `False`. 
- self.assertFalse(_filter_paths('bar', 'bar', False, ['bar/'])) - self.assertFalse(_filter_paths('bar', 'foo/bar', False, ['bar/'])) - def test_get_relative_url_use_directory_urls(self): to_files = [ 'index.md', @@ -661,7 +633,6 @@ class TestFiles(PathAssertionMixin, unittest.TestCase): files = get_files(config) expected = ['index.md', 'bar.css', 'bar.html', 'bar.jpg', 'bar.js', 'bar.md', 'readme.md'] self.assertIsInstance(files, Files) - self.assertEqual(len(files), len(expected)) self.assertEqual([f.src_path for f in files], expected) @tempdir( @@ -675,7 +646,6 @@ class TestFiles(PathAssertionMixin, unittest.TestCase): files = get_files(config) expected = ['README.md', 'foo.md'] self.assertIsInstance(files, Files) - self.assertEqual(len(files), len(expected)) self.assertEqual([f.src_path for f in files], expected) @tempdir( @@ -691,11 +661,11 @@ class TestFiles(PathAssertionMixin, unittest.TestCase): files = get_files(config) self.assertRegex( '\n'.join(cm.output), - r"^WARNING:mkdocs.structure.files:Both index.md and README.md found. Skipping README.md .+$", + r"^WARNING:mkdocs.structure.files:" + r"Excluding 'README.md' from the site because it conflicts with 'index.md' in the same directory.$", ) expected = ['index.md', 'foo.md'] self.assertIsInstance(files, Files) - self.assertEqual(len(files), len(expected)) self.assertEqual([f.src_path for f in files], expected) @tempdir() diff --git a/pyproject.toml b/pyproject.toml index 3e7c1862..a47e1364 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "typing_extensions >=3.10; python_version < '3.8'", "packaging >=20.5", "mergedeep >=1.3.4", + "pathspec >=0.11.1", "colorama >=0.4; platform_system == 'Windows'", ] [project.optional-dependencies] @@ -63,6 +64,7 @@ min-versions = [ "typing_extensions ==3.10; python_version < '3.8'", "packaging ==20.5", "mergedeep ==1.3.4", + "pathspec ==0.11.1", "colorama ==0.4; platform_system == 'Windows'", "babel ==2.9.0", ]