From ccf011db793c968bf2cee80c3e8f1f66fd2b3189 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 8 Dec 2023 21:39:14 +0100 Subject: [PATCH] Make sitemap.xml.gz slightly more reproducible (#3460) Use the latest page update time instead of the current time Now the date of the gzip file will change only once per day, based on the pages' update date. The sitemap.xml itself also changes once per day already. --- mkdocs/commands/build.py | 4 +++- mkdocs/structure/pages.py | 2 +- mkdocs/tests/build_tests.py | 8 ++++---- mkdocs/utils/__init__.py | 19 ++++++++++--------- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/mkdocs/commands/build.py b/mkdocs/commands/build.py index 2c9e9793..2eec30aa 100644 --- a/mkdocs/commands/build.py +++ b/mkdocs/commands/build.py @@ -110,7 +110,9 @@ def _build_theme_template( log.debug(f"Gzipping template: {template_name}") gz_filename = f'{output_path}.gz' with open(gz_filename, 'wb') as f: - timestamp = utils.get_build_timestamp() + timestamp = utils.get_build_timestamp( + pages=[f.page for f in files.documentation_pages() if f.page is not None] + ) with gzip.GzipFile( fileobj=f, filename=gz_filename, mode='wb', mtime=timestamp ) as gz_buf: diff --git a/mkdocs/structure/pages.py b/mkdocs/structure/pages.py index 84c1430f..f5bd041f 100644 --- a/mkdocs/structure/pages.py +++ b/mkdocs/structure/pages.py @@ -42,7 +42,7 @@ class Page(StructureItem): self.next_page = None self.active = False - self.update_date = get_build_date() + self.update_date: str = get_build_date() self._set_canonical_url(config.get('site_url', None)) self._set_edit_url( diff --git a/mkdocs/tests/build_tests.py b/mkdocs/tests/build_tests.py index 95145298..ae1b474c 100644 --- a/mkdocs/tests/build_tests.py +++ b/mkdocs/tests/build_tests.py @@ -225,7 +225,7 @@ class BuildTests(PathAssertionMixin, unittest.TestCase): def test_build_theme_template(self, mock_build_template, mock_write_file): cfg = load_config() env = cfg.theme.get_env() - build._build_theme_template('main.html', env, mock.Mock(), cfg, mock.Mock()) + build._build_theme_template('main.html', env, Files([]), cfg, mock.Mock()) mock_write_file.assert_called_once() mock_build_template.assert_called_once() @@ -238,7 +238,7 @@ class BuildTests(PathAssertionMixin, unittest.TestCase): ): cfg = load_config(site_dir=site_dir) env = cfg.theme.get_env() - build._build_theme_template('sitemap.xml', env, mock.Mock(), cfg, mock.Mock()) + build._build_theme_template('sitemap.xml', env, Files([]), cfg, mock.Mock()) mock_write_file.assert_called_once() mock_build_template.assert_called_once() mock_gzip_gzipfile.assert_called_once() @@ -249,7 +249,7 @@ class BuildTests(PathAssertionMixin, unittest.TestCase): cfg = load_config() env = cfg.theme.get_env() with self.assertLogs('mkdocs') as cm: - build._build_theme_template('missing.html', env, mock.Mock(), cfg, mock.Mock()) + build._build_theme_template('missing.html', env, Files([]), cfg, mock.Mock()) self.assertEqual( '\n'.join(cm.output), "WARNING:mkdocs.commands.build:Template skipped: 'missing.html' not found in theme directories.", @@ -263,7 +263,7 @@ class BuildTests(PathAssertionMixin, unittest.TestCase): cfg = load_config() env = cfg.theme.get_env() with self.assertLogs('mkdocs') as cm: - build._build_theme_template('main.html', env, mock.Mock(), cfg, mock.Mock()) + build._build_theme_template('main.html', env, Files([]), cfg, mock.Mock()) self.assertEqual( '\n'.join(cm.output), "INFO:mkdocs.commands.build:Template skipped: 'main.html' generated empty output.", diff --git a/mkdocs/utils/__init__.py b/mkdocs/utils/__init__.py index 1560dfa1..602e99fe 100644 --- a/mkdocs/utils/__init__.py +++ b/mkdocs/utils/__init__.py @@ -44,18 +44,19 @@ markdown_extensions = ( ) -def get_build_timestamp() -> int: +def get_build_timestamp(*, pages: Collection[Page] | None = None) -> int: """ - Returns the number of seconds since the epoch. + Returns the number of seconds since the epoch for the latest updated page. - Support SOURCE_DATE_EPOCH environment variable for reproducible builds. - See https://reproducible-builds.org/specs/source-date-epoch/ + In reality this is just today's date because that's how pages' update time is populated. """ - source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH') - if source_date_epoch is None: - return int(datetime.now(timezone.utc).timestamp()) - - return int(source_date_epoch) + if pages: + # Lexicographic comparison is OK for ISO date. + date_string = max(p.update_date for p in pages) + dt = datetime.fromisoformat(date_string) + else: + dt = get_build_datetime() + return int(dt.timestamp()) def get_build_datetime() -> datetime: