mirror of
https://github.com/docker/docs.git
synced 2026-03-27 06:18:55 +07:00
hack: resolve links in the markdown output
Resolves internal links to point to the corresponding (rendered) path rather than keeping them as internal links, which wouldn't make sense to an agent/llm reading them. Signed-off-by: David Karlsson <35727626+dvdksn@users.noreply.github.com>
This commit is contained in:
@@ -51,7 +51,7 @@ ARG DOCS_URL="https://docs.docker.com"
|
|||||||
ENV HUGO_CACHEDIR="/tmp/hugo_cache"
|
ENV HUGO_CACHEDIR="/tmp/hugo_cache"
|
||||||
RUN --mount=type=cache,target=/tmp/hugo_cache \
|
RUN --mount=type=cache,target=/tmp/hugo_cache \
|
||||||
hugo --gc --minify -e $HUGO_ENV -b $DOCS_URL
|
hugo --gc --minify -e $HUGO_ENV -b $DOCS_URL
|
||||||
RUN ./hack/flatten-markdown.sh public
|
RUN ./hack/flatten-and-resolve.js public
|
||||||
|
|
||||||
# lint lints markdown files
|
# lint lints markdown files
|
||||||
FROM ghcr.io/igorshubovych/markdownlint-cli:v0.45.0 AS lint
|
FROM ghcr.io/igorshubovych/markdownlint-cli:v0.45.0 AS lint
|
||||||
|
|||||||
236
hack/flatten-and-resolve.js
Executable file
236
hack/flatten-and-resolve.js
Executable file
@@ -0,0 +1,236 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flattens markdown directory structure and resolves all links to absolute paths.
|
||||||
|
*
|
||||||
|
* This script:
|
||||||
|
* 1. Moves index.md files up one level (ai/model-runner/index.md -> ai/model-runner.md)
|
||||||
|
* 2. Fixes _index.md and index.md references in links
|
||||||
|
* 3. Strips /manuals/ prefix from paths (Hugo config removes this)
|
||||||
|
* 4. Resolves all relative links to absolute HTML paths for RAG ingestion
|
||||||
|
*
|
||||||
|
* Usage: node flatten-and-resolve.js [public-dir]
|
||||||
|
*/
|
||||||
|
|
||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
const PUBLIC_DIR = path.resolve(process.argv[2] || 'public');
|
||||||
|
|
||||||
|
if (!fs.existsSync(PUBLIC_DIR)) {
|
||||||
|
console.error(`Error: Directory ${PUBLIC_DIR} does not exist`);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursively find all files matching a predicate
|
||||||
|
*/
|
||||||
|
function findFiles(dir, predicate) {
|
||||||
|
const results = [];
|
||||||
|
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
||||||
|
|
||||||
|
for (const entry of entries) {
|
||||||
|
const fullPath = path.join(dir, entry.name);
|
||||||
|
if (entry.isDirectory()) {
|
||||||
|
results.push(...findFiles(fullPath, predicate));
|
||||||
|
} else if (entry.isFile() && predicate(entry.name)) {
|
||||||
|
results.push(fullPath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Step 1: Flatten index.md files
|
||||||
|
* Move path/to/section/index.md -> path/to/section.md
|
||||||
|
* Before moving, rewrite sibling links (e.g., "get-started.md" -> "section/get-started.md")
|
||||||
|
*/
|
||||||
|
function flattenIndexFiles() {
|
||||||
|
const indexFiles = findFiles(PUBLIC_DIR, name => name === 'index.md');
|
||||||
|
let count = 0;
|
||||||
|
|
||||||
|
for (const file of indexFiles) {
|
||||||
|
// Skip root index.md
|
||||||
|
if (file === path.join(PUBLIC_DIR, 'index.md')) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const dir = path.dirname(file);
|
||||||
|
const dirname = path.basename(dir);
|
||||||
|
|
||||||
|
// Read content and fix sibling links
|
||||||
|
let content = fs.readFileSync(file, 'utf8');
|
||||||
|
|
||||||
|
// Rewrite relative links that don't start with /, ../, or http
|
||||||
|
// These are sibling files that will become children after flattening
|
||||||
|
content = content.replace(
|
||||||
|
/\[([^\]]+)\]\(([a-zA-Z0-9][^):]*)\)/g,
|
||||||
|
(match, text, link) => {
|
||||||
|
// Skip if it's a URL or starts with special chars
|
||||||
|
if (link.startsWith('http://') || link.startsWith('https://') ||
|
||||||
|
link.startsWith('#')) {
|
||||||
|
return match;
|
||||||
|
}
|
||||||
|
return `[${text}](${dirname}/${link})`;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
// Also fix reference-style links
|
||||||
|
content = content.replace(
|
||||||
|
/^\[([^\]]+)\]:\s+([a-zA-Z0-9][^: ]*\.md)$/gm,
|
||||||
|
(match, ref, link) => `[${ref}]: ${dirname}/${link}`
|
||||||
|
);
|
||||||
|
|
||||||
|
fs.writeFileSync(file, content, 'utf8');
|
||||||
|
|
||||||
|
// Move file up one level
|
||||||
|
const parentDir = path.dirname(dir);
|
||||||
|
const newPath = path.join(parentDir, `${dirname}.md`);
|
||||||
|
fs.renameSync(file, newPath);
|
||||||
|
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`Flattened ${count} index.md files`);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Step 2: Fix _index.md and index.md references in all files
|
||||||
|
* Also strip /manuals/ prefix from paths
|
||||||
|
*/
|
||||||
|
function fixIndexReferences() {
|
||||||
|
const mdFiles = findFiles(PUBLIC_DIR, name => name.endsWith('.md'));
|
||||||
|
let count = 0;
|
||||||
|
|
||||||
|
for (const file of mdFiles) {
|
||||||
|
const dir = path.dirname(file);
|
||||||
|
const dirname = path.basename(dir);
|
||||||
|
const parentDir = path.dirname(dir);
|
||||||
|
const parentDirname = path.basename(parentDir);
|
||||||
|
|
||||||
|
let content = fs.readFileSync(file, 'utf8');
|
||||||
|
const original = content;
|
||||||
|
|
||||||
|
// Fix path/_index.md or path/index.md -> path.md
|
||||||
|
content = content.replace(/([a-zA-Z0-9_/-]+)\/_?index\.md/g, '$1.md');
|
||||||
|
|
||||||
|
// Fix bare _index.md or index.md -> ../dirname.md
|
||||||
|
content = content.replace(/_?index\.md/g, `../${dirname}.md`);
|
||||||
|
|
||||||
|
// Fix ../_index.md that became ...md -> ../../parentdirname.md
|
||||||
|
if (parentDir !== PUBLIC_DIR) {
|
||||||
|
content = content.replace(/\.\.\.md/g, `../../${parentDirname}.md`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strip /manuals/ prefix (both /manuals/ and manuals/)
|
||||||
|
content = content.replace(/\/?manuals\//g, '/');
|
||||||
|
|
||||||
|
if (content !== original) {
|
||||||
|
fs.writeFileSync(file, content, 'utf8');
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`Fixed _index.md references in ${count} files`);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Step 3: Resolve all relative links to absolute HTML paths
|
||||||
|
*/
|
||||||
|
function resolveLinks() {
|
||||||
|
const mdFiles = findFiles(PUBLIC_DIR, name => name.endsWith('.md'));
|
||||||
|
let count = 0;
|
||||||
|
|
||||||
|
for (const file of mdFiles) {
|
||||||
|
let content = fs.readFileSync(file, 'utf8');
|
||||||
|
const original = content;
|
||||||
|
|
||||||
|
// Process inline links: [text](path)
|
||||||
|
content = content.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, text, link) => {
|
||||||
|
const resolved = resolveLinkPath(link, file);
|
||||||
|
return `[${text}](${resolved})`;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Process reference-style links: [ref]: path
|
||||||
|
content = content.replace(/^\[([^\]]+)\]:\s+(.+)$/gm, (match, ref, link) => {
|
||||||
|
const resolved = resolveLinkPath(link, file);
|
||||||
|
return `[${ref}]: ${resolved}`;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (content !== original) {
|
||||||
|
fs.writeFileSync(file, content, 'utf8');
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`Resolved links in ${count} files`);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolve a link path to absolute HTML format
|
||||||
|
*/
|
||||||
|
function resolveLinkPath(linkPath, currentFile) {
|
||||||
|
// Skip external URLs and mailto
|
||||||
|
if (linkPath.startsWith('http://') || linkPath.startsWith('https://') ||
|
||||||
|
linkPath.startsWith('mailto:')) {
|
||||||
|
return linkPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip same-page anchors
|
||||||
|
if (linkPath.startsWith('#')) {
|
||||||
|
return linkPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split path and anchor
|
||||||
|
const hashIndex = linkPath.indexOf('#');
|
||||||
|
const pathPart = hashIndex >= 0 ? linkPath.substring(0, hashIndex) : linkPath;
|
||||||
|
const anchorPart = hashIndex >= 0 ? linkPath.substring(hashIndex) : '';
|
||||||
|
|
||||||
|
if (!pathPart) {
|
||||||
|
// Just an anchor
|
||||||
|
return linkPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle absolute paths - just convert to HTML format
|
||||||
|
if (pathPart.startsWith('/')) {
|
||||||
|
return toHtmlPath(pathPart) + anchorPart;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve relative path to absolute
|
||||||
|
const currentDir = path.dirname(currentFile);
|
||||||
|
const absolutePath = path.resolve(currentDir, pathPart);
|
||||||
|
const relativePath = path.relative(PUBLIC_DIR, absolutePath);
|
||||||
|
|
||||||
|
// Convert to URL path (forward slashes)
|
||||||
|
const urlPath = '/' + relativePath.split(path.sep).join('/');
|
||||||
|
|
||||||
|
return toHtmlPath(urlPath) + anchorPart;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a path to HTML format (strip .md, add trailing /)
|
||||||
|
*/
|
||||||
|
function toHtmlPath(urlPath) {
|
||||||
|
if (urlPath.endsWith('.md')) {
|
||||||
|
return urlPath.slice(0, -3) + '/';
|
||||||
|
}
|
||||||
|
return urlPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Main execution
|
||||||
|
console.log('Starting markdown flattening and link resolution...');
|
||||||
|
console.log('');
|
||||||
|
|
||||||
|
const flattenCount = flattenIndexFiles();
|
||||||
|
const fixCount = fixIndexReferences();
|
||||||
|
const resolveCount = resolveLinks();
|
||||||
|
|
||||||
|
console.log('');
|
||||||
|
console.log('Done!');
|
||||||
|
console.log(`- Flattened: ${flattenCount} files`);
|
||||||
|
console.log(`- Fixed references: ${fixCount} files`);
|
||||||
|
console.log(`- Resolved links: ${resolveCount} files`);
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
set -e
|
|
||||||
|
|
||||||
# Flatten markdown files from public/path/to/page/index.md to public/path/to/page.md
|
|
||||||
# This makes markdown output links work correctly
|
|
||||||
|
|
||||||
PUBLIC_DIR="${1:-public}"
|
|
||||||
|
|
||||||
[ -d "$PUBLIC_DIR" ] || { echo "Error: Directory $PUBLIC_DIR does not exist"; exit 1; }
|
|
||||||
|
|
||||||
find "$PUBLIC_DIR" -type f -name 'index.md' | while read -r file; do
|
|
||||||
# Skip the root index.md
|
|
||||||
[ "$file" = "$PUBLIC_DIR/index.md" ] && continue
|
|
||||||
|
|
||||||
# Get the directory containing index.md
|
|
||||||
dir="${file%/*}"
|
|
||||||
|
|
||||||
# Move index.md to parent directory with directory name
|
|
||||||
mv "$file" "${dir%/*}/${dir##*/}.md"
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Flattened markdown files in $PUBLIC_DIR"
|
|
||||||
@@ -10,4 +10,4 @@ HUGO_ENVIRONMENT = "preview"
|
|||||||
SECRETS_SCAN_OMIT_PATHS = "public/contribute/file-conventions/index.html"
|
SECRETS_SCAN_OMIT_PATHS = "public/contribute/file-conventions/index.html"
|
||||||
|
|
||||||
[context.deploy-preview]
|
[context.deploy-preview]
|
||||||
command = "hugo --gc --minify -b $DEPLOY_PRIME_URL && ./hack/flatten-markdown.sh && npx pagefind@v1.3.0"
|
command = "hugo --gc --minify -b $DEPLOY_PRIME_URL && ./hack/flatten-and-resolve.js && npx pagefind@v1.3.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user