mirror of
https://github.com/docker/docs.git
synced 2026-03-27 14:28:47 +07:00
Resolves internal links to point to the corresponding (rendered) path rather than keeping them as internal links, which wouldn't make sense to an agent/llm reading them. Signed-off-by: David Karlsson <35727626+dvdksn@users.noreply.github.com>
237 lines
6.6 KiB
JavaScript
Executable File
237 lines
6.6 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
|
|
/**
|
|
* Flattens markdown directory structure and resolves all links to absolute paths.
|
|
*
|
|
* This script:
|
|
* 1. Moves index.md files up one level (ai/model-runner/index.md -> ai/model-runner.md)
|
|
* 2. Fixes _index.md and index.md references in links
|
|
* 3. Strips /manuals/ prefix from paths (Hugo config removes this)
|
|
* 4. Resolves all relative links to absolute HTML paths for RAG ingestion
|
|
*
|
|
* Usage: node flatten-and-resolve.js [public-dir]
|
|
*/
|
|
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
const PUBLIC_DIR = path.resolve(process.argv[2] || 'public');
|
|
|
|
if (!fs.existsSync(PUBLIC_DIR)) {
|
|
console.error(`Error: Directory ${PUBLIC_DIR} does not exist`);
|
|
process.exit(1);
|
|
}
|
|
|
|
/**
|
|
* Recursively find all files matching a predicate
|
|
*/
|
|
function findFiles(dir, predicate) {
|
|
const results = [];
|
|
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
|
|
for (const entry of entries) {
|
|
const fullPath = path.join(dir, entry.name);
|
|
if (entry.isDirectory()) {
|
|
results.push(...findFiles(fullPath, predicate));
|
|
} else if (entry.isFile() && predicate(entry.name)) {
|
|
results.push(fullPath);
|
|
}
|
|
}
|
|
|
|
return results;
|
|
}
|
|
|
|
/**
|
|
* Step 1: Flatten index.md files
|
|
* Move path/to/section/index.md -> path/to/section.md
|
|
* Before moving, rewrite sibling links (e.g., "get-started.md" -> "section/get-started.md")
|
|
*/
|
|
function flattenIndexFiles() {
|
|
const indexFiles = findFiles(PUBLIC_DIR, name => name === 'index.md');
|
|
let count = 0;
|
|
|
|
for (const file of indexFiles) {
|
|
// Skip root index.md
|
|
if (file === path.join(PUBLIC_DIR, 'index.md')) {
|
|
continue;
|
|
}
|
|
|
|
const dir = path.dirname(file);
|
|
const dirname = path.basename(dir);
|
|
|
|
// Read content and fix sibling links
|
|
let content = fs.readFileSync(file, 'utf8');
|
|
|
|
// Rewrite relative links that don't start with /, ../, or http
|
|
// These are sibling files that will become children after flattening
|
|
content = content.replace(
|
|
/\[([^\]]+)\]\(([a-zA-Z0-9][^):]*)\)/g,
|
|
(match, text, link) => {
|
|
// Skip if it's a URL or starts with special chars
|
|
if (link.startsWith('http://') || link.startsWith('https://') ||
|
|
link.startsWith('#')) {
|
|
return match;
|
|
}
|
|
return `[${text}](${dirname}/${link})`;
|
|
}
|
|
);
|
|
|
|
// Also fix reference-style links
|
|
content = content.replace(
|
|
/^\[([^\]]+)\]:\s+([a-zA-Z0-9][^: ]*\.md)$/gm,
|
|
(match, ref, link) => `[${ref}]: ${dirname}/${link}`
|
|
);
|
|
|
|
fs.writeFileSync(file, content, 'utf8');
|
|
|
|
// Move file up one level
|
|
const parentDir = path.dirname(dir);
|
|
const newPath = path.join(parentDir, `${dirname}.md`);
|
|
fs.renameSync(file, newPath);
|
|
|
|
count++;
|
|
}
|
|
|
|
console.log(`Flattened ${count} index.md files`);
|
|
return count;
|
|
}
|
|
|
|
/**
|
|
* Step 2: Fix _index.md and index.md references in all files
|
|
* Also strip /manuals/ prefix from paths
|
|
*/
|
|
function fixIndexReferences() {
|
|
const mdFiles = findFiles(PUBLIC_DIR, name => name.endsWith('.md'));
|
|
let count = 0;
|
|
|
|
for (const file of mdFiles) {
|
|
const dir = path.dirname(file);
|
|
const dirname = path.basename(dir);
|
|
const parentDir = path.dirname(dir);
|
|
const parentDirname = path.basename(parentDir);
|
|
|
|
let content = fs.readFileSync(file, 'utf8');
|
|
const original = content;
|
|
|
|
// Fix path/_index.md or path/index.md -> path.md
|
|
content = content.replace(/([a-zA-Z0-9_/-]+)\/_?index\.md/g, '$1.md');
|
|
|
|
// Fix bare _index.md or index.md -> ../dirname.md
|
|
content = content.replace(/_?index\.md/g, `../${dirname}.md`);
|
|
|
|
// Fix ../_index.md that became ...md -> ../../parentdirname.md
|
|
if (parentDir !== PUBLIC_DIR) {
|
|
content = content.replace(/\.\.\.md/g, `../../${parentDirname}.md`);
|
|
}
|
|
|
|
// Strip /manuals/ prefix (both /manuals/ and manuals/)
|
|
content = content.replace(/\/?manuals\//g, '/');
|
|
|
|
if (content !== original) {
|
|
fs.writeFileSync(file, content, 'utf8');
|
|
count++;
|
|
}
|
|
}
|
|
|
|
console.log(`Fixed _index.md references in ${count} files`);
|
|
return count;
|
|
}
|
|
|
|
/**
|
|
* Step 3: Resolve all relative links to absolute HTML paths
|
|
*/
|
|
function resolveLinks() {
|
|
const mdFiles = findFiles(PUBLIC_DIR, name => name.endsWith('.md'));
|
|
let count = 0;
|
|
|
|
for (const file of mdFiles) {
|
|
let content = fs.readFileSync(file, 'utf8');
|
|
const original = content;
|
|
|
|
// Process inline links: [text](path)
|
|
content = content.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, text, link) => {
|
|
const resolved = resolveLinkPath(link, file);
|
|
return `[${text}](${resolved})`;
|
|
});
|
|
|
|
// Process reference-style links: [ref]: path
|
|
content = content.replace(/^\[([^\]]+)\]:\s+(.+)$/gm, (match, ref, link) => {
|
|
const resolved = resolveLinkPath(link, file);
|
|
return `[${ref}]: ${resolved}`;
|
|
});
|
|
|
|
if (content !== original) {
|
|
fs.writeFileSync(file, content, 'utf8');
|
|
count++;
|
|
}
|
|
}
|
|
|
|
console.log(`Resolved links in ${count} files`);
|
|
return count;
|
|
}
|
|
|
|
/**
|
|
* Resolve a link path to absolute HTML format
|
|
*/
|
|
function resolveLinkPath(linkPath, currentFile) {
|
|
// Skip external URLs and mailto
|
|
if (linkPath.startsWith('http://') || linkPath.startsWith('https://') ||
|
|
linkPath.startsWith('mailto:')) {
|
|
return linkPath;
|
|
}
|
|
|
|
// Skip same-page anchors
|
|
if (linkPath.startsWith('#')) {
|
|
return linkPath;
|
|
}
|
|
|
|
// Split path and anchor
|
|
const hashIndex = linkPath.indexOf('#');
|
|
const pathPart = hashIndex >= 0 ? linkPath.substring(0, hashIndex) : linkPath;
|
|
const anchorPart = hashIndex >= 0 ? linkPath.substring(hashIndex) : '';
|
|
|
|
if (!pathPart) {
|
|
// Just an anchor
|
|
return linkPath;
|
|
}
|
|
|
|
// Handle absolute paths - just convert to HTML format
|
|
if (pathPart.startsWith('/')) {
|
|
return toHtmlPath(pathPart) + anchorPart;
|
|
}
|
|
|
|
// Resolve relative path to absolute
|
|
const currentDir = path.dirname(currentFile);
|
|
const absolutePath = path.resolve(currentDir, pathPart);
|
|
const relativePath = path.relative(PUBLIC_DIR, absolutePath);
|
|
|
|
// Convert to URL path (forward slashes)
|
|
const urlPath = '/' + relativePath.split(path.sep).join('/');
|
|
|
|
return toHtmlPath(urlPath) + anchorPart;
|
|
}
|
|
|
|
/**
|
|
* Convert a path to HTML format (strip .md, add trailing /)
|
|
*/
|
|
function toHtmlPath(urlPath) {
|
|
if (urlPath.endsWith('.md')) {
|
|
return urlPath.slice(0, -3) + '/';
|
|
}
|
|
return urlPath;
|
|
}
|
|
|
|
// Main execution
|
|
console.log('Starting markdown flattening and link resolution...');
|
|
console.log('');
|
|
|
|
const flattenCount = flattenIndexFiles();
|
|
const fixCount = fixIndexReferences();
|
|
const resolveCount = resolveLinks();
|
|
|
|
console.log('');
|
|
console.log('Done!');
|
|
console.log(`- Flattened: ${flattenCount} files`);
|
|
console.log(`- Fixed references: ${fixCount} files`);
|
|
console.log(`- Resolved links: ${resolveCount} files`);
|