diff --git a/tests/Dockerfile b/tests/Dockerfile new file mode 100644 index 0000000000..7e2b18ec0e --- /dev/null +++ b/tests/Dockerfile @@ -0,0 +1,7 @@ +FROM golang:1.7.3-alpine + +COPY src /go/src +WORKDIR /go/src/validator + +# when running the container, MOUNT docs repo in /docs +CMD ["go", "test", "-v", "-run", "FrontMatter"] diff --git a/tests/src/validator/frontmatter_test.go b/tests/src/validator/frontmatter_test.go new file mode 100644 index 0000000000..4360c5446e --- /dev/null +++ b/tests/src/validator/frontmatter_test.go @@ -0,0 +1,88 @@ +package main + +import ( + "errors" + "github.com/gdevillele/frontparser" + "os" + "path/filepath" + "testing" +) + +// TestFrontMatterTitle walks /docs and reports a test error for every +// published markdown file whose frontmatter cannot be parsed or has no title. +func TestFrontMatterTitle(t *testing.T) { + filepath.Walk("/docs", func(path string, info os.FileInfo, err error) error { + if err != nil { + t.Error(err.Error(), "-", path) + } + published, mdBytes, err := isPublishedMarkdown(path) + if err != nil { + t.Error(err.Error(), "-", path) + } + if published == false { + return nil + } + err = testFrontMatterTitle(mdBytes) + if err != nil { + t.Error(err.Error(), "-", path) + } + return nil + }) +} + +// testFrontMatterTitle returns an error if the frontmatter of the given +// markdown bytes cannot be parsed or has no "title" key. +func testFrontMatterTitle(mdBytes []byte) error { + fm, _, err := frontparser.ParseFrontmatterAndContent(mdBytes) + if err != nil { + return err + } + if _, exists := fm["title"]; exists == false { + return errors.New("can't find title in frontmatter") + } + return nil +} + +// TestFrontMatterKeywords tests that keywords, when present, are correctly +// formatted in all published markdown frontmatters. 
+func TestFrontMatterKeywords(t *testing.T) { + filepath.Walk("/docs", func(path string, info os.FileInfo, err error) error { + if err != nil { + t.Error(err.Error(), "-", path) + } + published, mdBytes, err := isPublishedMarkdown(path) + if err != nil { + t.Error(err.Error(), "-", path) + } + if published == false { + return nil + } + err = testFrontMatterKeywords(mdBytes) + if err != nil { + t.Error(err.Error(), "-", path) + } + return nil + }) +} + +// testFrontMatterKeywords returns an error if the frontmatter of the given markdown bytes cannot be +// parsed or if "keywords" is present but is not a string; a missing "keywords" key is accepted. +func testFrontMatterKeywords(mdBytes []byte) error { + fm, _, err := frontparser.ParseFrontmatterAndContent(mdBytes) + if err != nil { + return err + } + + keywords, exists := fm["keywords"] + + // it's ok to have a page without keywords + if exists == false { + return nil + } + + if _, ok := keywords.(string); !ok { + return errors.New("keywords should be a comma separated string") + } + + return nil +} diff --git a/tests/src/validator/html_test.go b/tests/src/validator/html_test.go new file mode 100644 index 0000000000..316c4401ec --- /dev/null +++ b/tests/src/validator/html_test.go @@ -0,0 +1,204 @@ +package main + +import ( + "bytes" + "errors" + "fmt" + "golang.org/x/net/html" + "net/url" + "os" + "path/filepath" + "regexp" + "strings" + "testing" +) + +var countLinks = 0 +var countImages = 0 +var htmlContentRootPath = "/usr/src/app/allvbuild" + +// TestURLs tests if we're not using absolute paths for URLs when pointing to local pages; +// it runs testURLs on every non-archive html file found under htmlContentRootPath. 
+func TestURLs(t *testing.T) { + count := 0 + + filepath.Walk(htmlContentRootPath, func(path string, info os.FileInfo, err error) error { + + relPath := strings.TrimPrefix(path, htmlContentRootPath) + + isArchive, err := regexp.MatchString(`^/v[0-9]+\.[0-9]+/.*`, relPath) + if err != nil { + t.Error(err.Error(), "-", relPath) + } + // skip archives for now, only test URLs in current version + // TODO: test archives. NOTE(review): the err handed in by Walk is overwritten by MatchString above before it is ever checked + if isArchive { + return nil + } + + if err != nil { + t.Error(err.Error(), "-", relPath) + } + b, htmlBytes, err := isHTML(path) + if err != nil { + t.Error(err.Error(), "-", relPath) + } + // don't check non-html files + if b == false { + return nil + } + + count++ + + err = testURLs(htmlBytes, path) + if err != nil { + t.Error(relPath + err.Error()) + } + return nil + }) + + fmt.Println("found", count, "html files (excluding archives)") + fmt.Println("found", countLinks, "links (excluding archives)") + fmt.Println("found", countImages, "images (excluding archives)") +} + +// testURLs scans the a and img start tags of htmlBytes and returns a single error listing every img without src, +// unparsable URL, absolute docs.docker.com URL and broken relative link it found (nil if there are none). +func testURLs(htmlBytes []byte, htmlPath string) error { + + reader := bytes.NewReader(htmlBytes) + + z := html.NewTokenizer(reader) + + urlErrors := "" + // fmt.Println("urlErrors:", urlErrors) + done := false + + for !done { + tt := z.Next() + + switch tt { + case html.ErrorToken: + // End of the document, we're done + done = true + case html.StartTagToken: + t := z.Token() + + urlStr := "" + + // check tag types + switch t.Data { + case "a": + countLinks++ + ok, href := getHref(t) + // skip, it may just be an anchor + if !ok { + break + } + urlStr = href + + case "img": + countImages++ + ok, src := getSrc(t) + if !ok { + urlErrors += "\nimg with no src: " + t.String() + break + } + urlStr = src + } + + // there's a URL to test! 
+ if urlStr != "" { + u, err := url.Parse(urlStr) + if err != nil { + urlErrors += "\ncan't parse url: " + t.String() + break + // return errors.New("can't parse url: " + t.String()) + } + // flag absolute URLs that point at docs.docker.com + if u.Scheme != "" && u.Host == "docs.docker.com" { + urlErrors += "\nabsolute: " + t.String() + break + } + + // relative link + if u.Scheme == "" { + + resourcePath := "" + resourcePathIsAbs := false + + if filepath.IsAbs(u.Path) { + resourcePath = filepath.Join(htmlContentRootPath, mdToHtmlPath(u.Path)) + resourcePathIsAbs = true + } else { + resourcePath = filepath.Join(filepath.Dir(htmlPath), mdToHtmlPath(u.Path)) + } + + if _, err := os.Stat(resourcePath); os.IsNotExist(err) { + + fail := true + + // an index.html page could come from an index.md, in which case the path built above is correct, + // but Jekyll actually creates index.html files for all md files. + // foo.md -> foo/index.html + // it does this to prettify urls, content of foo.md would then be rendered here: + // http://domain.com/foo/ (instead of http://domain.com/foo.html) + // so if the resource was not found and this page is an index.html, retry from the parent folder + // (only if the resource path is not absolute) + if !resourcePathIsAbs && filepath.Base(htmlPath) == "index.html" { + // retry from parent folder + resourcePath = filepath.Join(filepath.Dir(htmlPath), "..", mdToHtmlPath(u.Path)) + if _, err := os.Stat(resourcePath); err == nil { + fail = false + } + } + + if fail { + urlErrors += "\nbroken: " + t.String() + break + } + } + } + } + } + } + + // fmt.Println("urlErrors:", urlErrors) + if urlErrors != "" { + return errors.New(urlErrors) + } + return nil +} + +func mdToHtmlPath(mdPath string) string { + if strings.HasSuffix(mdPath, ".md") == false { + // not a markdown path, return it unchanged; below, a path ending in index.md gets the .html extension and any other foo.md becomes foo/index.html (NOTE(review): the index.md suffix test also matches names like reindex.md) + return mdPath + } + if strings.HasSuffix(mdPath, "index.md") { + return strings.TrimSuffix(mdPath, "md") + "html" + } + return strings.TrimSuffix(mdPath, ".md") + "/index.html" +} + +// 
helpers + +func getHref(t html.Token) (ok bool, href string) { + for _, a := range t.Attr { + if a.Key == "href" { + href = a.Val + ok = true + } + } + return +} + +func getSrc(t html.Token) (ok bool, src string) { + for _, a := range t.Attr { + if a.Key == "src" { + src = a.Val + ok = true + } + } + return +} diff --git a/tests/src/validator/main_test.go b/tests/src/validator/main_test.go new file mode 100644 index 0000000000..883673e2a7 --- /dev/null +++ b/tests/src/validator/main_test.go @@ -0,0 +1,12 @@ +package main + +import ( + "os" + "testing" +) + +// TestMain is the place to add extra setup or teardown before or after testing; +// for now it only runs the tests and exits with their status code +func TestMain(m *testing.M) { + os.Exit(m.Run()) +} diff --git a/tests/src/validator/utils.go b/tests/src/validator/utils.go new file mode 100644 index 0000000000..94ca47c10e --- /dev/null +++ b/tests/src/validator/utils.go @@ -0,0 +1,57 @@ +package main + +import ( + "github.com/gdevillele/frontparser" + "io/ioutil" + "os" + "strings" +) + +// isPublishedMarkdown returns whether path is a .md file with a frontmatter header whose "published" field is missing or true; +// as a convenience it also returns the markdown bytes to avoid reading files twice +func isPublishedMarkdown(path string) (bool, []byte, error) { + if strings.HasSuffix(path, ".md") { + fileBytes, err := ioutil.ReadFile(path) + if err != nil { + return false, nil, err + } + if frontparser.HasFrontmatterHeader(fileBytes) { + fm, _, err := frontparser.ParseFrontmatterAndContent(fileBytes) + if err != nil { + return false, nil, err + } + // skip markdowns that are not published + if published, exists := fm["published"]; exists { + if publishedBool, ok := published.(bool); ok { + if publishedBool { + // file is markdown, has frontmatter and is published + return true, fileBytes, nil + } + } + } else { + // if "published" field is missing, it means published == true + return true, fileBytes, nil + } + } + } + return false, nil, nil +} + +// isHTML returns whether a file is an html file or not +// as a convenience it also 
returns the html bytes to avoid reading files twice +func isHTML(path string) (bool, []byte, error) { + if strings.HasSuffix(path, ".html") { + fileBytes, err := ioutil.ReadFile(path) + if err != nil { + return false, nil, err + } + return true, fileBytes, nil + } + return false, nil, nil +} + +// fileExists returns true if os.Stat succeeds for the given name (any Stat error counts as not existing) +func fileExists(name string) bool { + _, err := os.Stat(name) + return err == nil +}