// Package documents loads Markdown files from a repository directory and
// splits each one into front-matter metadata and heading-delimited sections.
package documents

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"sort"
	"strings"
)

// Document is one parsed Markdown file.
type Document struct {
	RepoPath string            // repoPath exactly as passed to ParseFile
	DocPath  string            // slash-separated path of the file relative to RepoPath
	Kind     string            // base file name with its extension removed
	Title    string            // front-matter "title", else first level-1 heading, else a file-name-derived title
	Hash     string            // hex-encoded SHA-256 of the raw file content
	Metadata map[string]string // key/value pairs read from the front matter (empty if none)
	Sections []Section         // sections in document order
}

// Section is a run of body text together with the heading that introduces it.
type Section struct {
	Heading string // heading text without the leading '#' markers
	Level   int    // number of '#' markers (1-6); 1 for text preceding any heading
	Ordinal int    // 1-based position among the emitted sections
	Body    string // section text with surrounding whitespace trimmed
}

// LoadRepo walks repoPath/scanPath and parses every file whose extension is
// ".md" (case-insensitive). The returned documents are sorted by DocPath.
//
// It returns an error if the scan root cannot be stat'ed or is not a
// directory, and aborts on the first walk or parse error.
func LoadRepo(repoPath, scanPath string) ([]Document, error) {
	root := filepath.Join(repoPath, scanPath)
	info, err := os.Stat(root)
	if err != nil {
		return nil, err
	}
	if !info.IsDir() {
		return nil, fmt.Errorf("%s is not a directory", root)
	}

	var docs []Document
	err = filepath.WalkDir(root, func(path string, d fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if d.IsDir() {
			return nil
		}
		if strings.ToLower(filepath.Ext(path)) != ".md" {
			return nil
		}
		doc, parseErr := ParseFile(repoPath, path)
		if parseErr != nil {
			return parseErr
		}
		docs = append(docs, doc)
		return nil
	})
	if err != nil {
		return nil, err
	}

	sort.Slice(docs, func(i, j int) bool { return docs[i].DocPath < docs[j].DocPath })
	return docs, nil
}

// ParseFile reads the Markdown file at path and converts it into a Document
// whose DocPath is expressed relative to repoPath.
//
// Title precedence is: non-blank front-matter "title", then the first level-1
// heading in the body, then a title derived from the file name.
func ParseFile(repoPath, path string) (Document, error) {
	content, err := os.ReadFile(path)
	if err != nil {
		return Document{}, err
	}
	relPath, err := filepath.Rel(repoPath, path)
	if err != nil {
		return Document{}, err
	}

	fallback := fallbackTitle(path)
	meta, body := splitFrontMatter(string(content))
	sections, title := parseSections(body, fallback)
	if len(sections) == 0 {
		// Defensive: guarantee every document carries at least one section.
		sections = []Section{{
			Heading: fallback,
			Level:   1,
			Ordinal: 1,
			Body:    strings.TrimSpace(body),
		}}
	}
	if metaTitle := strings.TrimSpace(meta["title"]); metaTitle != "" {
		title = metaTitle
	}
	if title == "" {
		title = fallback
	}

	// The hash covers the raw bytes, front matter included.
	sum := sha256.Sum256(content)
	return Document{
		RepoPath: repoPath,
		DocPath:  filepath.ToSlash(relPath),
		Kind:     strings.TrimSuffix(filepath.Base(path), filepath.Ext(path)),
		Title:    title,
		Hash:     hex.EncodeToString(sum[:]),
		Metadata: meta,
		Sections: sections,
	}, nil
}

// splitFrontMatter separates a leading "---"-delimited front-matter block from
// the rest of content. Each "key: value" line inside the block becomes a map
// entry; surrounding whitespace and quote characters are stripped from the
// value, and lines without a colon are ignored.
//
// If content does not start with a "---" line, has fewer than three lines, or
// the block is never closed, it returns an empty map and content unchanged.
func splitFrontMatter(content string) (map[string]string, string) {
	meta := map[string]string{}
	lines := strings.Split(content, "\n")
	if len(lines) < 3 || strings.TrimSpace(lines[0]) != "---" {
		return meta, content
	}

	end := -1
	for i := 1; i < len(lines); i++ {
		if strings.TrimSpace(lines[i]) == "---" {
			end = i
			break
		}
		parts := strings.SplitN(lines[i], ":", 2)
		if len(parts) != 2 {
			continue
		}
		key := strings.TrimSpace(parts[0])
		val := strings.Trim(strings.TrimSpace(parts[1]), `"'`)
		if key != "" {
			meta[key] = val
		}
	}
	if end == -1 {
		// Unterminated block: discard anything collected and treat the whole
		// input as body text.
		return map[string]string{}, content
	}
	return meta, strings.Join(lines[end+1:], "\n")
}

// parseSections splits body into sections at Markdown ATX headings and returns
// them together with the text of the first level-1 heading ("" if none).
//
// Text before the first heading is emitted under the heading "Overview" at
// level 1. A heading whose body is blank is dropped, except that at least one
// section is always returned. Lines inside fenced code blocks (``` or ~~~) are
// always body text, so a "# comment" in a code sample never starts a section.
// fallback is used as the heading if the current heading is empty.
func parseSections(body, fallback string) ([]Section, string) {
	lines := strings.Split(body, "\n")

	var sections []Section
	var title string
	currentHeading := "Overview"
	currentLevel := 1
	currentBody := make([]string, 0, len(lines))
	ordinal := 0

	flush := func() {
		text := strings.TrimSpace(strings.Join(currentBody, "\n"))
		if text == "" && ordinal > 0 {
			// Skip empty sections once at least one section exists.
			currentBody = currentBody[:0]
			return
		}
		ordinal++
		heading := currentHeading
		if heading == "" {
			heading = fallback
		}
		sections = append(sections, Section{
			Heading: heading,
			Level:   currentLevel,
			Ordinal: ordinal,
			Body:    text,
		})
		currentBody = currentBody[:0]
	}

	// fenceChar is non-zero while inside a fenced code block; fenceLen is the
	// length of the opening fence, which the closing fence must match or exceed.
	var fenceChar byte
	var fenceLen int

	for _, line := range lines {
		trimmed := strings.TrimSpace(line)

		if fenceChar != 0 {
			if closesFence(trimmed, fenceChar, fenceLen) {
				fenceChar, fenceLen = 0, 0
			}
			currentBody = append(currentBody, line)
			continue
		}
		if ch, n := opensFence(trimmed); ch != 0 {
			fenceChar, fenceLen = ch, n
			currentBody = append(currentBody, line)
			continue
		}

		level, heading, ok := parseHeading(trimmed)
		if !ok {
			currentBody = append(currentBody, line)
			continue
		}
		if strings.TrimSpace(strings.Join(currentBody, "\n")) != "" || ordinal > 0 {
			flush()
		}
		currentHeading = heading
		currentLevel = level
		if title == "" && level == 1 {
			title = heading
		}
	}
	if len(currentBody) > 0 || ordinal == 0 {
		flush()
	}
	return sections, title
}

// opensFence reports whether trimmed opens a fenced code block. On success it
// returns the fence character ('`' or '~') and the length of the fence run;
// otherwise it returns 0, 0. A backtick run followed by text that itself
// contains a backtick is inline code, not a fence.
func opensFence(trimmed string) (byte, int) {
	if trimmed == "" {
		return 0, 0
	}
	ch := trimmed[0]
	if ch != '`' && ch != '~' {
		return 0, 0
	}
	n := 0
	for n < len(trimmed) && trimmed[n] == ch {
		n++
	}
	if n < 3 {
		return 0, 0
	}
	if ch == '`' && strings.IndexByte(trimmed[n:], '`') >= 0 {
		return 0, 0
	}
	return ch, n
}

// closesFence reports whether trimmed closes a fence opened with n copies of
// ch: it must consist solely of ch and be at least n characters long.
func closesFence(trimmed string, ch byte, n int) bool {
	if len(trimmed) < n {
		return false
	}
	for i := 0; i < len(trimmed); i++ {
		if trimmed[i] != ch {
			return false
		}
	}
	return true
}

// parseHeading recognises an ATX heading of the form "#{1,6} text" in an
// already-trimmed line and returns its level and text. ok is false when the
// line is not a heading, has more than six '#', lacks the space after the
// markers, or has no text.
func parseHeading(line string) (int, string, bool) {
	if line == "" || !strings.HasPrefix(line, "#") {
		return 0, "", false
	}
	level := 0
	for level < len(line) && line[level] == '#' {
		level++
	}
	if level == 0 || level > 6 {
		return 0, "", false
	}
	if level >= len(line) || line[level] != ' ' {
		return 0, "", false
	}
	heading := strings.TrimSpace(line[level:])
	if heading == "" {
		return 0, "", false
	}
	return level, heading, true
}

// fallbackTitle derives a human-readable title from the file name in path by
// dropping the extension and replacing '-' and '_' with spaces.
func fallbackTitle(path string) string {
	base := strings.TrimSuffix(filepath.Base(path), filepath.Ext(path))
	base = strings.ReplaceAll(base, "-", " ")
	base = strings.ReplaceAll(base, "_", " ")
	return strings.TrimSpace(base)
}