Files

224 lines
4.5 KiB
Go

package documents
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io/fs"
"os"
"path/filepath"
"sort"
"strings"
)
// Document is one parsed Markdown file together with its derived metadata.
type Document struct {
	// RepoPath is the repository root the file was loaded from.
	RepoPath string
	// DocPath is the file's path relative to RepoPath, using forward slashes.
	DocPath string
	// Kind is the file's base name with its extension removed.
	Kind string
	// Title is the front-matter "title" when present, otherwise the first
	// level-1 heading, otherwise a title derived from the file name.
	Title string
	// Hash is the hex-encoded SHA-256 digest of the raw file content.
	Hash string
	// Metadata holds the key/value pairs read from the front-matter block.
	Metadata map[string]string
	// Sections lists the heading-delimited sections in document order.
	Sections []Section
}
// Section is a heading-delimited slice of a document.
type Section struct {
	// Heading is the heading text that introduces the section.
	Heading string
	// Level is the heading depth (the number of leading '#' characters).
	Level int
	// Ordinal is the section's 1-based position within its document.
	Ordinal int
	// Body is the section text with surrounding whitespace trimmed.
	Body string
}
// LoadRepo parses every Markdown file found under scanPath inside repoPath.
//
// The scan root is repoPath joined with scanPath and must be an existing
// directory. Files are selected by a ".md" extension, compared
// case-insensitively, and each one is handed to ParseFile. The first error
// from the walk or from parsing aborts the scan and is returned as-is.
// On success the documents are returned ordered by DocPath.
func LoadRepo(repoPath, scanPath string) ([]Document, error) {
	root := filepath.Join(repoPath, scanPath)

	switch info, err := os.Stat(root); {
	case err != nil:
		return nil, err
	case !info.IsDir():
		return nil, fmt.Errorf("%s is not a directory", root)
	}

	var docs []Document
	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		// Cases run in order, so entry is only touched once walkErr is nil.
		switch {
		case walkErr != nil:
			return walkErr
		case entry.IsDir():
			return nil
		case strings.ToLower(filepath.Ext(path)) != ".md":
			return nil
		}

		doc, err := ParseFile(repoPath, path)
		if err != nil {
			return err
		}
		docs = append(docs, doc)
		return nil
	}
	if err := filepath.WalkDir(root, visit); err != nil {
		return nil, err
	}

	sort.Slice(docs, func(a, b int) bool {
		return docs[a].DocPath < docs[b].DocPath
	})
	return docs, nil
}
// ParseFile reads the Markdown file at path and turns it into a Document.
//
// repoPath is recorded on the result and is the base against which the
// document's slash-separated DocPath is computed. The title is chosen in this
// order: a non-blank front-matter "title", the first level-1 heading, then a
// title derived from the file name. Hash is the SHA-256 of the file's raw
// bytes, so it changes whenever the file changes on disk.
//
// An error is returned when the file cannot be read or when path cannot be
// made relative to repoPath.
func ParseFile(repoPath, path string) (Document, error) {
	content, err := os.ReadFile(path)
	if err != nil {
		return Document{}, err
	}
	relPath, err := filepath.Rel(repoPath, path)
	if err != nil {
		return Document{}, err
	}

	// A leading UTF-8 byte-order mark would otherwise hide the opening "---"
	// of the front matter (and a heading on the first line), because it is
	// not whitespace and survives strings.TrimSpace. Only the parsed text is
	// stripped; the hash below still covers the raw bytes.
	text := strings.TrimPrefix(string(content), "\ufeff")
	fallback := fallbackTitle(path)

	meta, body := splitFrontMatter(text)
	sections, title := parseSections(body, fallback)
	if len(sections) == 0 {
		// Defensive: guarantee every document carries at least one section.
		sections = []Section{{
			Heading: fallback,
			Level:   1,
			Ordinal: 1,
			Body:    strings.TrimSpace(body),
		}}
	}

	if metaTitle := strings.TrimSpace(meta["title"]); metaTitle != "" {
		title = metaTitle
	}
	if title == "" {
		title = fallback
	}

	sum := sha256.Sum256(content)
	return Document{
		RepoPath: repoPath,
		DocPath:  filepath.ToSlash(relPath),
		Kind:     strings.TrimSuffix(filepath.Base(path), filepath.Ext(path)),
		Title:    title,
		Hash:     hex.EncodeToString(sum[:]),
		Metadata: meta,
		Sections: sections,
	}, nil
}
// splitFrontMatter separates a leading "---" delimited front-matter block from
// the rest of content.
//
// It returns the key/value pairs found in the block and the text that follows
// the closing "---". Each front-matter line is split at its first ':'; lines
// without a ':' or with a blank key are ignored. A value wrapped in one
// matching pair of single or double quotes has that pair removed.
//
// When content does not start with "---", or the block is never closed, the
// result is an empty map and content unchanged.
func splitFrontMatter(content string) (map[string]string, string) {
	meta := map[string]string{}
	lines := strings.Split(content, "\n")
	// Two lines are enough for a valid (empty) block: "---" followed by "---".
	if len(lines) < 2 || strings.TrimSpace(lines[0]) != "---" {
		return meta, content
	}

	end := -1
	for i := 1; i < len(lines); i++ {
		if strings.TrimSpace(lines[i]) == "---" {
			end = i
			break
		}
		sep := strings.IndexByte(lines[i], ':')
		if sep < 0 {
			continue
		}
		key := strings.TrimSpace(lines[i][:sep])
		if key == "" {
			continue
		}
		val := strings.TrimSpace(lines[i][sep+1:])
		// Strip only one matching outer pair, so quotes that belong to the
		// value itself (e.g. "He said 'hi'") are left intact.
		if n := len(val); n >= 2 && (val[0] == '"' || val[0] == '\'') && val[n-1] == val[0] {
			val = val[1 : n-1]
		}
		meta[key] = val
	}

	if end == -1 {
		// Unterminated block: treat the whole input as body and discard
		// anything that was collected while scanning.
		return map[string]string{}, content
	}
	return meta, strings.Join(lines[end+1:], "\n")
}
// parseSections splits a Markdown body into heading-delimited sections.
//
// It returns the sections in document order and the text of the first level-1
// heading (empty when there is none). Text that precedes the first heading is
// collected under an "Overview" section at level 1. After the first section
// has been emitted, a heading with no body text is dropped rather than
// producing an empty section. At least one section is always returned, even
// for an empty body. fallback is substituted only if a section would
// otherwise end up with an empty heading.
//
// Lines inside fenced code blocks (``` or ~~~) are never treated as headings,
// so code such as a shell comment "# install" stays in the section body.
func parseSections(body, fallback string) ([]Section, string) {
	lines := strings.Split(body, "\n")

	var (
		sections []Section
		title    string
	)
	currentHeading := "Overview"
	currentLevel := 1
	currentBody := make([]string, 0, len(lines))
	ordinal := 0

	// flush emits the section accumulated so far and resets the body buffer.
	flush := func() {
		text := strings.TrimSpace(strings.Join(currentBody, "\n"))
		currentBody = currentBody[:0]
		if text == "" && ordinal > 0 {
			return
		}
		ordinal++
		heading := currentHeading
		if heading == "" {
			heading = fallback
		}
		sections = append(sections, Section{
			Heading: heading,
			Level:   currentLevel,
			Ordinal: ordinal,
			Body:    text,
		})
	}

	// fenceRun returns the fence character and run length when s starts with
	// three or more backticks or tildes; the length is 0 otherwise.
	fenceRun := func(s string) (byte, int) {
		if s == "" || (s[0] != '`' && s[0] != '~') {
			return 0, 0
		}
		n := 0
		for n < len(s) && s[n] == s[0] {
			n++
		}
		if n < 3 {
			return 0, 0
		}
		return s[0], n
	}

	var fenceChar byte
	fenceLen := 0 // 0 means "not inside a fenced code block"

	for _, line := range lines {
		trimmed := strings.TrimSpace(line)

		if ch, n := fenceRun(trimmed); n > 0 {
			switch {
			case fenceLen == 0:
				// A backtick fence may not have backticks in its info
				// string; such a line is inline code, not an opener.
				if ch != '`' || !strings.Contains(trimmed[n:], "`") {
					fenceChar, fenceLen = ch, n
				}
			case ch == fenceChar && n >= fenceLen && trimmed[n:] == "":
				// Closers use the same character, are at least as long as
				// the opener, and carry no info string.
				fenceLen = 0
			}
			currentBody = append(currentBody, line)
			continue
		}
		if fenceLen > 0 {
			currentBody = append(currentBody, line)
			continue
		}

		level, heading, ok := parseHeading(trimmed)
		if !ok {
			currentBody = append(currentBody, line)
			continue
		}
		if strings.TrimSpace(strings.Join(currentBody, "\n")) != "" || ordinal > 0 {
			flush()
		}
		currentHeading = heading
		currentLevel = level
		if title == "" && level == 1 {
			title = heading
		}
	}

	if len(currentBody) > 0 || ordinal == 0 {
		flush()
	}
	return sections, title
}
// parseHeading recognises an ATX-style Markdown heading.
//
// line must start with one to six '#' characters followed by a space or tab
// and some heading text. An optional closing run of '#' is removed when it is
// separated from the text by whitespace, so "## Title ##" yields "Title" while
// "# C#" keeps its trailing '#'.
//
// It returns the heading level, the trimmed heading text, and true on a match;
// otherwise it returns 0, "", false. A heading with no text is not a match.
func parseHeading(line string) (int, string, bool) {
	level := 0
	for level < len(line) && line[level] == '#' {
		level++
	}
	if level == 0 || level > 6 {
		return 0, "", false
	}
	if level >= len(line) || (line[level] != ' ' && line[level] != '\t') {
		return 0, "", false
	}

	heading := strings.TrimSpace(line[level:])
	if stripped := strings.TrimRight(heading, "#"); len(stripped) < len(heading) {
		switch {
		case stripped == "":
			// Nothing but a closing sequence: the heading has no text.
			heading = ""
		case stripped[len(stripped)-1] == ' ' || stripped[len(stripped)-1] == '\t':
			heading = strings.TrimSpace(stripped)
		}
	}
	if heading == "" {
		return 0, "", false
	}
	return level, heading, true
}
// fallbackTitle derives a readable title from a file path: it takes the base
// name, drops the extension, replaces hyphens and underscores with spaces, and
// trims any surrounding whitespace.
func fallbackTitle(path string) string {
	stem := strings.TrimSuffix(filepath.Base(path), filepath.Ext(path))
	spaced := strings.NewReplacer("-", " ", "_", " ").Replace(stem)
	return strings.TrimSpace(spaced)
}