224 lines
4.5 KiB
Go
224 lines
4.5 KiB
Go
package documents
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"io/fs"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
type Document struct {
|
|
RepoPath string
|
|
DocPath string
|
|
Kind string
|
|
Title string
|
|
Hash string
|
|
Metadata map[string]string
|
|
Sections []Section
|
|
}
|
|
|
|
// Section is a heading-delimited slice of a document body.
type Section struct {
	// Heading is the heading text that introduces the section.
	Heading string
	// Level is the heading depth, i.e. the number of leading '#' characters.
	Level int
	// Ordinal is the 1-based position of the section within its document.
	Ordinal int
	// Body is the section text with surrounding whitespace trimmed.
	Body string
}
|
|
|
|
func LoadRepo(repoPath, scanPath string) ([]Document, error) {
|
|
root := filepath.Join(repoPath, scanPath)
|
|
info, err := os.Stat(root)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !info.IsDir() {
|
|
return nil, fmt.Errorf("%s is not a directory", root)
|
|
}
|
|
|
|
var docs []Document
|
|
err = filepath.WalkDir(root, func(path string, d fs.DirEntry, walkErr error) error {
|
|
if walkErr != nil {
|
|
return walkErr
|
|
}
|
|
if d.IsDir() {
|
|
return nil
|
|
}
|
|
if strings.ToLower(filepath.Ext(path)) != ".md" {
|
|
return nil
|
|
}
|
|
|
|
doc, parseErr := ParseFile(repoPath, path)
|
|
if parseErr != nil {
|
|
return parseErr
|
|
}
|
|
docs = append(docs, doc)
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
sort.Slice(docs, func(i, j int) bool {
|
|
return docs[i].DocPath < docs[j].DocPath
|
|
})
|
|
return docs, nil
|
|
}
|
|
|
|
func ParseFile(repoPath, path string) (Document, error) {
|
|
content, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return Document{}, err
|
|
}
|
|
|
|
relPath, err := filepath.Rel(repoPath, path)
|
|
if err != nil {
|
|
return Document{}, err
|
|
}
|
|
|
|
meta, body := splitFrontMatter(string(content))
|
|
sections, title := parseSections(body, fallbackTitle(path))
|
|
if len(sections) == 0 {
|
|
sections = []Section{{
|
|
Heading: fallbackTitle(path),
|
|
Level: 1,
|
|
Ordinal: 1,
|
|
Body: strings.TrimSpace(body),
|
|
}}
|
|
}
|
|
if metaTitle := strings.TrimSpace(meta["title"]); metaTitle != "" {
|
|
title = metaTitle
|
|
}
|
|
if title == "" {
|
|
title = fallbackTitle(path)
|
|
}
|
|
|
|
sum := sha256.Sum256(content)
|
|
return Document{
|
|
RepoPath: repoPath,
|
|
DocPath: filepath.ToSlash(relPath),
|
|
Kind: strings.TrimSuffix(filepath.Base(path), filepath.Ext(path)),
|
|
Title: title,
|
|
Hash: hex.EncodeToString(sum[:]),
|
|
Metadata: meta,
|
|
Sections: sections,
|
|
}, nil
|
|
}
|
|
|
|
// splitFrontMatter separates a leading "---"-delimited front matter block
// from the rest of content.
//
// The first line must be "---" (surrounding whitespace and a leading UTF-8
// byte order mark are ignored) and a later line must close the block with
// another "---". Lines in between are read as flat "key: value" pairs: the
// text before the first colon is the key and the remainder is the value,
// both whitespace-trimmed, with leading and trailing quote characters
// removed from the value. Lines without a colon or with an empty key are
// skipped, and a repeated key keeps its last value.
//
// It returns the parsed metadata and the text that follows the closing
// delimiter. When content has no complete front matter block the returned
// map is empty (never nil) and content is returned unchanged.
func splitFrontMatter(content string) (map[string]string, string) {
	const (
		delimiter = "---"
		bom       = "\ufeff"
	)

	lines := strings.Split(content, "\n")
	// An opening and a closing delimiter need at least two lines. Editors
	// that save "UTF-8 with BOM" put the mark in front of the first
	// delimiter, so it is stripped before comparing.
	if len(lines) < 2 || strings.TrimSpace(strings.TrimPrefix(lines[0], bom)) != delimiter {
		return map[string]string{}, content
	}

	meta := map[string]string{}
	for i := 1; i < len(lines); i++ {
		line := lines[i]
		if strings.TrimSpace(line) == delimiter {
			return meta, strings.Join(lines[i+1:], "\n")
		}

		colon := strings.IndexByte(line, ':')
		if colon < 0 {
			continue
		}
		key := strings.TrimSpace(line[:colon])
		if key == "" {
			continue
		}
		meta[key] = strings.Trim(strings.TrimSpace(line[colon+1:]), `"'`)
	}

	// The block was never closed: discard what was collected and treat the
	// whole input as body.
	return map[string]string{}, content
}
|
|
|
|
func parseSections(body, fallback string) ([]Section, string) {
|
|
lines := strings.Split(body, "\n")
|
|
var sections []Section
|
|
var title string
|
|
|
|
currentHeading := "Overview"
|
|
currentLevel := 1
|
|
currentBody := make([]string, 0, len(lines))
|
|
ordinal := 0
|
|
|
|
flush := func() {
|
|
text := strings.TrimSpace(strings.Join(currentBody, "\n"))
|
|
if text == "" && ordinal > 0 {
|
|
currentBody = currentBody[:0]
|
|
return
|
|
}
|
|
ordinal++
|
|
heading := currentHeading
|
|
if heading == "" {
|
|
heading = fallback
|
|
}
|
|
sections = append(sections, Section{
|
|
Heading: heading,
|
|
Level: currentLevel,
|
|
Ordinal: ordinal,
|
|
Body: text,
|
|
})
|
|
currentBody = currentBody[:0]
|
|
}
|
|
|
|
for _, line := range lines {
|
|
trimmed := strings.TrimSpace(line)
|
|
level, heading, ok := parseHeading(trimmed)
|
|
if !ok {
|
|
currentBody = append(currentBody, line)
|
|
continue
|
|
}
|
|
|
|
if strings.TrimSpace(strings.Join(currentBody, "\n")) != "" || ordinal > 0 {
|
|
flush()
|
|
}
|
|
currentHeading = heading
|
|
currentLevel = level
|
|
if title == "" && level == 1 {
|
|
title = heading
|
|
}
|
|
}
|
|
|
|
if len(currentBody) > 0 || ordinal == 0 {
|
|
flush()
|
|
}
|
|
|
|
return sections, title
|
|
}
|
|
|
|
// parseHeading recognizes an ATX-style Markdown heading such as "## Setup".
//
// line must start with one to six '#' characters followed by a space or a
// tab; callers are expected to strip leading whitespace first. An optional
// closing sequence of '#' characters that is separated from the text by
// whitespace ("## Setup ##") is not part of the heading and is removed,
// while a trailing '#' that is attached to the text ("# C#") is kept.
//
// It returns the heading level, the trimmed heading text and true. Lines
// that are not headings, or whose heading text is empty, yield (0, "",
// false).
func parseHeading(line string) (int, string, bool) {
	level := 0
	for level < len(line) && line[level] == '#' {
		level++
	}
	if level == 0 || level > 6 {
		return 0, "", false
	}
	// "#tag" or a bare "###" is ordinary text, not a heading.
	if level >= len(line) || (line[level] != ' ' && line[level] != '\t') {
		return 0, "", false
	}

	heading := strings.TrimSpace(line[level:])
	if stripped := strings.TrimRight(heading, "#"); stripped != heading {
		// Only a '#' run preceded by whitespace (or making up the whole
		// text) is a closing sequence.
		if stripped == "" || strings.HasSuffix(stripped, " ") || strings.HasSuffix(stripped, "\t") {
			heading = strings.TrimSpace(stripped)
		}
	}
	if heading == "" {
		return 0, "", false
	}
	return level, heading, true
}
|
|
|
|
// fallbackTitle derives a human-readable title from a file path: it takes
// the base name, drops the extension, turns hyphens and underscores into
// spaces and trims surrounding whitespace.
func fallbackTitle(path string) string {
	name := filepath.Base(path)
	stem := strings.TrimSuffix(name, filepath.Ext(path))
	separators := strings.NewReplacer("-", " ", "_", " ")
	return strings.TrimSpace(separators.Replace(stem))
}
|