Files
ks-zl/scripts/validate-distillation-output.ps1
2026-05-13 18:14:30 +08:00

598 lines
21 KiB
PowerShell
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Validates Markdown distillation output below a root directory.
# Failures are collected in $errors by the functions defined in this script; the
# script prints them and exits with code 1 when the root is missing or any
# failure was recorded.
param(
# Directory to scan; defaults to the current location.
[string]$Root = (Get-Location).Path,
# When set, finding no 'status: distilled' / 'status: requirement-draft' artifact is not an error.
[switch]$AllowNoArtifacts,
# Maximum number of non-empty lines allowed in a requirement.md.
[int]$MaxRequirementLines = 120,
# Maximum number of non-empty lines allowed in decisions.md / acceptance.md sidecars.
[int]$MaxSidecarLines = 80
)
# Turn non-terminating cmdlet errors into terminating ones for the whole script.
$ErrorActionPreference = "Stop"
# Bail out early: nothing can be validated without an existing root.
if (-not (Test-Path -LiteralPath $Root)) {
Write-Host "Validation failed: root path does not exist: $Root"
exit 1
}
# Resolved root without trailing separators; Get-RelativePath strips this prefix.
$rootPath = (Resolve-Path -LiteralPath $Root).Path.TrimEnd([char[]]"\/")
# Shared accumulator written by Add-ValidationError and reported at the end of the script.
$errors = New-Object System.Collections.Generic.List[string]
# Appends one failure message to the script-level $errors list.
# Produces no pipeline output.
function Add-ValidationError {
    param([string]$Message)

    $null = $errors.Add($Message)
}
# Returns $Path relative to the script-level $rootPath, using forward slashes.
# Paths that are not located under $rootPath are returned in full (also with
# forward slashes). $Path must exist because it is resolved with Resolve-Path.
function Get-RelativePath {
    param([string]$Path)

    $fullPath = (Resolve-Path -LiteralPath $Path).Path
    if ($fullPath.StartsWith($rootPath, [System.StringComparison]::OrdinalIgnoreCase)) {
        $remainder = $fullPath.Substring($rootPath.Length)
        # A plain prefix match is not enough: root "kb" must not claim "kb-archive".
        # The text after the prefix has to be empty or start with a separator.
        $atBoundary = ($remainder.Length -eq 0) -or
            ($remainder[0] -eq [char]'\') -or
            ($remainder[0] -eq [char]'/')
        if ($atBoundary) {
            return ($remainder.TrimStart([char[]]"\/") -replace "\\", "/")
        }
    }
    return ($fullPath -replace "\\", "/")
}
# Returns $true when a dotted IPv4 string is acceptable as an example address:
# anything in 127.x.x.x, or an address whose first three octets are
# 10.0.0, 192.0.2, 198.51.100 or 203.0.113. Everything else yields $false.
function Test-AllowedExampleIp {
    param([string]$Ip)

    $octets = @($Ip.Split(".") | ForEach-Object { [int]$_ })
    if ($octets[0] -eq 127) {
        return $true
    }

    # Compare the numeric /24 prefix against the allow-list in one lookup.
    $prefix = "{0}.{1}.{2}" -f $octets[0], $octets[1], $octets[2]
    $allowedPrefixes = @("10.0.0", "192.0.2", "198.51.100", "203.0.113")
    return ($allowedPrefixes -contains $prefix)
}
# Reports whether $Content has a level-2 heading ("## <Section>") on a line of
# its own. Only spaces, tabs or a carriage return may follow the title.
function Test-MarkdownSection {
    param(
        [string]$Content,
        [string]$Section
    )

    $headingPattern = "(?m)^## " + [regex]::Escape($Section) + "[ `t`r]*$"
    if ($Content -match $headingPattern) {
        return $true
    }
    return $false
}
# Returns the text between the "## <Section>" heading and the next level-2
# heading (or the end of $Content). Returns $null when the heading is absent.
#
# The heading is matched the same way Test-MarkdownSection matches it:
#   * case-insensitively (Test-MarkdownSection uses -match, which ignores case);
#   * also when it is the very last line without a trailing newline, in which
#     case the body is the empty string.
# Without this parity a section could be reported as present while every check
# on its body was silently skipped because the body came back as $null.
function Get-MarkdownSectionBody {
    param(
        [string]$Content,
        [string]$Section
    )

    $pattern = "(?ims)^## $([regex]::Escape($Section))[ `t]*(?:\r?\n|\z)(?<body>.*?)(?=^## |\z)"
    $match = [regex]::Match($Content, $pattern)
    if ($match.Success) {
        return $match.Groups["body"].Value
    }
    return $null
}
# For an existing "## <Section>", records one error per entry of $Subsections
# that has no "### <name>" heading inside that section's body.
# Does nothing when the section itself cannot be found.
function Add-RequiredMarkdownSubsectionErrors {
    param(
        [string]$Content,
        [string]$Section,
        [string[]]$Subsections,
        [string]$RelativePath
    )

    $sectionBody = Get-MarkdownSectionBody -Content $Content -Section $Section
    if ($null -ne $sectionBody) {
        foreach ($name in $Subsections) {
            $headingPattern = "(?m)^### " + [regex]::Escape($name) + "[ `t`r]*$"
            if ($sectionBody -match $headingPattern) {
                continue
            }
            Add-ValidationError "$RelativePath $Section is missing subsection '$name'."
        }
    }
}
# Converts a Markdown link target into a file system path below $BaseDirectory.
# Returns $null when there is nothing local to check: blank targets, targets
# with a URI scheme (http:, mailto:, ...) and pure "#fragment" links.
# A trailing "#fragment" is dropped before the path is built.
function Resolve-MarkdownLinkTarget {
    param(
        [string]$BaseDirectory,
        [string]$LinkedPath
    )

    if ([string]::IsNullOrWhiteSpace($LinkedPath)) {
        return $null
    }

    # Trim first so that a target captured with surrounding spaces
    # (e.g. "( https://host/a.md)") is still recognised as a URL.
    $cleanPath = $LinkedPath.Trim()
    if ($cleanPath -match "^[a-z][a-z0-9+.-]*:") {
        return $null
    }

    $fragmentIndex = $cleanPath.IndexOf("#")
    if ($fragmentIndex -ge 0) {
        $cleanPath = $cleanPath.Substring(0, $fragmentIndex)
    }
    if ([string]::IsNullOrWhiteSpace($cleanPath)) {
        return $null
    }

    # Markdown always uses "/"; translate to the platform separator for Join-Path.
    return Join-Path $BaseDirectory ($cleanPath -replace "/", [System.IO.Path]::DirectorySeparatorChar)
}
# Records a "links to missing file" error when $LinkedPath resolves to a local
# path that does not exist. Targets that Resolve-MarkdownLinkTarget declines to
# resolve (it returns $null) are ignored.
function Add-MarkdownLinkErrorIfNeeded {
    param(
        [string]$BaseDirectory,
        [string]$RelativePath,
        [string]$Scope,
        [string]$LinkedPath
    )

    $resolved = Resolve-MarkdownLinkTarget -BaseDirectory $BaseDirectory -LinkedPath $LinkedPath
    if ($null -ne $resolved -and -not (Test-Path -LiteralPath $resolved)) {
        Add-ValidationError "$RelativePath $Scope links to missing file: $LinkedPath"
    }
}
# Checks every ".md" reference found in $Content and records an error (through
# Add-MarkdownLinkErrorIfNeeded) for each one that points to a missing file.
# Two kinds of reference are recognised:
#   1. Markdown links:   [text](path/to/doc.md#fragment)
#   2. Bare mentions:    path/to/doc.md
# Each distinct reference is checked once, so a repeated link yields one error.
function Add-MissingMarkdownLinkErrors {
    param(
        [string]$Content,
        [string]$BaseDirectory,
        [string]$RelativePath,
        [string]$Scope
    )

    $markdownLinkPattern = '\[[^\]\r\n]+\]\(([^)\r\n]+?\.md(?:#[^)\r\n]*)?)\)'
    $bareReferencePattern = '(?<![\w./-])(?:\.{1,2}/)?[A-Za-z0-9_./-]+\.md(?:#[A-Za-z0-9_.-]+)?'

    $checkedLinks = New-Object System.Collections.Generic.HashSet[string]
    # Character ranges occupied by Markdown link targets; used below to avoid
    # re-reading a piece of an already handled target as a bare reference.
    $targetStarts = New-Object System.Collections.Generic.List[int]
    $targetEnds = New-Object System.Collections.Generic.List[int]

    foreach ($match in [regex]::Matches($Content, $markdownLinkPattern)) {
        $target = $match.Groups[1]
        $targetStarts.Add($target.Index)
        $targetEnds.Add($target.Index + $target.Length)
        if ($checkedLinks.Add($target.Value)) {
            Add-MarkdownLinkErrorIfNeeded -BaseDirectory $BaseDirectory -RelativePath $RelativePath -Scope $Scope -LinkedPath $target.Value
        }
    }

    foreach ($match in [regex]::Matches($Content, $bareReferencePattern)) {
        # Skip text that lies inside a Markdown link target: the whole target
        # was validated above, and a fragment of it (for example the
        # "//host/doc.md" tail of a URL) is not a reference of its own.
        $insideLinkTarget = $false
        for ($i = 0; $i -lt $targetStarts.Count; $i++) {
            if ($match.Index -ge $targetStarts[$i] -and $match.Index -lt $targetEnds[$i]) {
                $insideLinkTarget = $true
                break
            }
        }
        if ($insideLinkTarget) {
            continue
        }

        # Skip the "//host/path.md" remainder of a plain URL written without
        # link syntax ("scheme:" directly precedes the match).
        $followsColon = ($match.Index -gt 0) -and ($Content[$match.Index - 1] -eq [char]':')
        if ($followsColon -and $match.Value.StartsWith("//", [System.StringComparison]::Ordinal)) {
            continue
        }

        if ($checkedLinks.Add($match.Value)) {
            Add-MarkdownLinkErrorIfNeeded -BaseDirectory $BaseDirectory -RelativePath $RelativePath -Scope $Scope -LinkedPath $match.Value
        }
    }
}
# Validates the body of a "Related" section. The body must be non-blank and
# contain either the bullet "- None yet." or at least one reference to a .md
# document (Markdown link or bare path), but never both.
function Add-RelatedContentErrors {
    param(
        [string]$Content,
        [string]$RelativePath
    )

    if ([string]::IsNullOrWhiteSpace($Content.Trim())) {
        Add-ValidationError "$RelativePath Related section is empty."
        return
    }

    $markdownLinkPattern = '\[[^\]\r\n]+\]\([^)\r\n]+?\.md(?:#[^)\r\n]*)?\)'
    $bareReferencePattern = '(?<![\w./-])(?:\.{1,2}/)?[A-Za-z0-9_./-]+\.md(?:#[A-Za-z0-9_.-]+)?'

    $declaresNone = $Content -match "(?m)^\s*-\s+None yet\.\s*$"
    $mentionsDocument = ($Content -match $markdownLinkPattern) -or ($Content -match $bareReferencePattern)

    if ($declaresNone) {
        if ($mentionsDocument) {
            Add-ValidationError "$RelativePath Related section must not combine '- None yet.' with document links."
        }
    }
    elseif (-not $mentionsDocument) {
        Add-ValidationError "$RelativePath Related section must contain '- None yet.' or at least one Markdown document link."
    }
}
# Verifies that $Content starts with a "---" delimited metadata block and that
# the block declares every required key (title, category, tags, status,
# updated, source). One error is recorded per missing key; a missing block is
# reported once and stops the check.
function Add-FrontMatterErrors {
    param(
        [string]$Content,
        [string]$RelativePath
    )

    $header = [regex]::Match($Content, "(?s)\A---\r?\n(.*?)\r?\n---")
    if (-not $header.Success) {
        Add-ValidationError "$RelativePath is missing YAML metadata."
        return
    }

    $metadata = $header.Groups[1].Value
    foreach ($name in "title", "category", "tags", "status", "updated", "source") {
        $keyPattern = "(?m)^" + [regex]::Escape($name) + "\s*:"
        if ($metadata -match $keyPattern) {
            continue
        }
        Add-ValidationError "$RelativePath metadata is missing '$name'."
    }
}
# Reads the value of one key from the leading "---" metadata block.
# Surrounding whitespace and surrounding double or single quotes are removed.
# Returns $null when there is no metadata block or the key has no value line.
function Get-FrontMatterValue {
    param(
        [string]$Content,
        [string]$Key
    )

    $block = [regex]::Match($Content, "(?s)\A---\r?\n(?<frontmatter>.*?)\r?\n---")
    if (-not $block.Success) {
        return $null
    }

    $linePattern = "(?m)^" + [regex]::Escape($Key) + "\s*:\s*(?<value>.+?)\s*$"
    $entry = [regex]::Match($block.Groups["frontmatter"].Value, $linePattern)
    if (-not $entry.Success) {
        return $null
    }

    $raw = $entry.Groups["value"].Value.Trim()
    return $raw.Trim('"').Trim("'")
}
# Flags leftover template text: any single-line "<...>" angle-bracket span and
# the word TODO (any letter case). Each kind is reported at most once.
function Add-PlaceholderErrors {
    param(
        [string]$Content,
        [string]$RelativePath
    )

    $checks = @(
        @{ Pattern = "<[^>`r`n]+>"; Problem = "contains an unresolved angle-bracket placeholder." },
        @{ Pattern = "(?i)\bTODO\b"; Problem = "contains TODO placeholder text." }
    )
    foreach ($check in $checks) {
        if ($Content -match $check.Pattern) {
            Add-ValidationError "$RelativePath $($check.Problem)"
        }
    }
}
# Counts the lines of $Content that contain something other than whitespace.
# Lines are separated by LF or CRLF.
function Get-NonEmptyLineCount {
    param([string]$Content)

    $count = 0
    foreach ($line in ($Content -split "`r?`n")) {
        if (-not [string]::IsNullOrWhiteSpace($line)) {
            $count++
        }
    }
    return $count
}
# Records an error for every line longer than $MaxLineChars characters.
# Not checked: the leading "---" metadata block (through its closing "---",
# or to the end of the text when it is never closed) and lines whose first
# non-blank character is "|" (table rows). Reported line numbers are 1-based.
function Add-LineLengthErrors {
    param(
        [string]$Content,
        [string]$RelativePath,
        [int]$MaxLineChars = 180
    )

    $lines = @($Content -split "`r?`n")
    $index = 0

    if ($lines[0] -eq "---") {
        # Advance to the closing delimiter, then step past it.
        $index = 1
        while ($index -lt $lines.Count -and $lines[$index] -ne "---") {
            $index++
        }
        $index++
    }

    for (; $index -lt $lines.Count; $index++) {
        $line = $lines[$index]
        if ($line.TrimStart().StartsWith("|")) {
            continue
        }
        if ($line.Length -gt $MaxLineChars) {
            $lineNumber = $index + 1
            Add-ValidationError "$RelativePath line $lineNumber is too long: $($line.Length) chars, max $MaxLineChars. Split it into short requirement bullets."
        }
    }
}
# Records an error for each level-2 heading whose title is on the list of
# headings that requirement.md is not allowed to use.
function Add-ForbiddenRequirementHeadingErrors {
    param(
        [string]$Content,
        [string]$RelativePath
    )

    $verboseHeadings = @(
        "背景", "需求背景", "价值", "目标", "技术方案",
        "详细设计", "接口设计", "架构设计", "实现方案"
    )

    $levelTwoHeadings = [regex]::Matches($Content, "(?m)^##\s+(?<heading>[^#\r\n]+?)\s*$")
    foreach ($found in $levelTwoHeadings) {
        $title = $found.Groups["heading"].Value.Trim()
        if ($title -in $verboseHeadings) {
            Add-ValidationError "$RelativePath uses verbose heading '$title'. Keep requirement.md as a short functional spec."
        }
    }
}
# Enforces per-section size limits. $Limits maps a level-2 section title to the
# maximum number of non-empty lines its body may have; sections that are not
# present in $Content are skipped.
#
# Sections are processed in ordinal order of their titles. Hashtable key
# enumeration order is unspecified, so iterating $Limits.Keys directly could
# emit the same set of errors in a different order from one run to the next.
function Add-SectionLineLimitErrors {
    param(
        [string]$Content,
        [string]$RelativePath,
        [hashtable]$Limits
    )

    if ($null -eq $Limits -or $Limits.Count -eq 0) {
        return
    }

    $sectionNames = [string[]]@($Limits.Keys)
    [Array]::Sort($sectionNames, [System.StringComparer]::Ordinal)

    foreach ($section in $sectionNames) {
        $body = Get-MarkdownSectionBody -Content $Content -Section $section
        if ($null -eq $body) {
            continue
        }
        $lineCount = Get-NonEmptyLineCount -Content $body
        if ($lineCount -gt $Limits[$section]) {
            Add-ValidationError "$RelativePath section '$section' is too long: $lineCount non-empty line(s), max $($Limits[$section]). Keep it concise or split the feature."
        }
    }
}
# Private helper for Add-RequirementPackageErrors: validates
# <PackageDirectory>/decisions.md when that file exists (length, required
# sections and subsections, the override record, placeholders).
function Add-DecisionsSidecarErrors {
    param([string]$PackageDirectory)

    $decisionsPath = Join-Path $PackageDirectory "decisions.md"
    if (-not (Test-Path -LiteralPath $decisionsPath)) {
        return
    }

    $decisionsContent = Get-Content -Raw -Encoding UTF8 -LiteralPath $decisionsPath
    $decisionsRelativePath = Get-RelativePath $decisionsPath
    Add-LineLengthErrors -Content $decisionsContent -RelativePath $decisionsRelativePath

    $decisionsLineCount = Get-NonEmptyLineCount -Content $decisionsContent
    if ($decisionsLineCount -gt $MaxSidecarLines) {
        Add-ValidationError "$decisionsRelativePath is too long: $decisionsLineCount non-empty line(s), max $MaxSidecarLines. Keep decision records concise."
    }

    $requiredDecisionSections = @(
        "已确认",
        "待确认事项",
        "冲突检查",
        "闭环检查",
        "方案记录",
        "覆盖记录"
    )
    foreach ($section in $requiredDecisionSections) {
        if (-not (Test-MarkdownSection -Content $decisionsContent -Section $section)) {
            Add-ValidationError "$decisionsRelativePath is missing section '$section'."
        }
    }

    Add-RequiredMarkdownSubsectionErrors `
        -Content $decisionsContent `
        -Section "方案记录" `
        -Subsections @("推荐方案", "备选方案") `
        -RelativePath $decisionsRelativePath

    # Accept both the ASCII colon and the full-width colon (U+FF1A): Chinese
    # text is commonly typed with the latter, and rejecting it would fail a
    # correctly filled-in record. \uFF1A keeps the pattern unambiguous to read.
    $approvalBody = Get-MarkdownSectionBody -Content $decisionsContent -Section "覆盖记录"
    if ($null -ne $approvalBody -and $approvalBody -notmatch "(?m)^\s*-\s*允许覆盖\s*[:\uFF1A]") {
        Add-ValidationError "$decisionsRelativePath 覆盖记录 must include '允许覆盖:'."
    }

    Add-PlaceholderErrors -Content $decisionsContent -RelativePath $decisionsRelativePath
}

# Private helper for Add-RequirementPackageErrors: validates
# <PackageDirectory>/acceptance.md when that file exists (length, required
# sections, placeholders).
function Add-AcceptanceSidecarErrors {
    param([string]$PackageDirectory)

    $acceptancePath = Join-Path $PackageDirectory "acceptance.md"
    if (-not (Test-Path -LiteralPath $acceptancePath)) {
        return
    }

    $acceptanceContent = Get-Content -Raw -Encoding UTF8 -LiteralPath $acceptancePath
    $acceptanceRelativePath = Get-RelativePath $acceptancePath
    Add-LineLengthErrors -Content $acceptanceContent -RelativePath $acceptanceRelativePath

    $acceptanceLineCount = Get-NonEmptyLineCount -Content $acceptanceContent
    if ($acceptanceLineCount -gt $MaxSidecarLines) {
        Add-ValidationError "$acceptanceRelativePath is too long: $acceptanceLineCount non-empty line(s), max $MaxSidecarLines. Keep acceptance sidecars concise."
    }

    $requiredAcceptanceSections = @(
        "验收补充",
        "边界补充",
        "还原检查"
    )
    foreach ($section in $requiredAcceptanceSections) {
        if (-not (Test-MarkdownSection -Content $acceptanceContent -Section $section)) {
            Add-ValidationError "$acceptanceRelativePath is missing section '$section'."
        }
    }

    Add-PlaceholderErrors -Content $acceptanceContent -RelativePath $acceptanceRelativePath
}

# Validates one requirement package, identified by its requirement.md.
#   -File          FileInfo of requirement.md (its directory is the package directory)
#   -Content       raw text of requirement.md
#   -RelativePath  path of requirement.md relative to the scan root, "/" separated
# Checks, in order: metadata / placeholders / line length / forbidden headings,
# overall size, package path layout, domain naming, metadata category,
# required sections and their size limits, content rules for selected
# sections, required package entries, the decisions.md and acceptance.md
# sidecars, and finally the Related section.
# All findings are recorded through Add-ValidationError.
function Add-RequirementPackageErrors {
    param(
        [System.IO.FileInfo]$File,
        [string]$Content,
        [string]$RelativePath
    )

    Add-FrontMatterErrors -Content $Content -RelativePath $RelativePath
    Add-PlaceholderErrors -Content $Content -RelativePath $RelativePath
    Add-LineLengthErrors -Content $Content -RelativePath $RelativePath
    Add-ForbiddenRequirementHeadingErrors -Content $Content -RelativePath $RelativePath

    $nonEmptyLineCount = Get-NonEmptyLineCount -Content $Content
    if ($nonEmptyLineCount -gt $MaxRequirementLines) {
        Add-ValidationError "$RelativePath is too long: $nonEmptyLineCount non-empty line(s), max $MaxRequirementLines. Split the feature or remove non-essential prose."
    }

    # Expected layout: <domain>/<package kind>/<slug>/requirement.md
    $slugPattern = "[a-z0-9]+(?:-[a-z0-9]+)*"
    $allowedPackageDirs = "requirement-packages|skill-requirements|feature-requirements"
    if ($RelativePath -notmatch "^$slugPattern/($allowedPackageDirs)/$slugPattern/requirement\.md$") {
        Add-ValidationError "$RelativePath must use '<domain>/<requirement-packages|skill-requirements|feature-requirements>/<slug>/requirement.md'."
    }

    # The top-level directory must not be one of these technology or
    # source-project names.
    $topDomain = ($RelativePath -split "/")[0]
    $technicalDomains = @(
        "backend",
        "frontend",
        "infra",
        "devops",
        "java",
        "go",
        "python",
        "node",
        "spring",
        "gin",
        "django",
        "admin-service",
        "admin-project"
    )
    if ($technicalDomains -contains $topDomain) {
        Add-ValidationError "$RelativePath uses technology or source-project domain '$topDomain'. Requirement packages must be organized by feature/capability domain."
    }

    # The metadata category, when present, has to mirror the first two path segments.
    $pathParts = $RelativePath -split "/"
    $packageKind = $pathParts[1]
    $category = Get-FrontMatterValue -Content $Content -Key "category"
    $expectedCategory = "$topDomain/$packageKind"
    if ($null -ne $category -and $category -ne $expectedCategory) {
        Add-ValidationError "$RelativePath metadata category must be '$expectedCategory' to match its package path."
    }

    $requiredRequirementSections = @(
        "功能",
        "流程",
        "数据表",
        "字典",
        "业务规则",
        "验收",
        "移植说明",
        "来源依据",
        "待确认",
        "Related"
    )
    foreach ($section in $requiredRequirementSections) {
        if (-not (Test-MarkdownSection -Content $Content -Section $section)) {
            Add-ValidationError "$RelativePath requirement package is missing section '$section'."
        }
    }

    Add-SectionLineLimitErrors `
        -Content $Content `
        -RelativePath $RelativePath `
        -Limits @{
            "功能" = 8
            "流程" = 25
            "业务规则" = 30
            "验收" = 15
            "移植说明" = 6
            "来源依据" = 15
            "待确认" = 15
        }

    # Content rules: a table row ("|") or one of the explicit "nothing new / reused" phrases.
    $dataTableBody = Get-MarkdownSectionBody -Content $Content -Section "数据表"
    if ($null -ne $dataTableBody -and $dataTableBody -notmatch "\|" -and $dataTableBody -notmatch "不新增表|不涉及表|复用") {
        Add-ValidationError "$RelativePath 数据表 must contain a field table or explicitly state no new/reused tables."
    }

    $dictionaryBody = Get-MarkdownSectionBody -Content $Content -Section "字典"
    if ($null -ne $dictionaryBody -and $dictionaryBody -notmatch "\|" -and $dictionaryBody -notmatch "不新增字典|不涉及字典|复用") {
        Add-ValidationError "$RelativePath 字典 must contain value-domain rows or explicitly state no new/reused dictionaries."
    }

    # Both ideas must appear: cross-stack preservation and source-as-reference.
    $portabilityBody = Get-MarkdownSectionBody -Content $Content -Section "移植说明"
    if ($null -ne $portabilityBody -and ($portabilityBody -notmatch "跨技术栈|跨栈" -or $portabilityBody -notmatch "来源实现|参考")) {
        Add-ValidationError "$RelativePath 移植说明 must mention both cross-stack preservation and source implementation as reference."
    }

    $packageDir = $File.DirectoryName
    $requiredPackageEntries = @("decisions.md", "acceptance.md", "references")
    foreach ($entry in $requiredPackageEntries) {
        $entryPath = Join-Path $packageDir $entry
        if (-not (Test-Path -LiteralPath $entryPath)) {
            Add-ValidationError "$RelativePath requirement package is missing '$entry'."
        }
    }

    Add-DecisionsSidecarErrors -PackageDirectory $packageDir
    Add-AcceptanceSidecarErrors -PackageDirectory $packageDir

    $relatedBody = Get-MarkdownSectionBody -Content $Content -Section "Related"
    if ($null -ne $relatedBody) {
        Add-RelatedContentErrors -Content $relatedBody -RelativePath $RelativePath
        Add-MissingMarkdownLinkErrors -Content $relatedBody -BaseDirectory $packageDir -RelativePath $RelativePath -Scope "Related"
    }
}
# Validates one distilled document.
#   -File          FileInfo of the document
#   -Content       raw text of the document
#   -RelativePath  path relative to the scan root, "/" separated
# Checks metadata and placeholders, the '<domain>/<slug>.md' layout, that the
# metadata category starts with the domain, the slug form of the file name,
# the required sections, and the content and links of the Related section.
function Add-DistilledDocumentErrors {
    param(
        [System.IO.FileInfo]$File,
        [string]$Content,
        [string]$RelativePath
    )

    Add-FrontMatterErrors -Content $Content -RelativePath $RelativePath
    Add-PlaceholderErrors -Content $Content -RelativePath $RelativePath

    $slug = "[a-z0-9]+(?:-[a-z0-9]+)*"
    $expectedLayout = "^$slug/$slug\.md$"
    if ($RelativePath -notmatch $expectedLayout) {
        Add-ValidationError "$RelativePath must use '<domain>/<slug>.md'."
    }

    $domain = ($RelativePath -split "/")[0]
    $declaredCategory = Get-FrontMatterValue -Content $Content -Key "category"
    if ($null -ne $declaredCategory) {
        # The category must be the domain itself or continue with "/".
        $categoryPrefix = "^" + [regex]::Escape($domain) + '(/|$)'
        if ($declaredCategory -notmatch $categoryPrefix) {
            Add-ValidationError "$RelativePath metadata category must start with '$domain' to match its document path."
        }
    }

    if ($File.Name -notmatch "^[a-z0-9]+(-[a-z0-9]+)*\.md$") {
        Add-ValidationError "$RelativePath uses a non-slug file name."
    }

    foreach ($title in "Summary", "Keywords", "Environment", "Symptom", "Root Cause", "Solution", "Verification", "Related") {
        if (Test-MarkdownSection -Content $Content -Section $title) {
            continue
        }
        Add-ValidationError "$RelativePath is missing section '$title'."
    }

    $relatedBody = Get-MarkdownSectionBody -Content $Content -Section "Related"
    if ($null -eq $relatedBody) {
        return
    }
    Add-RelatedContentErrors -Content $relatedBody -RelativePath $RelativePath
    Add-MissingMarkdownLinkErrors -Content $relatedBody -BaseDirectory $File.DirectoryName -RelativePath $RelativePath -Scope "Related"
}
# ---- Main: scan the root, validate every artifact, report the result ----

# Top-level directories whose Markdown files are not scanned.
$excludedTopDirs = @(".git", "templates", "scripts")

# -LiteralPath (as everywhere else in this script) so that a root containing
# wildcard characters such as "[" or "]" is scanned verbatim.
# @(...) guarantees an array even for zero or one match, so the foreach and the
# final .Count behave the same in every case.
$markdownFiles = @(
    Get-ChildItem -LiteralPath $rootPath -Recurse -File -Filter "*.md" -Force |
        Where-Object {
            $relativePath = Get-RelativePath $_.FullName
            $topDir = ($relativePath -split "/")[0]
            $excludedTopDirs -notcontains $topDir
        }
)

# Loop-invariant patterns, built once instead of once per file.
$ipPattern = "\b(?:(?:25[0-5]|2[0-4]\d|1?\d?\d)\.){3}(?:25[0-5]|2[0-4]\d|1?\d?\d)\b"
$secretPatterns = @(
    @{ Name = "private key"; Pattern = '-----BEGIN [A-Z ]*PRIVATE KEY-----' },
    @{ Name = "credential assignment"; Pattern = '(?i)\b(password|passwd|pwd|token|secret|api[_-]?key)\b\s*[:=]\s*\S{6,}' }
)

$distilledCount = 0
$requirementPackageCount = 0
foreach ($file in $markdownFiles) {
    $relativePath = Get-RelativePath $file.FullName
    $content = Get-Content -Raw -Encoding UTF8 -LiteralPath $file.FullName

    # Artifact kind is decided by the 'status:' line; a requirement package
    # additionally has to be a file named requirement.md.
    $isDistilled = $content -match "(?m)^status:\s*distilled\s*$"
    $isRequirementPackage = $file.Name -eq "requirement.md" -and $content -match "(?m)^status:\s*requirement-draft\s*$"

    if ($isDistilled) {
        $distilledCount++
        Add-DistilledDocumentErrors -File $file -Content $content -RelativePath $relativePath
    }
    if ($isRequirementPackage) {
        $requirementPackageCount++
        Add-RequirementPackageErrors -File $file -Content $content -RelativePath $relativePath
    }

    # The checks below apply to every scanned Markdown file, artifact or not.
    foreach ($match in [regex]::Matches($content, $ipPattern)) {
        $ip = $match.Value
        if (-not (Test-AllowedExampleIp $ip)) {
            Add-ValidationError "$relativePath contains a non-example IP address: $ip"
        }
    }
    foreach ($pattern in $secretPatterns) {
        if ($content -match $pattern.Pattern) {
            Add-ValidationError "$relativePath contains a possible $($pattern.Name)."
        }
    }
}

if (-not $AllowNoArtifacts -and ($distilledCount + $requirementPackageCount) -eq 0) {
    Add-ValidationError "No distillation artifacts found. Expected at least one 'status: distilled' document or 'status: requirement-draft' package. Use -AllowNoArtifacts only when intentionally checking an empty skeleton."
}

if ($errors.Count -gt 0) {
    Write-Host "Validation failed with $($errors.Count) issue(s):"
    foreach ($validationError in $errors) {
        Write-Host "- $validationError"
    }
    exit 1
}

Write-Host "Validation passed. Checked $($markdownFiles.Count) Markdown file(s), including $distilledCount distilled document(s) and $requirementPackageCount requirement package(s)."