Files
calminer/scripts/format_docs_md.py
zwitschi 4b3a15ed15 Add comprehensive architecture documentation and related scripts
- Introduced multiple architecture documentation files covering building block view, runtime view, deployment view, concepts, architecture decisions, quality requirements, technical risks, glossary, UI and styling, testing, CI, and development setup.
- Migrated existing content from `architecture_overview.md` and `implementation_plan.md` into structured documentation.
- Created scripts for checking broken links in documentation and formatting Markdown files for consistency.
- Updated quickstart guide to provide clearer setup instructions and usage overview.
- Removed outdated MVP features and testing strategy documents to streamline documentation.
2025-10-21 15:39:17 +02:00

80 lines
2.5 KiB
Python

"""Lightweight Markdown formatter: normalizes first-line H1, adds code-fence language hints for common shebangs, trims trailing whitespace.
This is intentionally small and non-destructive; it touches only files under docs/ and makes safe changes.
"""
import re
from pathlib import Path
# Directory scanned by this script: the "docs" folder that sits next to the
# directory containing this file (i.e. one level above the script's folder).
DOCS = Path(__file__).resolve().parents[1].joinpath("docs")
# Maps a code-fence language tag to the interpreter names that select it when
# they appear in a shebang line (for example ``#!/bin/sh`` -> ``bash``).
CODE_LANG_HINTS = {
    'powershell': ('powershell',),
    'bash': ('bash', 'sh'),
    'sql': ('sql',),
    'python': ('python',),
}


def _language_from_shebang(first_line):
    """Return the fence language named by a ``#!`` line, or '' if unknown."""
    tokens = first_line[2:].split()
    if not tokens:
        return ''
    interpreter = tokens[0].rsplit('/', 1)[-1]
    if interpreter == 'env':
        # ``#!/usr/bin/env python3``: the interpreter is the first argument
        # that is not an option such as ``-S``.
        candidates = [tok for tok in tokens[1:] if not tok.startswith('-')]
        if not candidates:
            return ''
        interpreter = candidates[0].rsplit('/', 1)[-1]
    # Drop a trailing version suffix so ``python3.11`` is looked up as ``python``.
    interpreter = re.sub(r'[\d.]+$', '', interpreter).lower()
    for lang, interpreters in CODE_LANG_HINTS.items():
        if interpreter in interpreters:
            return lang
    return ''


def add_code_fence_language(match):
    """Return a fenced code block with an inferred language tag added.

    Intended as the replacement callback for ``re.sub``.

    Args:
        match: A regex match whose group 0 is the whole fenced block
            (opening fence, body, closing fence) and whose group 1 is the
            body between the fences.

    Returns:
        The block rewritten as ```` ```<lang> ```` + body + closing fence when
        a language can be inferred from the first non-blank body line;
        otherwise the matched text unchanged. Blocks whose opening fence
        already carries a language are always returned unchanged.
    """
    fence = match.group(0)
    inner = match.group(1) or ''

    # If language already present, return unchanged
    if fence.startswith('```') and len(fence.splitlines()[0].strip()) > 3:
        return fence

    # Infer the language from the first non-blank line of the body.
    stripped = inner.strip()
    code = stripped.splitlines()[0] if stripped else ''

    if code.startswith('#!'):
        # A shebang names its interpreter explicitly; checking it first keeps
        # ``#!/bin/bash`` from being taken for a Python comment below. An
        # unrecognized interpreter leaves the block unlabeled.
        lang = _language_from_shebang(code)
    elif code.startswith(('$', 'PS')) or code.lower().startswith('powershell'):
        lang = 'powershell'
    elif code.startswith(('#', 'import', 'from')):
        lang = 'python'
    elif re.match(r'^(select|insert|update|create)\b', code.strip(), re.I):
        lang = 'sql'
    elif code.startswith(('git', './', 'sudo')):
        lang = 'bash'
    else:
        lang = ''

    if lang:
        return f'```{lang}\n{inner}\n```'
    return fence
# Matches one complete fenced code block whose fences start at column 0:
# opening fence (with or without a language), body (group 1), closing fence.
# Consuming labeled blocks as well keeps opening/closing fences paired, so the
# closing fence of one block is never taken for the opening fence of the next.
_FENCED_BLOCK_RE = re.compile(r'^```[^`\n]*\n([\s\S]*?)\n?^```$', re.MULTILINE)


def normalize_file(path: Path) -> bool:
    """Normalize one Markdown file in place.

    Applies, in order:
      1. Strip trailing whitespace from every line and end the file with
         exactly one newline (an empty file stays empty).
      2. Prefix the first non-empty line with ``# `` unless it already starts
         with ``#``.
      3. Pass every fenced code block to ``add_code_fence_language`` so that
         unlabeled fences can receive a language tag.

    Args:
        path: Markdown file to read and, if needed, rewrite (UTF-8).

    Returns:
        True if the file content changed and was written back, else False.
    """
    orig = path.read_text(encoding='utf-8')

    # Trim trailing whitespace, then drop trailing blank lines so the file
    # ends with a single newline once re-joined.
    lines = [line.rstrip() for line in orig.splitlines()]
    while lines and not lines[-1]:
        lines.pop()

    # Ensure first non-empty line is H1.
    # NOTE(review): any non-heading first line is prefixed, which would also
    # affect e.g. a YAML front-matter delimiter - confirm no docs rely on one.
    for index, line in enumerate(lines):
        if line:
            if not line.startswith('#'):
                lines[index] = '# ' + line
            break

    text = '\n'.join(lines) + '\n' if lines else ''

    # Add basic code fence languages where missing (simple heuristic). The
    # callback returns already-labeled blocks unchanged.
    text = _FENCED_BLOCK_RE.sub(add_code_fence_language, text)

    if text != orig:
        path.write_text(text, encoding='utf-8')
        return True
    return False
def main():
    """Format every Markdown file under ``DOCS`` and print a summary.

    Files are processed best-effort: a failure on one file is reported and
    the remaining files are still processed.
    """
    cwd = Path.cwd()
    changed = []
    # Sorted so the summary is in a stable order across runs and platforms.
    for p in sorted(DOCS.rglob('*.md')):
        if not p.is_file():
            continue
        try:
            modified = normalize_file(p)
        except Exception as e:
            print(f"Failed to format {p}: {e}")
            continue
        if not modified:
            continue
        # Computed outside the try block: when the script is run from a
        # directory that does not contain the docs, relative_to() raises
        # ValueError, which must not be reported as a formatting failure for
        # a file that was in fact rewritten. Fall back to the full path.
        try:
            shown = p.relative_to(cwd)
        except ValueError:
            shown = p
        changed.append(str(shown))

    if changed:
        print('Formatted files:')
        for c in changed:
            print(' -', c)
    else:
        print('No formatting changes required.')


if __name__ == '__main__':
    main()