#!/usr/bin/env bash
#
# Collects repository statistics into temporary TSV files for the code-stats
# report written to OUT_FILE.
#
# Usage: <this script> [OUT_FILE]
#   OUT_FILE  report destination. Defaults to docs/code-stats-report.html
#             under the parent directory of the directory holding this script.
#
# Exits with status 1 when the resolved root directory is not inside a git
# work tree.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
# NOTE(review): a relative OUT_FILE is only used after the `cd "$ROOT_DIR"`
# below, so it resolves against ROOT_DIR rather than the caller's directory;
# confirm that this is intended.
OUT_FILE="${1:-$ROOT_DIR/docs/code-stats-report.html}"
TMP_DIR="$(mktemp -d)"

# Remove the scratch directory on every exit path.
cleanup() {
  rm -rf -- "$TMP_DIR"
}
trap cleanup EXIT

cd "$ROOT_DIR"

if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  # Diagnostics belong on stderr so they never mix with captured output.
  echo "Error: this script must run inside a git repository." >&2
  exit 1
fi

# 1) Snapshot LOC from tracked files.
# Paths are read NUL-delimited so names with spaces survive. Entries that are
# not regular files on disk are skipped. `--` keeps a path that starts with
# "-" from being parsed as a wc option.
while IFS= read -r -d '' file; do
  if [[ -f "$file" ]]; then
    wc -l -- "$file"
  fi
done < <(git ls-files -z) > "$TMP_DIR/wc_all.txt"

# Aggregate the "<lines> <path>" records:
#   loc_by_ext.tsv  ->  extension <TAB> file count <TAB> line count
#   loc_totals.tsv  ->  key <TAB> value rows (overall, source, doc, config)
awk -v out_by_ext="$TMP_DIR/loc_by_ext.tsv" -v out_totals="$TMP_DIR/loc_totals.tsv" '
  BEGIN {
    # Create the per-extension file up front so it exists even when no
    # record is read.
    printf "" > out_by_ext
  }

  {
    lines = $1

    # Drop the count column; what remains is the path.
    $1 = ""
    sub(/^ +/, "")
    file = $0

    n = split(file, parts, "/")
    base = parts[n]

    # Extension is the text after the last dot of the base name.
    ext = base
    if (index(base, ".") > 0) {
      sub(/.*\./, "", ext)
    } else {
      ext = "[noext]"
    }

    ext_lines[ext] += lines
    ext_files[ext] += 1
    files += 1
    lines_all += lines
  }

  END {
    source_lines = 0
    source_files = 0
    doc_lines = 0
    doc_files = 0
    cfg_lines = 0
    cfg_files = 0

    for (e in ext_lines) {
      printf "%s\t%d\t%d\n", e, ext_files[e], ext_lines[e] > out_by_ext

      if (e ~ /^(ts|tsx|js|jsx|mjs|cjs|py|css|scss|html|sh)$/) {
        source_lines += ext_lines[e]
        source_files += ext_files[e]
      }
      if (e == "md") {
        doc_lines += ext_lines[e]
        doc_files += ext_files[e]
      }
      if (e ~ /^(json|json5|yaml|yml|xsd)$/) {
        cfg_lines += ext_lines[e]
        cfg_files += ext_files[e]
      }
    }

    printf("files\t%d\nlines\t%d\nsource_files\t%d\nsource_lines\t%d\ndoc_files\t%d\ndoc_lines\t%d\nconfig_files\t%d\nconfig_lines\t%d\n",
      files, lines_all, source_files, source_lines,
      doc_files, doc_lines, cfg_files, cfg_lines) > out_totals
  }
' "$TMP_DIR/wc_all.txt"

# 2) Contribution by author (email-normalized).
# Per-author totals, keyed by author email.
# Each commit yields one header line "@@@<name>|<email>" followed by numstat
# rows "<added> TAB <deleted> TAB <path>".
# Output row: "name <email>" TAB commits TAB additions TAB deletions TAB net.
git log --all --no-merges --numstat --format='@@@%aN|%aE' \
  | awk -v out="$TMP_DIR/author_by_email.tsv" '
    BEGIN {
      FS = "\t"
      # Create the output even when the log is empty, so the sort that
      # follows always has an input file.
      printf "" > out
    }

    /^@@@/ {
      # Split at the LAST "|" so an author name that itself contains "|"
      # cannot corrupt the email key.
      hdr = substr($0, 4)
      cut = match(hdr, /\|[^|]*$/)
      name = substr(hdr, 1, cut - 1)
      email = substr(hdr, cut + 1)

      id = email
      if (!(id in display)) {
        # The first name seen for an email becomes its display label.
        display[id] = name " <" email ">"
      }
      commits[id] += 1
      next
    }

    # Numstat rows with numeric counts; rows that use "-" for the counts
    # do not match and are skipped.
    NF == 3 && $1 ~ /^[0-9]+$/ && $2 ~ /^[0-9]+$/ {
      adds[id] += $1
      dels[id] += $2
    }

    END {
      for (k in commits) {
        printf("%s\t%d\t%d\t%d\t%d\n",
          display[k], commits[k], adds[k] + 0, dels[k] + 0,
          (adds[k] - dels[k]) + 0) > out
      }
    }
  '

# Order authors by additions, largest first.
sort -t $'\t' -k3,3nr "$TMP_DIR/author_by_email.tsv" > "$TMP_DIR/author_by_email.sorted.tsv"

# Share of commits and additions per author, leaving out rows whose label
# matches the checkpointer/dependabot patterns. Two percentage columns
# (additions, commits) are appended to each kept row.
awk -F '\t' -v out="$TMP_DIR/author_human_share.tsv" '
  BEGIN {
    # Always leave an output file behind, even when every row is filtered.
    printf "" > out
  }

  $1 !~ /checkpointer@noreply|dependabot\[bot\]/ {
    total_commits += $2
    total_adds += $3
    rows[++n] = $0
  }

  END {
    # Second pass over the buffered rows, now that the totals are known.
    for (i = 1; i <= n; i++) {
      split(rows[i], f, "\t")
      add_pct = (total_adds > 0) ? (f[3] / total_adds * 100) : 0
      commit_pct = (total_commits > 0) ? (f[2] / total_commits * 100) : 0
      printf("%s\t%d\t%d\t%d\t%d\t%.2f%%\t%.2f%%\n",
        f[1], f[2], f[3], f[4], f[5], add_pct, commit_pct) > out
    }
  }
' "$TMP_DIR/author_by_email.sorted.tsv"

# 3) Contribution by author/day/hour.
# Per (author email, day, hour) totals.
# Each commit yields a header "@@@<email>|<YYYY-MM-DD>|<HH>" followed by
# numstat rows "<added> TAB <deleted> TAB <path>".
# Output row: email TAB day TAB hour TAB commits TAB adds TAB dels TAB net.
git log --all --no-merges --numstat --date=format:'%Y-%m-%d|%H' --format='@@@%aE|%ad' \
  | awk -v out="$TMP_DIR/author_day_hour_summary.tsv" '
    BEGIN {
      FS = "\t"
      # Create the output even when the log is empty, so the awk passes
      # below always have an input file.
      printf "" > out
    }

    /^@@@/ {
      split(substr($0, 4), h, /\|/)
      email = h[1]
      day = h[2]
      hour = h[3]
      key = email "\t" day "\t" hour
      commits[key] += 1
      next
    }

    # Numstat rows with numeric counts; rows that use "-" for the counts
    # do not match and are skipped.
    NF == 3 && $1 ~ /^[0-9]+$/ && $2 ~ /^[0-9]+$/ {
      adds[key] += $1
      dels[key] += $2
    }

    END {
      for (k in commits) {
        split(k, f, "\t")
        a = adds[k] + 0
        d = dels[k] + 0
        printf("%s\t%s\t%s\t%d\t%d\t%d\t%d\n",
          f[1], f[2], f[3], commits[k], a, d, (a - d)) > out
      }
    }
  '

# Per-day totals plus the earliest and latest active hour of each day, leaving
# out rows whose email matches the checkpointer/dependabot patterns.
awk -F '\t' -v out="$TMP_DIR/day_summary_human.tsv" '
  BEGIN {
    # Keep the in-place sort below from failing when no row qualifies.
    printf "" > out
  }

  $1 !~ /checkpointer@noreply|dependabot\[bot\]/ {
    day = $2
    commits[day] += $4
    adds[day] += $5
    dels[day] += $6
    if (!(day in min_hour) || $3 < min_hour[day]) {
      min_hour[day] = $3
    }
    if (!(day in max_hour) || $3 > max_hour[day]) {
      max_hour[day] = $3
    }
  }

  END {
    for (d in commits) {
      printf("%s\t%d\t%d\t%d\t%d\t%s\t%s\n",
        d, commits[d], adds[d], dels[d], adds[d] - dels[d],
        min_hour[d], max_hour[d]) > out
    }
  }
' "$TMP_DIR/author_day_hour_summary.tsv"
sort -t $'\t' -k1,1 "$TMP_DIR/day_summary_human.tsv" -o "$TMP_DIR/day_summary_human.tsv"

# Totals per hour of day. All 24 hours are emitted, with zeros for hours that
# have no matching rows.
awk -F '\t' -v out="$TMP_DIR/hour_summary_human.tsv" '
  $1 !~ /checkpointer@noreply|dependabot\[bot\]/ {
    hour = $3
    commits[hour] += $4
    adds[hour] += $5
    dels[hour] += $6
  }

  END {
    for (i = 0; i < 24; i++) {
      h = sprintf("%02d", i)
      a = adds[h] + 0
      d = dels[h] + 0
      printf("%s\t%d\t%d\t%d\t%d\n", h, commits[h] + 0, a, d, a - d) > out
    }
  }
' "$TMP_DIR/author_day_hour_summary.tsv"
sort -t $'\t' -k1,1 "$TMP_DIR/hour_summary_human.tsv" -o "$TMP_DIR/hour_summary_human.tsv"

# For each day, the hour with the most additions.
# Output row: day TAB hour TAB commits TAB additions TAB deletions.
awk -F '\t' -v out="$TMP_DIR/day_peak_hour_human.tsv" '
  BEGIN {
    # Keep the in-place sort below from failing when no row qualifies.
    printf "" > out
  }

  $1 !~ /checkpointer@noreply|dependabot\[bot\]/ {
    key = $2 "\t" $3
    commits[key] += $4
    adds[key] += $5
    dels[key] += $6
  }

  END {
    for (k in adds) {
      split(k, parts, "\t")
      day = parts[1]
      hour = parts[2]

      # Array iteration order is unspecified, so ties on additions are
      # resolved towards the earliest hour to keep the output reproducible.
      if (!(day in max_adds) || adds[k] > max_adds[day] ||
          (adds[k] == max_adds[day] && hour < best_hour[day])) {
        max_adds[day] = adds[k]
        best_hour[day] = hour
        best_commits[day] = commits[k]
        best_dels[day] = dels[k]
      }
    }

    for (d in max_adds) {
      printf("%s\t%s\t%d\t%d\t%d\n",
        d, best_hour[d], best_commits[d], max_adds[d], best_dels[d]) > out
    }
  }
' "$TMP_DIR/author_day_hour_summary.tsv"
sort -t $'\t' -k1,1 "$TMP_DIR/day_peak_hour_human.tsv" -o "$TMP_DIR/day_peak_hour_human.tsv"

# Make sure the directory that will receive the report exists.
mkdir -p "$(dirname "$OUT_FILE")"

# NOTE(review): the next line is reproduced exactly as found. It carries the
# opener of a brace group and here-document whose body continues beyond this
# section; the opener has to be moved off the comment line together with that
# body, which is not visible from here.
# 4) Render standalone HTML. { cat <<'HTML_HEAD'