Merge pull request #197 from multica-ai/forrestchang/create-pr

feat(report): add code stats report generator
This commit is contained in:
Jiayuan Zhang 2026-02-15 13:09:52 +08:00 committed by GitHub
commit 358fcb3c0e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 847 additions and 0 deletions

View file

@ -0,0 +1,499 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
OUT_FILE="${1:-$ROOT_DIR/docs/code-stats-report.html}"
TMP_DIR="$(mktemp -d)"
cleanup() {
rm -rf "$TMP_DIR"
}
trap cleanup EXIT
cd "$ROOT_DIR"
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
echo "Error: this script must run inside a git repository."
exit 1
fi
# 1) Snapshot LOC from tracked files.
while IFS= read -r -d '' file; do
if [ -f "$file" ]; then
wc -l "$file"
fi
done < <(git ls-files -z) > "$TMP_DIR/wc_all.txt"
awk -v out_by_ext="$TMP_DIR/loc_by_ext.tsv" -v out_totals="$TMP_DIR/loc_totals.tsv" '
{
lines = $1
$1 = ""
sub(/^ +/, "")
file = $0
n = split(file, parts, "/")
base = parts[n]
ext = base
if (index(base, ".") > 0) {
sub(/.*\./, "", ext)
} else {
ext = "[noext]"
}
ext_lines[ext] += lines
ext_files[ext] += 1
files += 1
lines_all += lines
}
END {
for (e in ext_lines) {
printf "%s\t%d\t%d\n", e, ext_files[e], ext_lines[e] > out_by_ext
}
source_lines = 0
source_files = 0
doc_lines = 0
doc_files = 0
cfg_lines = 0
cfg_files = 0
for (e in ext_lines) {
if (e ~ /^(ts|tsx|js|jsx|mjs|cjs|py|css|scss|html|sh)$/) {
source_lines += ext_lines[e]
source_files += ext_files[e]
}
if (e == "md") {
doc_lines += ext_lines[e]
doc_files += ext_files[e]
}
if (e ~ /^(json|json5|yaml|yml|xsd)$/) {
cfg_lines += ext_lines[e]
cfg_files += ext_files[e]
}
}
printf "files\t%d\nlines\t%d\nsource_files\t%d\nsource_lines\t%d\ndoc_files\t%d\ndoc_lines\t%d\nconfig_files\t%d\nconfig_lines\t%d\n", files, lines_all, source_files, source_lines, doc_files, doc_lines, cfg_files, cfg_lines > out_totals
}
' "$TMP_DIR/wc_all.txt"
# 2) Contribution by author (email-normalized).
git log --all --no-merges --numstat --format='@@@%aN|%aE' | awk -v out="$TMP_DIR/author_by_email.tsv" '
BEGIN { FS = "\t" }
/^@@@/ {
split(substr($0, 4), h, /\|/)
name = h[1]
email = h[2]
id = email
if (!(id in display)) {
display[id] = name " <" email ">"
}
commits[id] += 1
next
}
NF == 3 && $1 ~ /^[0-9]+$/ && $2 ~ /^[0-9]+$/ {
adds[id] += $1
dels[id] += $2
}
END {
for (k in commits) {
printf "%s\t%d\t%d\t%d\t%d\n", display[k], commits[k], adds[k] + 0, dels[k] + 0, (adds[k] - dels[k]) + 0 > out
}
}
'
sort -t $'\t' -k3,3nr "$TMP_DIR/author_by_email.tsv" > "$TMP_DIR/author_by_email.sorted.tsv"
awk -F '\t' -v out="$TMP_DIR/author_human_share.tsv" '
$1 !~ /checkpointer@noreply|dependabot\[bot\]/ {
total_commits += $2
total_adds += $3
rows[++n] = $0
}
END {
for (i = 1; i <= n; i++) {
split(rows[i], f, "\t")
add_pct = (total_adds > 0) ? (f[3] / total_adds * 100) : 0
commit_pct = (total_commits > 0) ? (f[2] / total_commits * 100) : 0
printf "%s\t%d\t%d\t%d\t%d\t%.2f%%\t%.2f%%\n", f[1], f[2], f[3], f[4], f[5], add_pct, commit_pct > out
}
}
' "$TMP_DIR/author_by_email.sorted.tsv"
# 3) Contribution by author/day/hour.
git log --all --no-merges --numstat --date=format:'%Y-%m-%d|%H' --format='@@@%aE|%ad' | awk -v out="$TMP_DIR/author_day_hour_summary.tsv" '
BEGIN { FS = "\t" }
/^@@@/ {
split(substr($0, 4), h, /\|/)
email = h[1]
day = h[2]
hour = h[3]
key = email "\t" day "\t" hour
commits[key] += 1
next
}
NF == 3 && $1 ~ /^[0-9]+$/ && $2 ~ /^[0-9]+$/ {
adds[key] += $1
dels[key] += $2
}
END {
for (k in commits) {
split(k, f, "\t")
a = adds[k] + 0
d = dels[k] + 0
printf "%s\t%s\t%s\t%d\t%d\t%d\t%d\n", f[1], f[2], f[3], commits[k], a, d, (a - d) > out
}
}
'
awk -F '\t' -v out="$TMP_DIR/day_summary_human.tsv" '
$1 !~ /checkpointer@noreply|dependabot\[bot\]/ {
day = $2
commits[day] += $4
adds[day] += $5
dels[day] += $6
if (!(day in min_hour) || $3 < min_hour[day]) {
min_hour[day] = $3
}
if (!(day in max_hour) || $3 > max_hour[day]) {
max_hour[day] = $3
}
}
END {
for (d in commits) {
printf "%s\t%d\t%d\t%d\t%d\t%s\t%s\n", d, commits[d], adds[d], dels[d], adds[d] - dels[d], min_hour[d], max_hour[d] > out
}
}
' "$TMP_DIR/author_day_hour_summary.tsv"
sort -t $'\t' -k1,1 "$TMP_DIR/day_summary_human.tsv" -o "$TMP_DIR/day_summary_human.tsv"
awk -F '\t' -v out="$TMP_DIR/hour_summary_human.tsv" '
$1 !~ /checkpointer@noreply|dependabot\[bot\]/ {
hour = $3
commits[hour] += $4
adds[hour] += $5
dels[hour] += $6
}
END {
for (i = 0; i < 24; i++) {
h = sprintf("%02d", i)
a = adds[h] + 0
d = dels[h] + 0
printf "%s\t%d\t%d\t%d\t%d\n", h, commits[h] + 0, a, d, a - d > out
}
}
' "$TMP_DIR/author_day_hour_summary.tsv"
sort -t $'\t' -k1,1 "$TMP_DIR/hour_summary_human.tsv" -o "$TMP_DIR/hour_summary_human.tsv"
awk -F '\t' -v out="$TMP_DIR/day_peak_hour_human.tsv" '
$1 !~ /checkpointer@noreply|dependabot\[bot\]/ {
key = $2 "\t" $3
commits[key] += $4
adds[key] += $5
dels[key] += $6
}
END {
for (k in adds) {
split(k, parts, "\t")
day = parts[1]
hour = parts[2]
if (!(day in max_adds) || adds[k] > max_adds[day]) {
max_adds[day] = adds[k]
best_hour[day] = hour
best_commits[day] = commits[k]
best_dels[day] = dels[k]
}
}
for (d in max_adds) {
printf "%s\t%s\t%d\t%d\t%d\n", d, best_hour[d], best_commits[d], max_adds[d], best_dels[d] > out
}
}
' "$TMP_DIR/author_day_hour_summary.tsv"
sort -t $'\t' -k1,1 "$TMP_DIR/day_peak_hour_human.tsv" -o "$TMP_DIR/day_peak_hour_human.tsv"
mkdir -p "$(dirname "$OUT_FILE")"
# 4) Render standalone HTML.
{
cat <<'HTML_HEAD'
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Super Multica 代码贡献统计</title>
<style>
:root {
--bg: #0b0d10;
--panel: #14181d;
--panel-2: #1a2027;
--line: #2a3440;
--text: #e8edf3;
--muted: #98a7b7;
--ok: #2fbf71;
--danger: #ef4444;
}
* { box-sizing: border-box; }
body {
margin: 0;
font-family: ui-sans-serif, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial;
background: radial-gradient(circle at 20% -10%, #1a2430 0%, #0b0d10 45%) fixed;
color: var(--text);
line-height: 1.4;
}
.wrap { max-width: 1200px; margin: 0 auto; padding: 24px; }
h1 { margin: 0 0 8px; font-size: 28px; }
.sub { color: var(--muted); margin-bottom: 20px; }
.grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(190px, 1fr));
gap: 12px;
margin-bottom: 18px;
}
.card {
background: linear-gradient(180deg, var(--panel) 0%, var(--panel-2) 100%);
border: 1px solid var(--line);
border-radius: 10px;
padding: 12px;
}
.k { color: var(--muted); font-size: 12px; margin-bottom: 8px; }
.v { font-size: 24px; font-weight: 700; letter-spacing: 0.3px; }
.section { margin-top: 14px; }
.section h2 { margin: 0 0 10px; font-size: 16px; color: #d4dde7; }
.panel {
background: var(--panel);
border: 1px solid var(--line);
border-radius: 10px;
overflow: hidden;
}
table { width: 100%; border-collapse: collapse; }
th, td { padding: 9px 10px; border-bottom: 1px solid var(--line); font-size: 13px; }
th { background: #11161c; text-align: left; color: #c5d0db; position: sticky; top: 0; }
tr:last-child td { border-bottom: 0; }
.num { text-align: right; font-variant-numeric: tabular-nums; }
.mono { font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; }
.bar-wrap { background: #0f1318; border-radius: 999px; height: 8px; width: 180px; border: 1px solid #273241; }
.bar { height: 100%; border-radius: 999px; background: linear-gradient(90deg, #3f7ef7, #58a6ff); }
.ok { color: var(--ok); }
.danger { color: var(--danger); }
.foot { margin-top: 16px; color: var(--muted); font-size: 12px; }
.scroll { max-height: 420px; overflow: auto; }
</style>
</head>
<body>
<div class="wrap">
<h1>Super Multica 代码贡献统计</h1>
<div class="sub" id="subtitle"></div>
<div class="grid" id="summary"></div>
<div class="section">
<h2>代码量分布(按扩展名)</h2>
<div class="panel scroll"><table id="extTable"></table></div>
</div>
<div class="section">
<h2>人员贡献(人工口径)</h2>
<div class="panel scroll"><table id="authorTable"></table></div>
</div>
<div class="section">
<h2>每日贡献(人工口径)</h2>
<div class="panel scroll"><table id="dayTable"></table></div>
</div>
<div class="section">
<h2>小时段贡献(人工口径)</h2>
<div class="panel scroll"><table id="hourTable"></table></div>
</div>
<div class="foot">数据来源git log --numstat 与当前工作树文件统计。人工口径排除 checkpointer / dependabot。</div>
</div>
<script>
const RAW = {
locTotals: String.raw`
HTML_HEAD
cat "$TMP_DIR/loc_totals.tsv"
cat <<'MID1'
`,
locByExt: String.raw`
MID1
cat "$TMP_DIR/loc_by_ext.tsv"
cat <<'MID2'
`,
authorHuman: String.raw`
MID2
cat "$TMP_DIR/author_human_share.tsv"
cat <<'MID3'
`,
dayHuman: String.raw`
MID3
cat "$TMP_DIR/day_summary_human.tsv"
cat <<'MID4'
`,
hourHuman: String.raw`
MID4
cat "$TMP_DIR/hour_summary_human.tsv"
cat <<'MID5'
`,
dayPeak: String.raw`
MID5
cat "$TMP_DIR/day_peak_hour_human.tsv"
cat <<'HTML_TAIL'
`
};
const fmt = (n) => Number(n).toLocaleString("en-US");
const tsv = (txt) => txt.trim().split(/\n+/).map((line) => line.split("\t"));
const toNum = (v) => Number(v || 0);
const locTotalsRows = tsv(RAW.locTotals);
const locTotals = Object.fromEntries(locTotalsRows.map(([k, v]) => [k, toNum(v)]));
const extRows = tsv(RAW.locByExt).map(([ext, files, lines]) => ({
ext,
files: toNum(files),
lines: toNum(lines),
})).sort((a, b) => b.lines - a.lines);
const authors = tsv(RAW.authorHuman).map(([name, commits, add, del, net, addPct, commitPct]) => ({
name,
commits: toNum(commits),
add: toNum(add),
del: toNum(del),
net: toNum(net),
addPct,
commitPct,
})).sort((a, b) => b.add - a.add);
const dayPeaks = Object.fromEntries(tsv(RAW.dayPeak).map(([d, h, c, a, del]) => [d, {
hour: h,
commits: toNum(c),
add: toNum(a),
del: toNum(del),
}]));
const days = tsv(RAW.dayHuman).map(([date, commits, add, del, net, startHour, endHour]) => ({
date,
commits: toNum(commits),
add: toNum(add),
del: toNum(del),
net: toNum(net),
startHour,
endHour,
peak: dayPeaks[date] || null,
})).sort((a, b) => a.date.localeCompare(b.date));
const hours = tsv(RAW.hourHuman).map(([hour, commits, add, del, net]) => ({
hour,
commits: toNum(commits),
add: toNum(add),
del: toNum(del),
net: toNum(net),
})).sort((a, b) => a.hour.localeCompare(b.hour));
const totalHumanCommits = authors.reduce((sum, x) => sum + x.commits, 0);
const totalHumanAdd = authors.reduce((sum, x) => sum + x.add, 0);
const totalHumanDel = authors.reduce((sum, x) => sum + x.del, 0);
const topHour = [...hours].sort((a, b) => b.add - a.add)[0] || { hour: "--", add: 0 };
const startDate = days[0]?.date || "--";
const endDate = days[days.length - 1]?.date || "--";
document.getElementById("subtitle").textContent = `${startDate} ~ ${endDate}`;
const summaryItems = [
["总文件数", fmt(locTotals.files || 0)],
["总行数", fmt(locTotals.lines || 0)],
["源码行数", fmt(locTotals.source_lines || 0)],
["贡献人数", fmt(authors.length)],
["人工提交数", fmt(totalHumanCommits)],
["人工新增", fmt(totalHumanAdd)],
["人工删除", fmt(totalHumanDel)],
["最高产小时", `${topHour.hour}:00 (${fmt(topHour.add)})`],
];
document.getElementById("summary").innerHTML = summaryItems.map(([k, v]) => (
`<div class="card"><div class="k">${k}</div><div class="v">${v}</div></div>`
)).join("");
const maxExtLines = Math.max(...extRows.map((x) => x.lines), 1);
document.getElementById("extTable").innerHTML = `
<thead><tr><th>扩展名</th><th class="num">文件数</th><th class="num">行数</th><th>占比</th><th>可视化</th></tr></thead>
<tbody>
${extRows.map((r) => {
const pct = ((r.lines / (locTotals.lines || 1)) * 100).toFixed(2);
const w = ((r.lines / maxExtLines) * 100).toFixed(1);
return `<tr>
<td class="mono">${r.ext}</td>
<td class="num">${fmt(r.files)}</td>
<td class="num">${fmt(r.lines)}</td>
<td class="num">${pct}%</td>
<td><div class="bar-wrap"><div class="bar" style="width:${w}%"></div></div></td>
</tr>`;
}).join("")}
</tbody>`;
document.getElementById("authorTable").innerHTML = `
<thead><tr><th>作者</th><th class="num">提交</th><th class="num">新增</th><th class="num">删除</th><th class="num">净新增</th><th class="num">新增占比</th><th class="num">提交占比</th></tr></thead>
<tbody>
${authors.map((a) => `<tr>
<td>${a.name}</td>
<td class="num">${fmt(a.commits)}</td>
<td class="num">${fmt(a.add)}</td>
<td class="num">${fmt(a.del)}</td>
<td class="num ${a.net >= 0 ? "ok" : "danger"}">${fmt(a.net)}</td>
<td class="num">${a.addPct}</td>
<td class="num">${a.commitPct}</td>
</tr>`).join("")}
</tbody>`;
document.getElementById("dayTable").innerHTML = `
<thead><tr><th>日期</th><th class="num">提交</th><th class="num">新增</th><th class="num">删除</th><th class="num">净新增</th><th>活跃时段</th><th>峰值小时</th></tr></thead>
<tbody>
${days.map((d) => `<tr>
<td class="mono">${d.date}</td>
<td class="num">${fmt(d.commits)}</td>
<td class="num">${fmt(d.add)}</td>
<td class="num">${fmt(d.del)}</td>
<td class="num ${d.net >= 0 ? "ok" : "danger"}">${fmt(d.net)}</td>
<td class="mono">${d.startHour}:00 - ${d.endHour}:59</td>
<td class="mono">${d.peak ? `${d.peak.hour}:00 (${fmt(d.peak.add)})` : "--"}</td>
</tr>`).join("")}
</tbody>`;
const maxHourAdd = Math.max(...hours.map((h) => h.add), 1);
document.getElementById("hourTable").innerHTML = `
<thead><tr><th>小时</th><th class="num">提交</th><th class="num">新增</th><th class="num">删除</th><th class="num">净新增</th><th>可视化</th></tr></thead>
<tbody>
${hours.map((h) => {
const w = ((h.add / maxHourAdd) * 100).toFixed(1);
return `<tr>
<td class="mono">${h.hour}:00</td>
<td class="num">${fmt(h.commits)}</td>
<td class="num">${fmt(h.add)}</td>
<td class="num">${fmt(h.del)}</td>
<td class="num ${h.net >= 0 ? "ok" : "danger"}">${fmt(h.net)}</td>
<td><div class="bar-wrap"><div class="bar" style="width:${w}%"></div></div></td>
</tr>`;
}).join("")}
</tbody>`;
</script>
</body>
</html>
HTML_TAIL
} > "$OUT_FILE"
echo "Report generated: $OUT_FILE"