fix(daemon): fix Codex token usage parsing from session logs

The parser read `payload.msg` but Codex JSONL files store token data at
`payload.info`. Also adds model tracking from `turn_context` events,
`last_token_usage` fallback, and `cache_read_input_tokens` field support.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jiayuan 2026-03-28 00:03:47 +08:00
parent 6395a74661
commit 241acb3eac
2 changed files with 192 additions and 45 deletions

View file

@ -58,26 +58,29 @@ func codexLogRoot() string {
// codexEvent represents a line in a Codex session JSONL file.
type codexEvent struct {
Type string `json:"type"`
Timestamp string `json:"timestamp"`
Payload *struct {
Type string `json:"type"`
Msg json.RawMessage `json:"msg"`
} `json:"payload"`
Type string `json:"type"`
Payload *codexPayload `json:"payload"`
}
// codexTokenCount represents the token_count info structure.
type codexTokenCount struct {
Info *struct {
TotalTokenUsage *struct {
InputTokens int64 `json:"input_tokens"`
OutputTokens int64 `json:"output_tokens"`
CachedInputTokens int64 `json:"cached_input_tokens"`
ReasoningOutputTokens int64 `json:"reasoning_output_tokens"`
TotalTokens int64 `json:"total_tokens"`
} `json:"total_token_usage"`
Model string `json:"model"`
} `json:"info"`
// codexPayload is the decoded "payload" object of a codexEvent line.
// One struct serves both event shapes the parser reads: token_count
// payloads (Type and Info) and turn_context payloads (Model).
type codexPayload struct {
// Type is the payload-level type; the parser acts on "token_count".
Type string `json:"type"`
// Info holds the token counters; it is nil when the JSON value is null or absent.
Info *codexTokenInfo `json:"info"`
Model string `json:"model"` // present in turn_context events
}
// codexTokenInfo is the "info" object of a token_count payload.
type codexTokenInfo struct {
// TotalTokenUsage is the usage block the parser prefers when it is present.
TotalTokenUsage *codexTokenUsage `json:"total_token_usage"`
// LastTokenUsage is used by the parser only when TotalTokenUsage is nil.
LastTokenUsage *codexTokenUsage `json:"last_token_usage"`
// Model, when non-empty, replaces the model name tracked so far by the parser.
Model string `json:"model"`
}
// codexTokenUsage holds one set of token counters from a token_count event.
// It is the element type of both total_token_usage and last_token_usage.
type codexTokenUsage struct {
InputTokens int64 `json:"input_tokens"`
OutputTokens int64 `json:"output_tokens"`
// CachedInputTokens and CacheReadInputTokens are two JSON names for the
// cache-read count; the parser falls back to CacheReadInputTokens only
// when CachedInputTokens is zero.
CachedInputTokens int64 `json:"cached_input_tokens"`
CacheReadInputTokens int64 `json:"cache_read_input_tokens"`
// ReasoningOutputTokens is added to OutputTokens when the record is built.
ReasoningOutputTokens int64 `json:"reasoning_output_tokens"`
// TotalTokens is decoded but not read by the parser code visible here.
TotalTokens int64 `json:"total_tokens"`
}
// parseCodexFile extracts the final cumulative token_count from a Codex session file.
@ -95,7 +98,7 @@ func (s *Scanner) parseCodexFile(path string) *Record {
return nil
}
var lastUsage *codexTokenCount
var lastUsage *codexTokenUsage
var lastModel string
scanner := bufio.NewScanner(f)
@ -104,40 +107,40 @@ func (s *Scanner) parseCodexFile(path string) *Record {
for scanner.Scan() {
line := scanner.Bytes()
// Fast pre-filter
if !bytesContains(line, `"token_count"`) {
// Fast pre-filter: only parse lines with token_count or turn_context
hasTokenCount := bytesContains(line, `"token_count"`)
hasTurnContext := bytesContains(line, `"turn_context"`)
if !hasTokenCount && !hasTurnContext {
continue
}
// Try direct event format: {"type": "event_msg", "payload": {"type": "token_count", ...}}
var evt codexEvent
if err := json.Unmarshal(line, &evt); err != nil {
if err := json.Unmarshal(line, &evt); err != nil || evt.Payload == nil {
continue
}
// Check if payload contains token_count
if evt.Payload != nil && evt.Payload.Type == "token_count" {
var tc codexTokenCount
if err := json.Unmarshal(evt.Payload.Msg, &tc); err == nil && tc.Info != nil && tc.Info.TotalTokenUsage != nil {
lastUsage = &tc
if tc.Info.Model != "" {
lastModel = tc.Info.Model
}
continue
}
// Track model from turn_context events
if evt.Type == "turn_context" && evt.Payload.Model != "" {
lastModel = evt.Payload.Model
continue
}
// Also try flat format where msg is at top level
var tc codexTokenCount
if err := json.Unmarshal(line, &tc); err == nil && tc.Info != nil && tc.Info.TotalTokenUsage != nil {
lastUsage = &tc
if tc.Info.Model != "" {
lastModel = tc.Info.Model
// Extract token usage from token_count events
if evt.Payload.Type == "token_count" && evt.Payload.Info != nil {
usage := evt.Payload.Info.TotalTokenUsage
if usage == nil {
usage = evt.Payload.Info.LastTokenUsage
}
if usage != nil {
lastUsage = usage
if evt.Payload.Info.Model != "" {
lastModel = evt.Payload.Info.Model
}
}
}
}
if lastUsage == nil || lastUsage.Info == nil || lastUsage.Info.TotalTokenUsage == nil {
if lastUsage == nil {
return nil
}
@ -146,15 +149,19 @@ func (s *Scanner) parseCodexFile(path string) *Record {
model = "unknown"
}
usage := lastUsage.Info.TotalTokenUsage
cachedTokens := lastUsage.CachedInputTokens
if cachedTokens == 0 {
cachedTokens = lastUsage.CacheReadInputTokens
}
return &Record{
Date: date,
Provider: "codex",
Model: model,
InputTokens: usage.InputTokens,
OutputTokens: usage.OutputTokens + usage.ReasoningOutputTokens,
CacheReadTokens: usage.CachedInputTokens,
CacheWriteTokens: 0, // Codex doesn't have cache write tokens
InputTokens: lastUsage.InputTokens,
OutputTokens: lastUsage.OutputTokens + lastUsage.ReasoningOutputTokens,
CacheReadTokens: cachedTokens,
CacheWriteTokens: 0,
}
}

View file

@ -0,0 +1,140 @@
package usage
import (
"log/slog"
"os"
"path/filepath"
"testing"
)
// TestParseCodexFile runs parseCodexFile over a session file holding a
// turn_context event, a token_count event with null info, and a token_count
// event carrying both total_token_usage and last_token_usage, then verifies
// each field of the resulting record.
func TestParseCodexFile(t *testing.T) {
	// Session files live under sessions/YYYY/MM/DD/<name>.jsonl.
	dayDir := filepath.Join(t.TempDir(), "sessions", "2026", "01", "14")
	if mkErr := os.MkdirAll(dayDir, 0o755); mkErr != nil {
		t.Fatal(mkErr)
	}

	// One JSONL event per constant; the file is these lines, each newline-terminated.
	const (
		turnContextLine = `{"timestamp":"2026-01-13T17:41:31.666Z","type":"turn_context","payload":{"cwd":"/tmp","model":"gpt-5.2-codex","effort":"high"}}`
		nullInfoLine    = `{"timestamp":"2026-01-13T17:41:32.916Z","type":"event_msg","payload":{"type":"token_count","info":null,"rate_limits":{"primary":{"used_percent":24.0}}}}`
		usageLine       = `{"timestamp":"2026-01-13T17:44:06.217Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":328894,"cached_input_tokens":287872,"output_tokens":3071,"reasoning_output_tokens":960,"total_tokens":331965},"last_token_usage":{"input_tokens":24525,"cached_input_tokens":3200,"output_tokens":1815,"reasoning_output_tokens":960,"total_tokens":26340},"model_context_window":258400},"rate_limits":{"primary":{"used_percent":26.0}}}}`
	)
	data := turnContextLine + "\n" + nullInfoLine + "\n" + usageLine + "\n"

	sessionPath := filepath.Join(dayDir, "rollout-test.jsonl")
	if wrErr := os.WriteFile(sessionPath, []byte(data), 0o644); wrErr != nil {
		t.Fatal(wrErr)
	}

	rec := NewScanner(slog.Default()).parseCodexFile(sessionPath)
	if rec == nil {
		t.Fatal("expected non-nil record")
	}

	// The expected date matches the directory components (2026/01/14),
	// not the 2026-01-13 timestamps inside the events.
	if got := rec.Date; got != "2026-01-14" {
		t.Errorf("date = %q, want %q", got, "2026-01-14")
	}
	if got := rec.Provider; got != "codex" {
		t.Errorf("provider = %q, want %q", got, "codex")
	}
	if got := rec.Model; got != "gpt-5.2-codex" {
		t.Errorf("model = %q, want %q", got, "gpt-5.2-codex")
	}
	if got := rec.InputTokens; got != 328894 {
		t.Errorf("input_tokens = %d, want %d", got, 328894)
	}
	// The record's output count is output_tokens plus reasoning_output_tokens.
	if got := rec.OutputTokens; got != 3071+960 {
		t.Errorf("output_tokens = %d, want %d", got, 3071+960)
	}
	if got := rec.CacheReadTokens; got != 287872 {
		t.Errorf("cache_read_tokens = %d, want %d", got, 287872)
	}
}
// TestParseCodexFile_NullInfo checks that a session file whose only
// token_count event has "info": null produces no record at all.
func TestParseCodexFile_NullInfo(t *testing.T) {
	dayDir := filepath.Join(t.TempDir(), "sessions", "2026", "01", "14")
	if mkErr := os.MkdirAll(dayDir, 0o755); mkErr != nil {
		t.Fatal(mkErr)
	}

	// A single newline-terminated event with no usage data.
	const nullInfoLine = `{"timestamp":"2026-01-13T17:41:32.916Z","type":"event_msg","payload":{"type":"token_count","info":null}}`
	data := nullInfoLine + "\n"

	sessionPath := filepath.Join(dayDir, "rollout-test.jsonl")
	if wrErr := os.WriteFile(sessionPath, []byte(data), 0o644); wrErr != nil {
		t.Fatal(wrErr)
	}

	if rec := NewScanner(slog.Default()).parseCodexFile(sessionPath); rec != nil {
		t.Errorf("expected nil record for null info, got %+v", rec)
	}
}
// TestParseCodexFile_LastTokenUsageFallback checks that the counters from
// last_token_usage are reported when the token_count event carries no
// total_token_usage object.
func TestParseCodexFile_LastTokenUsageFallback(t *testing.T) {
	dayDir := filepath.Join(t.TempDir(), "sessions", "2026", "03", "27")
	if mkErr := os.MkdirAll(dayDir, 0o755); mkErr != nil {
		t.Fatal(mkErr)
	}

	// A turn_context event followed by a token_count event that has only last_token_usage.
	const (
		turnContextLine = `{"timestamp":"2026-03-27T10:00:00Z","type":"turn_context","payload":{"model":"gpt-5"}}`
		lastUsageLine   = `{"timestamp":"2026-03-27T10:01:00Z","type":"event_msg","payload":{"type":"token_count","info":{"last_token_usage":{"input_tokens":1000,"cached_input_tokens":200,"output_tokens":500}}}}`
	)
	data := turnContextLine + "\n" + lastUsageLine + "\n"

	sessionPath := filepath.Join(dayDir, "rollout-test.jsonl")
	if wrErr := os.WriteFile(sessionPath, []byte(data), 0o644); wrErr != nil {
		t.Fatal(wrErr)
	}

	rec := NewScanner(slog.Default()).parseCodexFile(sessionPath)
	if rec == nil {
		t.Fatal("expected non-nil record")
	}

	if got := rec.InputTokens; got != 1000 {
		t.Errorf("input_tokens = %d, want %d", got, 1000)
	}
	if got := rec.OutputTokens; got != 500 {
		t.Errorf("output_tokens = %d, want %d", got, 500)
	}
	if got := rec.CacheReadTokens; got != 200 {
		t.Errorf("cache_read_tokens = %d, want %d", got, 200)
	}
}
// TestParseCodexFile_CacheReadInputTokens checks that a usage block using
// the cache_read_input_tokens field name still populates the record's
// cache-read count, and that the model is taken from the info object.
func TestParseCodexFile_CacheReadInputTokens(t *testing.T) {
	dayDir := filepath.Join(t.TempDir(), "sessions", "2026", "03", "27")
	if mkErr := os.MkdirAll(dayDir, 0o755); mkErr != nil {
		t.Fatal(mkErr)
	}

	// A single token_count event; there is no turn_context line, so the
	// only model name in the file is the one inside "info".
	const usageLine = `{"timestamp":"2026-03-27T10:00:00Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":5000,"cache_read_input_tokens":3000,"output_tokens":800},"model":"gpt-5.2-codex"}}}`
	data := usageLine + "\n"

	sessionPath := filepath.Join(dayDir, "rollout-test.jsonl")
	if wrErr := os.WriteFile(sessionPath, []byte(data), 0o644); wrErr != nil {
		t.Fatal(wrErr)
	}

	rec := NewScanner(slog.Default()).parseCodexFile(sessionPath)
	if rec == nil {
		t.Fatal("expected non-nil record")
	}

	if got := rec.CacheReadTokens; got != 3000 {
		t.Errorf("cache_read_tokens = %d, want %d", got, 3000)
	}
	if got := rec.Model; got != "gpt-5.2-codex" {
		t.Errorf("model = %q, want %q", got, "gpt-5.2-codex")
	}
}