fix(daemon): fix Codex token usage parsing from session logs
The parser read `payload.msg`, but Codex JSONL files store token data at `payload.info`. This commit also adds model tracking from `turn_context` events, a `last_token_usage` fallback used when `total_token_usage` is absent, and support for the `cache_read_input_tokens` field. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
6395a74661
commit
241acb3eac
2 changed files with 192 additions and 45 deletions
|
|
@ -58,26 +58,29 @@ func codexLogRoot() string {
|
|||
|
||||
// codexEvent represents a line in a Codex session JSONL file.
|
||||
type codexEvent struct {
|
||||
Type string `json:"type"`
|
||||
Timestamp string `json:"timestamp"`
|
||||
Payload *struct {
|
||||
Type string `json:"type"`
|
||||
Msg json.RawMessage `json:"msg"`
|
||||
} `json:"payload"`
|
||||
Type string `json:"type"`
|
||||
Payload *codexPayload `json:"payload"`
|
||||
}
|
||||
|
||||
// codexTokenCount represents the token_count info structure.
|
||||
type codexTokenCount struct {
|
||||
Info *struct {
|
||||
TotalTokenUsage *struct {
|
||||
InputTokens int64 `json:"input_tokens"`
|
||||
OutputTokens int64 `json:"output_tokens"`
|
||||
CachedInputTokens int64 `json:"cached_input_tokens"`
|
||||
ReasoningOutputTokens int64 `json:"reasoning_output_tokens"`
|
||||
TotalTokens int64 `json:"total_tokens"`
|
||||
} `json:"total_token_usage"`
|
||||
Model string `json:"model"`
|
||||
} `json:"info"`
|
||||
type codexPayload struct {
|
||||
Type string `json:"type"`
|
||||
Info *codexTokenInfo `json:"info"`
|
||||
Model string `json:"model"` // present in turn_context events
|
||||
}
|
||||
|
||||
type codexTokenInfo struct {
|
||||
TotalTokenUsage *codexTokenUsage `json:"total_token_usage"`
|
||||
LastTokenUsage *codexTokenUsage `json:"last_token_usage"`
|
||||
Model string `json:"model"`
|
||||
}
|
||||
|
||||
// codexTokenUsage is one set of token counters as written in a Codex session
// log. Every field is a plain int64 decoded from the JSON key named in its tag;
// keys that are absent leave the field at zero.
type codexTokenUsage struct {
	// Counters decoded from "input_tokens" and "output_tokens".
	InputTokens  int64 `json:"input_tokens"`
	OutputTokens int64 `json:"output_tokens"`

	// The cached-input counter can arrive under either of these two keys, so
	// both are decoded.
	CachedInputTokens    int64 `json:"cached_input_tokens"`
	CacheReadInputTokens int64 `json:"cache_read_input_tokens"`

	// Counters decoded from "reasoning_output_tokens" and "total_tokens".
	ReasoningOutputTokens int64 `json:"reasoning_output_tokens"`
	TotalTokens           int64 `json:"total_tokens"`
}
|
||||
|
||||
// parseCodexFile extracts the final cumulative token_count from a Codex session file.
|
||||
|
|
@ -95,7 +98,7 @@ func (s *Scanner) parseCodexFile(path string) *Record {
|
|||
return nil
|
||||
}
|
||||
|
||||
var lastUsage *codexTokenCount
|
||||
var lastUsage *codexTokenUsage
|
||||
var lastModel string
|
||||
|
||||
scanner := bufio.NewScanner(f)
|
||||
|
|
@ -104,40 +107,40 @@ func (s *Scanner) parseCodexFile(path string) *Record {
|
|||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
|
||||
// Fast pre-filter
|
||||
if !bytesContains(line, `"token_count"`) {
|
||||
// Fast pre-filter: only parse lines with token_count or turn_context
|
||||
hasTokenCount := bytesContains(line, `"token_count"`)
|
||||
hasTurnContext := bytesContains(line, `"turn_context"`)
|
||||
if !hasTokenCount && !hasTurnContext {
|
||||
continue
|
||||
}
|
||||
|
||||
// Try direct event format: {"type": "event_msg", "payload": {"type": "token_count", ...}}
|
||||
var evt codexEvent
|
||||
if err := json.Unmarshal(line, &evt); err != nil {
|
||||
if err := json.Unmarshal(line, &evt); err != nil || evt.Payload == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if payload contains token_count
|
||||
if evt.Payload != nil && evt.Payload.Type == "token_count" {
|
||||
var tc codexTokenCount
|
||||
if err := json.Unmarshal(evt.Payload.Msg, &tc); err == nil && tc.Info != nil && tc.Info.TotalTokenUsage != nil {
|
||||
lastUsage = &tc
|
||||
if tc.Info.Model != "" {
|
||||
lastModel = tc.Info.Model
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Track model from turn_context events
|
||||
if evt.Type == "turn_context" && evt.Payload.Model != "" {
|
||||
lastModel = evt.Payload.Model
|
||||
continue
|
||||
}
|
||||
|
||||
// Also try flat format where msg is at top level
|
||||
var tc codexTokenCount
|
||||
if err := json.Unmarshal(line, &tc); err == nil && tc.Info != nil && tc.Info.TotalTokenUsage != nil {
|
||||
lastUsage = &tc
|
||||
if tc.Info.Model != "" {
|
||||
lastModel = tc.Info.Model
|
||||
// Extract token usage from token_count events
|
||||
if evt.Payload.Type == "token_count" && evt.Payload.Info != nil {
|
||||
usage := evt.Payload.Info.TotalTokenUsage
|
||||
if usage == nil {
|
||||
usage = evt.Payload.Info.LastTokenUsage
|
||||
}
|
||||
if usage != nil {
|
||||
lastUsage = usage
|
||||
if evt.Payload.Info.Model != "" {
|
||||
lastModel = evt.Payload.Info.Model
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if lastUsage == nil || lastUsage.Info == nil || lastUsage.Info.TotalTokenUsage == nil {
|
||||
if lastUsage == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -146,15 +149,19 @@ func (s *Scanner) parseCodexFile(path string) *Record {
|
|||
model = "unknown"
|
||||
}
|
||||
|
||||
usage := lastUsage.Info.TotalTokenUsage
|
||||
cachedTokens := lastUsage.CachedInputTokens
|
||||
if cachedTokens == 0 {
|
||||
cachedTokens = lastUsage.CacheReadInputTokens
|
||||
}
|
||||
|
||||
return &Record{
|
||||
Date: date,
|
||||
Provider: "codex",
|
||||
Model: model,
|
||||
InputTokens: usage.InputTokens,
|
||||
OutputTokens: usage.OutputTokens + usage.ReasoningOutputTokens,
|
||||
CacheReadTokens: usage.CachedInputTokens,
|
||||
CacheWriteTokens: 0, // Codex doesn't have cache write tokens
|
||||
InputTokens: lastUsage.InputTokens,
|
||||
OutputTokens: lastUsage.OutputTokens + lastUsage.ReasoningOutputTokens,
|
||||
CacheReadTokens: cachedTokens,
|
||||
CacheWriteTokens: 0,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
140
server/internal/daemon/usage/codex_test.go
Normal file
140
server/internal/daemon/usage/codex_test.go
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
package usage
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseCodexFile(t *testing.T) {
|
||||
// Create a temp directory structure: sessions/YYYY/MM/DD/file.jsonl
|
||||
tmp := t.TempDir()
|
||||
sessionsDir := filepath.Join(tmp, "sessions", "2026", "01", "14")
|
||||
if err := os.MkdirAll(sessionsDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Real Codex JSONL format with turn_context and token_count events
|
||||
content := `{"timestamp":"2026-01-13T17:41:31.666Z","type":"turn_context","payload":{"cwd":"/tmp","model":"gpt-5.2-codex","effort":"high"}}
|
||||
{"timestamp":"2026-01-13T17:41:32.916Z","type":"event_msg","payload":{"type":"token_count","info":null,"rate_limits":{"primary":{"used_percent":24.0}}}}
|
||||
{"timestamp":"2026-01-13T17:44:06.217Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":328894,"cached_input_tokens":287872,"output_tokens":3071,"reasoning_output_tokens":960,"total_tokens":331965},"last_token_usage":{"input_tokens":24525,"cached_input_tokens":3200,"output_tokens":1815,"reasoning_output_tokens":960,"total_tokens":26340},"model_context_window":258400},"rate_limits":{"primary":{"used_percent":26.0}}}}
|
||||
`
|
||||
|
||||
filePath := filepath.Join(sessionsDir, "rollout-test.jsonl")
|
||||
if err := os.WriteFile(filePath, []byte(content), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
s := NewScanner(slog.Default())
|
||||
record := s.parseCodexFile(filePath)
|
||||
|
||||
if record == nil {
|
||||
t.Fatal("expected non-nil record")
|
||||
}
|
||||
|
||||
if record.Date != "2026-01-14" {
|
||||
t.Errorf("date = %q, want %q", record.Date, "2026-01-14")
|
||||
}
|
||||
if record.Provider != "codex" {
|
||||
t.Errorf("provider = %q, want %q", record.Provider, "codex")
|
||||
}
|
||||
if record.Model != "gpt-5.2-codex" {
|
||||
t.Errorf("model = %q, want %q", record.Model, "gpt-5.2-codex")
|
||||
}
|
||||
if record.InputTokens != 328894 {
|
||||
t.Errorf("input_tokens = %d, want %d", record.InputTokens, 328894)
|
||||
}
|
||||
// output_tokens + reasoning_output_tokens
|
||||
if record.OutputTokens != 3071+960 {
|
||||
t.Errorf("output_tokens = %d, want %d", record.OutputTokens, 3071+960)
|
||||
}
|
||||
if record.CacheReadTokens != 287872 {
|
||||
t.Errorf("cache_read_tokens = %d, want %d", record.CacheReadTokens, 287872)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCodexFile_NullInfo(t *testing.T) {
|
||||
// When all token_count events have info:null, should return nil
|
||||
tmp := t.TempDir()
|
||||
sessionsDir := filepath.Join(tmp, "sessions", "2026", "01", "14")
|
||||
if err := os.MkdirAll(sessionsDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
content := `{"timestamp":"2026-01-13T17:41:32.916Z","type":"event_msg","payload":{"type":"token_count","info":null}}
|
||||
`
|
||||
filePath := filepath.Join(sessionsDir, "rollout-test.jsonl")
|
||||
if err := os.WriteFile(filePath, []byte(content), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
s := NewScanner(slog.Default())
|
||||
record := s.parseCodexFile(filePath)
|
||||
|
||||
if record != nil {
|
||||
t.Errorf("expected nil record for null info, got %+v", record)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCodexFile_LastTokenUsageFallback(t *testing.T) {
|
||||
// When total_token_usage is absent but last_token_usage exists
|
||||
tmp := t.TempDir()
|
||||
sessionsDir := filepath.Join(tmp, "sessions", "2026", "03", "27")
|
||||
if err := os.MkdirAll(sessionsDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
content := `{"timestamp":"2026-03-27T10:00:00Z","type":"turn_context","payload":{"model":"gpt-5"}}
|
||||
{"timestamp":"2026-03-27T10:01:00Z","type":"event_msg","payload":{"type":"token_count","info":{"last_token_usage":{"input_tokens":1000,"cached_input_tokens":200,"output_tokens":500}}}}
|
||||
`
|
||||
filePath := filepath.Join(sessionsDir, "rollout-test.jsonl")
|
||||
if err := os.WriteFile(filePath, []byte(content), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
s := NewScanner(slog.Default())
|
||||
record := s.parseCodexFile(filePath)
|
||||
|
||||
if record == nil {
|
||||
t.Fatal("expected non-nil record")
|
||||
}
|
||||
if record.InputTokens != 1000 {
|
||||
t.Errorf("input_tokens = %d, want %d", record.InputTokens, 1000)
|
||||
}
|
||||
if record.OutputTokens != 500 {
|
||||
t.Errorf("output_tokens = %d, want %d", record.OutputTokens, 500)
|
||||
}
|
||||
if record.CacheReadTokens != 200 {
|
||||
t.Errorf("cache_read_tokens = %d, want %d", record.CacheReadTokens, 200)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCodexFile_CacheReadInputTokens(t *testing.T) {
|
||||
// Test the alternative field name cache_read_input_tokens
|
||||
tmp := t.TempDir()
|
||||
sessionsDir := filepath.Join(tmp, "sessions", "2026", "03", "27")
|
||||
if err := os.MkdirAll(sessionsDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
content := `{"timestamp":"2026-03-27T10:00:00Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":5000,"cache_read_input_tokens":3000,"output_tokens":800},"model":"gpt-5.2-codex"}}}
|
||||
`
|
||||
filePath := filepath.Join(sessionsDir, "rollout-test.jsonl")
|
||||
if err := os.WriteFile(filePath, []byte(content), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
s := NewScanner(slog.Default())
|
||||
record := s.parseCodexFile(filePath)
|
||||
|
||||
if record == nil {
|
||||
t.Fatal("expected non-nil record")
|
||||
}
|
||||
if record.CacheReadTokens != 3000 {
|
||||
t.Errorf("cache_read_tokens = %d, want %d", record.CacheReadTokens, 3000)
|
||||
}
|
||||
if record.Model != "gpt-5.2-codex" {
|
||||
t.Errorf("model = %q, want %q", record.Model, "gpt-5.2-codex")
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue