[Update variants with parameter count equation]

This commit is contained in:
Kye Gomez 2026-04-19 21:55:06 -04:00
parent f00d10b59f
commit a825ba217f
4 changed files with 72 additions and 22 deletions

View file

@ -71,6 +71,41 @@ print(
## Model Variants
Pre-configured scales from 1B to 1T parameters:
```python
from open_mythos import (
mythos_1b,
mythos_3b,
mythos_10b,
mythos_50b,
mythos_100b,
mythos_500b,
mythos_1t,
OpenMythos,
)
# Any of the imported variant factories can be used here; mythos_1b is the
# smallest. (mythos_7b is not an exported variant and would raise NameError.)
cfg = mythos_1b()  # returns a MythosConfig
model = OpenMythos(cfg)
# Add up numel() over everything model.parameters() yields.
total = sum(p.numel() for p in model.parameters())
print(f"Parameters: {total:,}")
```
| Variant | `dim` | Experts | `expert_dim` | Loop iters | Context | Max output |
|---|---|---|---|---|---|---|
| `mythos_1b` | 2048 | 64 | 2048 | 16 | 4k | 4k |
| `mythos_3b` | 3072 | 64 | 4096 | 16 | 4k | 4k |
| `mythos_10b` | 4096 | 128 | 5632 | 24 | 8k | 4k |
| `mythos_50b` | 6144 | 256 | 9728 | 32 | 8k | 4k |
| `mythos_100b` | 8192 | 256 | 13568 | 32 | 1M | 128k |
| `mythos_500b` | 12288 | 512 | 23040 | 48 | 1M | 128k |
| `mythos_1t` | 16384 | 512 | 34560 | 64 | 1M | 128k |
---
## Documentation
| Page | Description |

View file

@ -1,8 +1,13 @@
from open_mythos.main import MythosConfig
# Parameter budget breakdown per variant:
# total ≈ embed + prelude/coda dense blocks + recurrent MLA + MoE
# MoE = 3 * dim * expert_dim * (n_experts + n_shared * n_experts_per_tok)
# expert_dim is solved from the residual budget after all other terms.
def mythos_1b() -> MythosConfig:
"""1B parameter config. Small research/fine-tuning model. dim=2048, 16 experts, 16 loop iters, 4k context."""
"""1B parameter config. Small research/fine-tuning model. dim=2048, 64 experts, 16 loop iters, 4k context."""
return MythosConfig(
vocab_size=32000,
dim=2048,
@ -18,10 +23,10 @@ def mythos_1b() -> MythosConfig:
qk_rope_head_dim=32,
qk_nope_head_dim=64,
v_head_dim=64,
n_experts=16,
n_experts=64,
n_shared_experts=2,
n_experts_per_tok=2,
expert_dim=256,
n_experts_per_tok=4,
expert_dim=2048,
act_threshold=0.99,
rope_theta=500000.0,
lora_rank=8,
@ -29,7 +34,7 @@ def mythos_1b() -> MythosConfig:
def mythos_3b() -> MythosConfig:
"""3B parameter config. Compact inference model. dim=3072, 32 experts, 16 loop iters, 4k context."""
"""3B parameter config. Compact inference model. dim=3072, 64 experts, 16 loop iters, 4k context."""
return MythosConfig(
vocab_size=32000,
dim=3072,
@ -45,10 +50,10 @@ def mythos_3b() -> MythosConfig:
qk_rope_head_dim=32,
qk_nope_head_dim=96,
v_head_dim=96,
n_experts=32,
n_experts=64,
n_shared_experts=2,
n_experts_per_tok=2,
expert_dim=384,
n_experts_per_tok=4,
expert_dim=4096,
act_threshold=0.99,
rope_theta=500000.0,
lora_rank=8,
@ -56,7 +61,7 @@ def mythos_3b() -> MythosConfig:
def mythos_10b() -> MythosConfig:
"""10B parameter config. Mid-scale general model. dim=4096, 64 experts, 24 loop iters, 8k context."""
"""10B parameter config. Mid-scale general model. dim=4096, 128 experts, 24 loop iters, 8k context."""
return MythosConfig(
vocab_size=32000,
dim=4096,
@ -72,10 +77,10 @@ def mythos_10b() -> MythosConfig:
qk_rope_head_dim=64,
qk_nope_head_dim=128,
v_head_dim=128,
n_experts=64,
n_experts=128,
n_shared_experts=2,
n_experts_per_tok=4,
expert_dim=512,
expert_dim=5632,
act_threshold=0.99,
rope_theta=500000.0,
lora_rank=16,
@ -83,7 +88,7 @@ def mythos_10b() -> MythosConfig:
def mythos_50b() -> MythosConfig:
"""50B parameter config. Large reasoning model. dim=6144, 128 experts, 32 loop iters, 8k context."""
"""50B parameter config. Large reasoning model. dim=6144, 256 experts, 32 loop iters, 8k context."""
return MythosConfig(
vocab_size=32000,
dim=6144,
@ -99,10 +104,10 @@ def mythos_50b() -> MythosConfig:
qk_rope_head_dim=64,
qk_nope_head_dim=128,
v_head_dim=128,
n_experts=128,
n_experts=256,
n_shared_experts=4,
n_experts_per_tok=4,
expert_dim=768,
expert_dim=9728,
act_threshold=0.99,
rope_theta=500000.0,
lora_rank=32,
@ -110,7 +115,7 @@ def mythos_50b() -> MythosConfig:
def mythos_100b() -> MythosConfig:
"""100B parameter config. Frontier-class model. dim=8192, 160 experts, 32 loop iters, 1M context, 128k output."""
"""100B parameter config. Frontier-class model. dim=8192, 256 experts, 32 loop iters, 1M context, 128k output."""
return MythosConfig(
vocab_size=32000,
dim=8192,
@ -126,10 +131,10 @@ def mythos_100b() -> MythosConfig:
qk_rope_head_dim=64,
qk_nope_head_dim=128,
v_head_dim=128,
n_experts=160,
n_experts=256,
n_shared_experts=4,
n_experts_per_tok=8,
expert_dim=1024,
expert_dim=13568,
act_threshold=0.99,
rope_theta=1000000.0,
lora_rank=64,
@ -138,7 +143,7 @@ def mythos_100b() -> MythosConfig:
def mythos_500b() -> MythosConfig:
"""500B parameter config. Ultra-scale MoE model. dim=12288, 256 experts, 48 loop iters, 1M context, 128k output."""
"""500B parameter config. Ultra-scale MoE model. dim=12288, 512 experts, 48 loop iters, 1M context, 128k output."""
return MythosConfig(
vocab_size=100000,
dim=12288,
@ -154,10 +159,10 @@ def mythos_500b() -> MythosConfig:
qk_rope_head_dim=64,
qk_nope_head_dim=128,
v_head_dim=128,
n_experts=256,
n_experts=512,
n_shared_experts=8,
n_experts_per_tok=8,
expert_dim=1536,
expert_dim=23040,
act_threshold=0.99,
rope_theta=1000000.0,
lora_rank=128,
@ -185,7 +190,7 @@ def mythos_1t() -> MythosConfig:
n_experts=512,
n_shared_experts=8,
n_experts_per_tok=8,
expert_dim=2048,
expert_dim=34560,
act_threshold=0.99,
rope_theta=2000000.0,
lora_rank=256,

View file

@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "open-mythos"
version = "0.1.0"
version = "0.2.0"
description = "OpenMythos — open-source theoretical reconstruction of the Claude Mythos Recurrent-Depth Transformer architecture"
license = "MIT"
authors = ["Kye Gomez <kye@swarms.world>"]

10
variants_example.py Normal file
View file

@ -0,0 +1,10 @@
from open_mythos import (
mythos_1b,
OpenMythos,
)
# Build the 1B preset configuration and instantiate the model from it.
cfg = mythos_1b()
model = OpenMythos(cfg)

# Accumulate the element count of each entry returned by model.parameters().
total = 0
for param in model.parameters():
    total += param.numel()

# Report the total with thousands separators.
print(f"Parameters: {total:,}")