[Update variants with parameter count equation]

2026-04-19 21:55:06 -04:00 · 2026-04-19 21:55:06 -04:00 · a825ba217f
commit a825ba217f
parent f00d10b59f
4 changed files with 72 additions and 22 deletions
--- a/README.md
+++ b/README.md
@ -71,6 +71,41 @@ print(



+## Model Variants
+
+Pre-configured scales from 1B to 1T parameters:
+
+```python
+from open_mythos import (
+    mythos_1b,
+    mythos_3b,
+    mythos_10b,
+    mythos_50b,
+    mythos_100b,
+    mythos_500b,
+    mythos_1t,
+    OpenMythos,
+)
+
+cfg = mythos_1b()  # returns a MythosConfig
+model = OpenMythos(cfg)
+
+total = sum(p.numel() for p in model.parameters())
+print(f"Parameters: {total:,}")
+```
+
+| Variant | `dim` | Experts | `expert_dim` | Loop iters | Context | Max output |
+|---|---|---|---|---|---|---|
+| `mythos_1b` | 2048 | 64 | 2048 | 16 | 4k | 4k |
+| `mythos_3b` | 3072 | 64 | 4096 | 16 | 4k | 4k |
+| `mythos_10b` | 4096 | 128 | 5632 | 24 | 8k | 4k |
+| `mythos_50b` | 6144 | 256 | 9728 | 32 | 8k | 4k |
+| `mythos_100b` | 8192 | 256 | 13568 | 32 | 1M | 128k |
+| `mythos_500b` | 12288 | 512 | 23040 | 48 | 1M | 128k |
+| `mythos_1t` | 16384 | 512 | 34560 | 64 | 1M | 128k |
+
+---
+
 ## Documentation

 | Page | Description |
--- a/open_mythos/variants.py
+++ b/open_mythos/variants.py
@ -1,8 +1,13 @@
 from open_mythos.main import MythosConfig

+# Parameter budget breakdown per variant:
+#   total ≈ embed + prelude/coda dense blocks + recurrent MLA + MoE
+#   MoE   = 3 * dim * expert_dim * (n_experts + n_shared * n_experts_per_tok)
+# expert_dim is solved from the residual budget after all other terms.
+

 def mythos_1b() -> MythosConfig:
-    """1B parameter config. Small research/fine-tuning model. dim=2048, 16 experts, 16 loop iters, 4k context."""
+    """1B parameter config. Small research/fine-tuning model. dim=2048, 64 experts, 16 loop iters, 4k context."""
    return MythosConfig(
        vocab_size=32000,
        dim=2048,
@ -18,10 +23,10 @@ def mythos_1b() -> MythosConfig:
        qk_rope_head_dim=32,
        qk_nope_head_dim=64,
        v_head_dim=64,
-        n_experts=16,
+        n_experts=64,
        n_shared_experts=2,
-        n_experts_per_tok=2,
-        expert_dim=256,
+        n_experts_per_tok=4,
+        expert_dim=2048,
        act_threshold=0.99,
        rope_theta=500000.0,
        lora_rank=8,
@ -29,7 +34,7 @@ def mythos_1b() -> MythosConfig:


 def mythos_3b() -> MythosConfig:
-    """3B parameter config. Compact inference model. dim=3072, 32 experts, 16 loop iters, 4k context."""
+    """3B parameter config. Compact inference model. dim=3072, 64 experts, 16 loop iters, 4k context."""
    return MythosConfig(
        vocab_size=32000,
        dim=3072,
@ -45,10 +50,10 @@ def mythos_3b() -> MythosConfig:
        qk_rope_head_dim=32,
        qk_nope_head_dim=96,
        v_head_dim=96,
-        n_experts=32,
+        n_experts=64,
        n_shared_experts=2,
-        n_experts_per_tok=2,
-        expert_dim=384,
+        n_experts_per_tok=4,
+        expert_dim=4096,
        act_threshold=0.99,
        rope_theta=500000.0,
        lora_rank=8,
@ -56,7 +61,7 @@ def mythos_3b() -> MythosConfig:


 def mythos_10b() -> MythosConfig:
-    """10B parameter config. Mid-scale general model. dim=4096, 64 experts, 24 loop iters, 8k context."""
+    """10B parameter config. Mid-scale general model. dim=4096, 128 experts, 24 loop iters, 8k context."""
    return MythosConfig(
        vocab_size=32000,
        dim=4096,
@ -72,10 +77,10 @@ def mythos_10b() -> MythosConfig:
        qk_rope_head_dim=64,
        qk_nope_head_dim=128,
        v_head_dim=128,
-        n_experts=64,
+        n_experts=128,
        n_shared_experts=2,
        n_experts_per_tok=4,
-        expert_dim=512,
+        expert_dim=5632,
        act_threshold=0.99,
        rope_theta=500000.0,
        lora_rank=16,
@ -83,7 +88,7 @@ def mythos_10b() -> MythosConfig:


 def mythos_50b() -> MythosConfig:
-    """50B parameter config. Large reasoning model. dim=6144, 128 experts, 32 loop iters, 8k context."""
+    """50B parameter config. Large reasoning model. dim=6144, 256 experts, 32 loop iters, 8k context."""
    return MythosConfig(
        vocab_size=32000,
        dim=6144,
@ -99,10 +104,10 @@ def mythos_50b() -> MythosConfig:
        qk_rope_head_dim=64,
        qk_nope_head_dim=128,
        v_head_dim=128,
-        n_experts=128,
+        n_experts=256,
        n_shared_experts=4,
        n_experts_per_tok=4,
-        expert_dim=768,
+        expert_dim=9728,
        act_threshold=0.99,
        rope_theta=500000.0,
        lora_rank=32,
@ -110,7 +115,7 @@ def mythos_50b() -> MythosConfig:


 def mythos_100b() -> MythosConfig:
-    """100B parameter config. Frontier-class model. dim=8192, 160 experts, 32 loop iters, 1M context, 128k output."""
+    """100B parameter config. Frontier-class model. dim=8192, 256 experts, 32 loop iters, 1M context, 128k output."""
    return MythosConfig(
        vocab_size=32000,
        dim=8192,
@ -126,10 +131,10 @@ def mythos_100b() -> MythosConfig:
        qk_rope_head_dim=64,
        qk_nope_head_dim=128,
        v_head_dim=128,
-        n_experts=160,
+        n_experts=256,
        n_shared_experts=4,
        n_experts_per_tok=8,
-        expert_dim=1024,
+        expert_dim=13568,
        act_threshold=0.99,
        rope_theta=1000000.0,
        lora_rank=64,
@ -138,7 +143,7 @@ def mythos_100b() -> MythosConfig:


 def mythos_500b() -> MythosConfig:
-    """500B parameter config. Ultra-scale MoE model. dim=12288, 256 experts, 48 loop iters, 1M context, 128k output."""
+    """500B parameter config. Ultra-scale MoE model. dim=12288, 512 experts, 48 loop iters, 1M context, 128k output."""
    return MythosConfig(
        vocab_size=100000,
        dim=12288,
@ -154,10 +159,10 @@ def mythos_500b() -> MythosConfig:
        qk_rope_head_dim=64,
        qk_nope_head_dim=128,
        v_head_dim=128,
-        n_experts=256,
+        n_experts=512,
        n_shared_experts=8,
        n_experts_per_tok=8,
-        expert_dim=1536,
+        expert_dim=23040,
        act_threshold=0.99,
        rope_theta=1000000.0,
        lora_rank=128,
@ -185,7 +190,7 @@ def mythos_1t() -> MythosConfig:
        n_experts=512,
        n_shared_experts=8,
        n_experts_per_tok=8,
-        expert_dim=2048,
+        expert_dim=34560,
        act_threshold=0.99,
        rope_theta=2000000.0,
        lora_rank=256,
--- a/pyproject.toml
+++ b/pyproject.toml
@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "open-mythos"
-version = "0.1.0"
+version = "0.2.0"
 description = "OpenMythos — open-source theoretical reconstruction of the Claude Mythos Recurrent-Depth Transformer architecture"
 license = "MIT"
 authors = ["Kye Gomez <kye@swarms.world>"]
--- a/variants_example.py
+++ b/variants_example.py
@ -0,0 +1,10 @@
+from open_mythos import (
+    mythos_1b,
+    OpenMythos,
+)
+
+cfg = mythos_1b()
+model = OpenMythos(cfg)
+
+total = sum(p.numel() for p in model.parameters())
+print(f"Parameters: {total:,}")