sqrtspace-experiments/experiments/llm_kv_cache/llm_kv_cache_results.json
2025-07-20 03:56:21 -04:00

87 lines
2.3 KiB
JSON

{
"model_config": {
"hidden_dim": 768,
"num_heads": 12,
"head_dim": 64
},
"results": {
"512": [
{
"label": "Full O(n)",
"cache_size": 512,
"avg_token_time": 0.0014609239995479583,
"tokens_per_second": 684.5087547484942,
"max_memory_mb": 2.994140625,
"total_recomputes": 0.0
},
{
"label": "Flash O(\u221an)",
"cache_size": 90,
"avg_token_time": 0.0004420524463057518,
"tokens_per_second": 2263.2109836224,
"max_memory_mb": 0.52734375,
"total_recomputes": 75136.0
},
{
"label": "Minimal O(1)",
"cache_size": 8,
"avg_token_time": 0.0002111002802848816,
"tokens_per_second": 4739.443599651373,
"max_memory_mb": 0.046875,
"total_recomputes": 96128.0
}
],
"1024": [
{
"label": "Full O(n)",
"cache_size": 1024,
"avg_token_time": 0.0027254623360931872,
"tokens_per_second": 366.91164878423155,
"max_memory_mb": 5.994140625,
"total_recomputes": 0.0
},
{
"label": "Flash O(\u221an)",
"cache_size": 128,
"avg_token_time": 0.0006042216904461384,
"tokens_per_second": 1655.0428253903872,
"max_memory_mb": 0.75,
"total_recomputes": 327424.0
},
{
"label": "Minimal O(1)",
"cache_size": 8,
"avg_token_time": 0.00022929944097995758,
"tokens_per_second": 4373.89985252146,
"max_memory_mb": 0.046875,
"total_recomputes": 388864.0
}
],
"2048": [
{
"label": "Full O(n)",
"cache_size": 2048,
"avg_token_time": 0.005077033815905452,
"tokens_per_second": 197.0929691857751,
"max_memory_mb": 11.994140625,
"total_recomputes": 0.0
},
{
"label": "Flash O(\u221an)",
"cache_size": 181,
"avg_token_time": 0.0007414041552692652,
"tokens_per_second": 1348.82682858517,
"max_memory_mb": 1.060546875,
"total_recomputes": 1387008.0
},
{
"label": "Minimal O(1)",
"cache_size": 8,
"avg_token_time": 0.0002398564014583826,
"tokens_per_second": 4169.296047863895,
"max_memory_mb": 0.046875,
"total_recomputes": 1564160.0
}
]
}
}