mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-08 23:15:01 +08:00
Compare commits
6 Commits
ee094aa0aa
...
8a99479d60
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8a99479d60 | ||
|
|
512aad8754 | ||
|
|
38c540ce7b | ||
|
|
f46104deeb | ||
|
|
c2381fb05f | ||
|
|
36ac28a038 |
@ -30,11 +30,9 @@ MNK_FACTORS = [
|
||||
(32, 512, 512),
|
||||
(32, 1024, 2048),
|
||||
(45, 128, 2048),
|
||||
(45, 512, 512),
|
||||
(45, 1024, 128),
|
||||
(64, 512, 512),
|
||||
(64, 1024, 2048),
|
||||
(222, 128, 128),
|
||||
(222, 128, 2048),
|
||||
(222, 1024, 2048),
|
||||
]
|
||||
@ -97,7 +95,7 @@ class BatchedMMTensors:
|
||||
|
||||
|
||||
@pytest.mark.parametrize("num_experts", [8, 32])
|
||||
@pytest.mark.parametrize("max_tokens_per_expert", [32, 512])
|
||||
@pytest.mark.parametrize("max_tokens_per_expert", [32, 224, 512])
|
||||
@pytest.mark.parametrize("K", [128, 1024])
|
||||
@pytest.mark.parametrize("N", [128, 1024])
|
||||
@pytest.mark.parametrize("dtype", [torch.float8_e4m3fn, torch.bfloat16])
|
||||
|
||||
@ -52,7 +52,6 @@ MNK_FACTORS = [
|
||||
(83, 4608, 7168),
|
||||
(128, 512, 512),
|
||||
(128, 1024, 7168),
|
||||
(128, 4608, 512),
|
||||
(128, 4608, 7168),
|
||||
(2048, 128, 128),
|
||||
(2048, 1024, 7168),
|
||||
@ -72,7 +71,6 @@ MNK_FACTORS_DG = [
|
||||
(128, 4608, 7168),
|
||||
(192, 512, 512),
|
||||
(192, 1024, 7168),
|
||||
(192, 4608, 512),
|
||||
(192, 4608, 7168),
|
||||
(1335, 128, 128),
|
||||
(1335, 1024, 7168),
|
||||
|
||||
@ -40,7 +40,6 @@ MNK_FACTORS = [
|
||||
(128, 4096, 7168),
|
||||
(222, 512, 512),
|
||||
(222, 1024, 7168),
|
||||
(222, 4096, 512),
|
||||
(222, 4096, 7168),
|
||||
(2048, 128, 128),
|
||||
(2048, 1024, 7168),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user