mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-08 19:29:09 +08:00
[Misc] Fix ScalarType float4 naming (#17690)
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
This commit is contained in:
parent
63ced7b43f
commit
6eae34533a
@ -17,7 +17,7 @@ PAD_SHAPES = [(90, 64), (150, 64), (128, 48), (128, 80), (150, 80), (90, 48),
|
|||||||
SEEDS = [42]
|
SEEDS = [42]
|
||||||
CUDA_DEVICES = ['cuda:0']
|
CUDA_DEVICES = ['cuda:0']
|
||||||
|
|
||||||
FLOAT4_E2M1_MAX = scalar_types.float4_e2m1fn.max()
|
FLOAT4_E2M1_MAX = scalar_types.float4_e2m1f.max()
|
||||||
FLOAT8_E4M3_MAX = torch.finfo(torch.float8_e4m3fn).max
|
FLOAT8_E4M3_MAX = torch.finfo(torch.float8_e4m3fn).max
|
||||||
|
|
||||||
# E2M1 to float
|
# E2M1 to float
|
||||||
|
|||||||
@ -11,7 +11,7 @@ from vllm.scalar_type import scalar_types
|
|||||||
(0, 15, scalar_types.uint4),
|
(0, 15, scalar_types.uint4),
|
||||||
(-8, 7, scalar_types.uint4b8),
|
(-8, 7, scalar_types.uint4b8),
|
||||||
(-128, 127, scalar_types.uint8b128),
|
(-128, 127, scalar_types.uint8b128),
|
||||||
(-6., 6., scalar_types.float4_e2m1fn),
|
(-6., 6., scalar_types.float4_e2m1f),
|
||||||
(-28., 28., scalar_types.float6_e3m2f),
|
(-28., 28., scalar_types.float6_e3m2f),
|
||||||
(torch.int8, scalar_types.int8),
|
(torch.int8, scalar_types.int8),
|
||||||
(torch.uint8, scalar_types.uint8),
|
(torch.uint8, scalar_types.uint8),
|
||||||
|
|||||||
@ -333,7 +333,7 @@ class scalar_types:
|
|||||||
float6_e3m2f = ScalarType.float_(3, 2, True, NanRepr.NONE)
|
float6_e3m2f = ScalarType.float_(3, 2, True, NanRepr.NONE)
|
||||||
|
|
||||||
# fp4, https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
|
# fp4, https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
|
||||||
float4_e2m1fn = ScalarType.float_(2, 1, True, NanRepr.NONE)
|
float4_e2m1f = ScalarType.float_(2, 1, True, NanRepr.NONE)
|
||||||
|
|
||||||
# "gptq" types
|
# "gptq" types
|
||||||
uint2b2 = ScalarType.uint(2, 2)
|
uint2b2 = ScalarType.uint(2, 2)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user