[ci/lint] Add back default arg for pre-commit (#12279)
Signed-off-by: kevin <kevin@anyscale.com>
parent df76e5af26
commit 64ea24d0b3
.github/workflows/pre-commit.yml (vendored): 2 changes
@@ -16,4 +16,4 @@ jobs:
     - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
     - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
       with:
-        extra_args: --hook-stage manual
+        extra_args: --all-files --hook-stage manual
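
For reference, a rough local equivalent of what the restored default argument asks the CI action to run is sketched below (an assumption, not part of this change; it presumes pre-commit is installed locally, and the action also adds its own default flags):

# Hypothetical local equivalent of the CI invocation: run pre-commit over all
# files at the "manual" hook stage.
import subprocess

subprocess.run(
    ["pre-commit", "run", "--all-files", "--hook-stage", "manual"],
    check=True,
)
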
@@ -74,11 +74,7 @@ DOLPHIN_CONFIG = GGUFTestConfig(
 )
 
 MODELS = [
-    LLAMA_CONFIG,
-    QWEN2_CONFIG,
-    PHI3_CONFIG,
-    GPT2_CONFIG,
-    STABLELM_CONFIG,
+    LLAMA_CONFIG, QWEN2_CONFIG, PHI3_CONFIG, GPT2_CONFIG, STABLELM_CONFIG,
     DOLPHIN_CONFIG
     # STARCODER_CONFIG, # broken
 ]
@@ -114,11 +110,12 @@ def test_models(
         messages, tokenize=False, add_generation_prompt=True)
 
     # Run unquantized model.
-    with vllm_runner(model_name=model.original_model,
-                     enforce_eager=True,  # faster tests
-                     dtype=dtype,
-                     max_model_len=MAX_MODEL_LEN,
-                     tensor_parallel_size=tp_size) as original_model:
+    with vllm_runner(
+            model_name=model.original_model,
+            enforce_eager=True,  # faster tests
+            dtype=dtype,
+            max_model_len=MAX_MODEL_LEN,
+            tensor_parallel_size=tp_size) as original_model:
         original_outputs = original_model.generate_greedy_logprobs(
             example_prompts[:-1], max_tokens, num_logprobs)
 
@@ -350,10 +350,8 @@ class SiglipMLP(nn.Module):
         else:
             # For other quantization, we require the hidden size to be a
             # multiple of 64
-            quantizable = (
-                config.hidden_size % 64 == 0
-                and config.intermediate_size % 64 == 0
-            )
+            quantizable = (config.hidden_size % 64 == 0
+                           and config.intermediate_size % 64 == 0)
         self.fc1 = ColumnParallelLinear(
             config.hidden_size,
             config.intermediate_size,
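
The reformatted expression above keeps the same gate: the layer is treated as quantizable only when both dimensions are multiples of 64. A minimal standalone sketch of that check follows; the helper name and example sizes are illustrative, not taken from vLLM:

# Illustrative sketch of the multiple-of-64 gate; helper name and sizes are
# made up for demonstration.
def is_quantizable(hidden_size: int, intermediate_size: int) -> bool:
    # Both dimensions must divide evenly by 64 for the generic quantization path.
    return hidden_size % 64 == 0 and intermediate_size % 64 == 0


assert is_quantizable(1024, 4096) is True
assert is_quantizable(1000, 4096) is False  # 1000 is not a multiple of 64
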
@@ -101,7 +101,7 @@ def cpu_platform_plugin() -> Optional[str]:
     try:
         from importlib.metadata import version
         is_cpu = "cpu" in version("vllm")
-        if is_cpu == False:
+        if not is_cpu:
             import platform
             is_cpu = platform.machine().lower().startswith("arm")
 
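
This change swaps the non-idiomatic "is_cpu == False" comparison for "not is_cpu", which also covers a falsy or None result. Below is a self-contained sketch of the same detection logic using only the standard library; the helper name is hypothetical, not vLLM's API:

# Standalone sketch: treat the install as a CPU build if "cpu" appears in the
# installed package's version string, otherwise fall back to checking for an
# ARM host. The helper name is hypothetical.
import platform
from importlib.metadata import PackageNotFoundError, version


def looks_like_cpu_build(package: str = "vllm") -> bool:
    try:
        is_cpu = "cpu" in version(package)
    except PackageNotFoundError:
        is_cpu = False
    if not is_cpu:  # idiomatic, and unlike "== False" also handles falsy values
        is_cpu = platform.machine().lower().startswith("arm")
    return is_cpu
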
@@ -10,10 +10,11 @@ from msgspec import field as msgspec_field
 from vllm.sampling_params import SamplingParams
 
 
-class RequestStatsUpdate(msgspec.Struct,
-                         array_like=True,
-                         omit_defaults=True,
-                         gc=False):
+class RequestStatsUpdate(
+        msgspec.Struct,  # type: ignore
+        array_like=True,
+        omit_defaults=True,
+        gc=False):
     """
     An update to the request stats.
 
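
Passing the options directly as class keywords is how msgspec configures a Struct subclass; the "# type: ignore" only quiets type checkers that do not understand those class kwargs. A minimal sketch with the same options is shown below, where the struct and its fields are hypothetical rather than vLLM's:

# Minimal msgspec.Struct sketch using the same class options as above.
# array_like=True encodes instances as compact arrays instead of maps,
# omit_defaults=True skips fields still at their default value, and gc=False
# opts the type out of cyclic garbage-collector tracking. Field names are
# hypothetical.
from typing import Optional

import msgspec


class ExampleUpdate(
        msgspec.Struct,  # type: ignore
        array_like=True,
        omit_defaults=True,
        gc=False):
    request_id: str
    num_new_tokens: Optional[int] = None


payload = msgspec.msgpack.encode(ExampleUpdate(request_id="req-0"))
restored = msgspec.msgpack.decode(payload, type=ExampleUpdate)
assert restored.request_id == "req-0"
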
@@ -341,8 +342,8 @@ class RequestStats:
             self.queued_ts_s = ts
         elif update.type == RequestStatsUpdate.Type.PREFILLING:
             self.prefill_start_ts_s_lst.append(ts)
-            self.num_cached_tokens = update.num_cached_tokens
-            self.num_computed_tokens = update.num_computed_tokens
+            self.num_cached_tokens = update.num_cached_tokens or 0
+            self.num_computed_tokens = update.num_computed_tokens or 0
         elif update.type == RequestStatsUpdate.Type.PREEMPTED:
             self._reset_for_preemption(ts)
         elif update.type == RequestStatsUpdate.Type.DECODING:
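
The "or 0" guards normalize these Optional counters to plain integers before they are stored, since an update may carry None for them. A tiny sketch of the idiom, with illustrative names only:

# Sketch of the "x or 0" normalization: None (or any falsy value) becomes 0,
# so downstream arithmetic never sees an Optional. Name is illustrative.
from typing import Optional


def to_count(value: Optional[int]) -> int:
    return value or 0


assert to_count(None) == 0
assert to_count(5) == 5

An explicit 0 also maps to 0, which is the desired behavior for token counters.
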
@@ -350,7 +351,7 @@ class RequestStats:
         elif update.type == RequestStatsUpdate.Type.DETOKENIZED:
             self._record_detokenized_output(
                 ts,
-                update.num_new_tokens,
+                update.num_new_tokens or 0,
             )
         elif update.type == RequestStatsUpdate.Type.FINISHED:
             self.finished_ts_s = ts
@@ -425,10 +426,11 @@ class EngineCoreProcessStats:
     output_queue_size: Optional[int] = None
 
 
-class EngineCoreStatsSnapshot(msgspec.Struct,
-                              array_like=True,
-                              omit_defaults=True,
-                              gc=False):
+class EngineCoreStatsSnapshot(
+        msgspec.Struct,  # type: ignore
+        array_like=True,
+        omit_defaults=True,
+        gc=False):
     """
     A snapshot of the EngineCore's current stats over a period of time.
     """