mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 08:34:57 +08:00
[Platform] Custom ops support for LMhead and LogitsProcessor (#23564)
Signed-off-by: zzhx1 <zzh_201018@outlook.com>
This commit is contained in:
parent
2eb9986a2d
commit
736569da8d
@@ -6,11 +6,11 @@ from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.distributed import (tensor_model_parallel_all_gather,
|
||||
tensor_model_parallel_gather)
|
||||
from vllm.model_executor.custom_op import CustomOp
|
||||
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
||||
VocabParallelEmbedding)
|
||||
from vllm.model_executor.sampling_metadata import SamplingMetadata
|
||||
@@ -22,7 +22,8 @@ if envs.VLLM_LOGITS_PROCESSOR_THREADS is not None:
|
||||
envs.VLLM_LOGITS_PROCESSOR_THREADS)
|
||||
|
||||
|
||||
class LogitsProcessor(nn.Module):
|
||||
@CustomOp.register("logits_processor")
|
||||
class LogitsProcessor(CustomOp):
|
||||
"""Process logits and apply logits processors from sampling metadata.
|
||||
|
||||
This layer does the following:
|
||||
|
||||
@@ -429,6 +429,7 @@ class VocabParallelEmbedding(CustomOp):
|
||||
return s
|
||||
|
||||
|
||||
@CustomOp.register("parallel_lm_head")
|
||||
class ParallelLMHead(VocabParallelEmbedding):
|
||||
"""Parallelized LM head.
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user