diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py
index 9dcfa42e2a52..013b44060d1a 100644
--- a/vllm/model_executor/layers/sampler.py
+++ b/vllm/model_executor/layers/sampler.py
@@ -82,8 +82,9 @@ class Sampler(nn.Module):
         # We use float32 for probabilities and log probabilities.
         # Compute the probabilities.
         probs = torch.softmax(logits, dim=-1, dtype=torch.float)
-        # Compute the log probabilities (before applying top-p and top-k).
-        logprobs = torch.log(probs)
+        # Compute the log probabilities.
+        # Use log_softmax to ensure numerical stability.
+        logprobs = torch.log_softmax(logits, dim=-1, dtype=torch.float)
         # Sample the next tokens.
         return _sample(probs, logprobs, input_metadata)
 
diff --git a/vllm/sequence.py b/vllm/sequence.py
index 795397a3731b..eac3af2823a1 100644
--- a/vllm/sequence.py
+++ b/vllm/sequence.py
@@ -350,7 +350,7 @@ class SequenceOutputs:
 
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, SequenceOutputs):
-            return NotImplementedError()
+            return NotImplemented
         return (self.parent_seq_id == other.parent_seq_id
                 and self.output_token == other.output_token
                 and self.logprobs == other.logprobs)