diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py
index a17dfcfb..9d25c605 100644
--- a/vllm/model_executor/layers/sampler.py
+++ b/vllm/model_executor/layers/sampler.py
@@ -290,7 +290,7 @@ def _get_topk_logprobs(
 def _build_sequence_outputs(
     parent_ids: List[int],
     next_token_ids: List[int],
-    selected_token_logprobs: torch.Tensor,
+    selected_token_logprobs: List[float],
     parent_seq_ids: List[int],
     parent_logprobs: torch.Tensor,
     num_output_logprobs: Optional[int],
diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index 5206eb0b..9155c48d 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -61,7 +61,6 @@ class SamplingParams:
         max_tokens: Maximum number of tokens to generate per output sequence.
         logprobs: Number of log probabilities to return per output token.
         skip_special_tokens: Whether to skip special tokens in the output.
-            Defaults to true.
     """
 
     def __init__(
@@ -76,8 +75,8 @@
         use_beam_search: bool = False,
         length_penalty: float = 1.0,
         early_stopping: Union[bool, str] = False,
-        stop: Union[None, str, List[str]] = None,
-        stop_token_ids: List[int] = None,
+        stop: Optional[Union[str, List[str]]] = None,
+        stop_token_ids: Optional[List[int]] = None,
         ignore_eos: bool = False,
         max_tokens: int = 16,
         logprobs: Optional[int] = None,
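
Note (illustration only, not part of the patch): the sampling_params.py hunks fix two annotation bugs. `stop_token_ids: List[int] = None` mistypes the default, since `None` is not a `List[int]`, and `Union[None, str, List[str]]` is the less idiomatic spelling of `Optional[Union[str, List[str]]]`. The sampler.py hunk makes a similar interface narrowing explicit: the helper now takes plain Python floats (presumably produced upstream via something like `tensor.tolist()`) rather than a `torch.Tensor`. A minimal sketch of the corrected parameter pattern, using a hypothetical function name and assuming only the signatures shown above:

    from typing import List, Optional, Union

    def make_stop_config(
        # A parameter that defaults to None must be Optional[...];
        # Optional[Union[str, List[str]]] is equivalent to, but more
        # idiomatic than, Union[None, str, List[str]].
        stop: Optional[Union[str, List[str]]] = None,
        stop_token_ids: Optional[List[int]] = None,
    ) -> dict:
        # Normalize None to empty containers and a bare string to a
        # single-element list before use, the kind of handling a caller
        # of these parameters would need internally.
        stop_list = [stop] if isinstance(stop, str) else (stop or [])
        return {"stop": stop_list, "stop_token_ids": stop_token_ids or []}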