diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py
index f9d95fa7..25ba48c2 100644
--- a/vllm/model_executor/layers/sampler.py
+++ b/vllm/model_executor/layers/sampler.py
@@ -112,27 +112,6 @@ def _prune_hidden_states(
         sampling_metadata.selected_token_indices)
 
 
-def _get_prompt_and_output_tokens(
-    sampling_metadata: SamplingMetadata,
-) -> Tuple[List[List[int]], List[List[int]]]:
-    prompt_tokens: List[List[int]] = []
-    output_tokens: List[List[int]] = []
-    for i, seq_group in enumerate(sampling_metadata.seq_groups):
-        seq_ids, sampling_params = seq_group
-        if (i < sampling_metadata.num_prompts
-                and sampling_params.prompt_logprobs is not None):
-            # NOTE: prompt token positions do not need output tokens to
-            # compute penalties.
-            prompt_len = sampling_metadata.prompt_lens[i]
-            prompt_tokens.extend([] for _ in range(prompt_len - 1))
-            output_tokens.extend([] for _ in range(prompt_len - 1))
-        for seq_id in seq_ids:
-            seq_data = sampling_metadata.seq_data[seq_id]
-            prompt_tokens.append(seq_data.prompt_token_ids)
-            output_tokens.append(seq_data.output_token_ids)
-    return prompt_tokens, output_tokens
-
-
 def _get_bin_counts_and_mask(
     tokens: torch.Tensor,
     vocab_size: int,
diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index 30a8036a..b5710eef 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -100,7 +100,7 @@ class SamplingParams:
         temperature: float = 1.0,
         top_p: float = 1.0,
         top_k: int = -1,
-        min_p: int = 0.0,
+        min_p: float = 0.0,
         use_beam_search: bool = False,
         length_penalty: float = 1.0,
         early_stopping: Union[bool, str] = False,