[Misc] Add customized information for models (#4132)
Commit: d6f4bd7cdd
Parent: c3845d82dc
@@ -43,3 +43,18 @@ def test_models(
             f"Test{i}:\nHF: {hf_output_str!r}\nvLLM: {vllm_output_str!r}")
         assert hf_output_ids == vllm_output_ids, (
             f"Test{i}:\nHF: {hf_output_ids}\nvLLM: {vllm_output_ids}")
+
+
+@pytest.mark.parametrize("model", MODELS)
+@pytest.mark.parametrize("dtype", ["half"])
+def test_model_print(
+    vllm_runner,
+    model: str,
+    dtype: str,
+) -> None:
+    vllm_model = vllm_runner(model, dtype=dtype)
+    # This test is for verifying whether the model's extra_repr
+    # can be printed correctly.
+    print(vllm_model.model.llm_engine.model_executor.driver_worker.
+          model_runner.model)
+    del vllm_model
@@ -49,3 +49,18 @@ def test_models(
             f"Test{i}:\nHF: {hf_output_str!r}\nvLLM: {vllm_output_str!r}")
         assert hf_output_ids == vllm_output_ids, (
             f"Test{i}:\nHF: {hf_output_ids}\nvLLM: {vllm_output_ids}")
+
+
+@pytest.mark.parametrize("model", MODELS)
+@pytest.mark.parametrize("dtype", ["float"])
+def test_model_print(
+    vllm_runner,
+    model: str,
+    dtype: str,
+) -> None:
+    vllm_model = vllm_runner(model, dtype=dtype)
+    # This test is for verifying whether the model's extra_repr
+    # can be printed correctly.
+    print(vllm_model.model.llm_engine.model_executor.driver_worker.
+          model_runner.model)
+    del vllm_model
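
Background for reviewers: these tests lean on PyTorch's nn.Module repr machinery, which places whatever extra_repr() returns inside the parentheses of a module's printed name. A minimal, self-contained sketch of that mechanism (the Toy class is hypothetical, not part of this change):

    import torch.nn as nn

    class Toy(nn.Module):
        """Hypothetical module mirroring the extra_repr style added in this PR."""

        def __init__(self, hidden_size: int, eps: float = 1e-6) -> None:
            super().__init__()
            self.hidden_size = hidden_size
            self.eps = eps

        def extra_repr(self) -> str:
            # nn.Module.__repr__ inserts this string between the parentheses.
            return f"hidden_size={self.hidden_size}, eps={self.eps}"

    print(Toy(4096))
    # Toy(hidden_size=4096, eps=1e-06)

Because the repr of a composite module recursively includes its children, defining extra_repr on the leaf layers below is enough for print(model) to show their configuration.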
@@ -47,3 +47,10 @@ class Attention(nn.Module):
     ) -> torch.Tensor:
         return self.impl.forward(query, key, value, kv_cache, attn_metadata,
                                  kv_scale)
+
+    def extra_repr(self) -> str:
+        s = f"head_size={self.impl.head_size}"  # type: ignore
+        s += f", num_heads={self.impl.num_heads}"  # type: ignore
+        s += f", num_kv_heads={self.impl.num_kv_heads}"  # type: ignore
+        s += f", scale={self.impl.scale}"  # type: ignore
+        return s
@@ -67,6 +67,9 @@ class GeluAndMul(nn.Module):
         ops.gelu_tanh_and_mul(out, x)
         return out
 
+    def extra_repr(self) -> str:
+        return f'approximate={repr(self.approximate)}'
+
 
 class NewGELU(nn.Module):
 
@@ -64,3 +64,8 @@ class RMSNorm(nn.Module):
             self.variance_epsilon,
         )
         return out
+
+    def extra_repr(self) -> str:
+        s = f"hidden_size={self.weight.data.size(0)}"
+        s += f", eps={self.variance_epsilon}"
+        return s
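
For reviewers who want to see the effect in isolation, a rough sketch (this assumes the layer lives at vllm.model_executor.layers.layernorm and that RMSNorm(hidden_size, eps=...) can be constructed on its own; the printed line is illustrative):

    from vllm.model_executor.layers.layernorm import RMSNorm

    # With the extra_repr added above, the layer now prints its configuration.
    print(RMSNorm(4096, eps=1e-6))
    # Expected output (illustrative): RMSNorm(hidden_size=4096, eps=1e-06)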
@@ -181,6 +181,12 @@ class ReplicatedLinear(LinearBase):
         output_bias = self.bias if self.skip_bias_add else None
         return output, output_bias
 
+    def extra_repr(self) -> str:
+        s = f"in_features={self.input_size}"
+        s += f", output_features={self.output_size}"
+        s += f", bias={self.bias is not None}"
+        return s
+
 
 class ColumnParallelLinear(LinearBase):
     """Linear layer with column parallelism.
@@ -281,6 +287,14 @@ class ColumnParallelLinear(LinearBase):
         output_bias = self.bias if self.skip_bias_add else None
         return output, output_bias
 
+    def extra_repr(self) -> str:
+        s = f"in_features={self.input_size}"
+        s += f", output_features={self.output_size_per_partition}"
+        s += f", bias={self.bias is not None}"
+        s += f", tp_size={get_tensor_model_parallel_world_size()}"
+        s += f", gather_output={self.gather_output}"
+        return s
+
 
 class MergedColumnParallelLinear(ColumnParallelLinear):
     """Packed linear layers with column parallelism.
@@ -685,3 +699,11 @@ class RowParallelLinear(LinearBase):
             output = output_
             output_bias = self.bias
         return output, output_bias
+
+    def extra_repr(self) -> str:
+        s = f"input_features={self.input_size_per_partition}"
+        s += f", output_features={self.output_size}"
+        s += f", bias={self.bias is not None}"
+        s += f", tp_size={self.tp_size}"
+        s += f", reduce_results={self.reduce_results}"
+        return s
@@ -70,6 +70,12 @@ class LogitsProcessor(nn.Module):
             logits = logits[:, :self.org_vocab_size]
         return logits
 
+    def extra_repr(self) -> str:
+        s = f"vocab_size={self.vocab_size}"
+        s += f", forg_vocab_size={self.org_vocab_size}"
+        s += f", scale={self.scale}, logits_as_input={self.logits_as_input}"
+        return s
+
 
 def _prune_hidden_states(
     hidden_states: torch.Tensor,
|||||||
@ -156,6 +156,12 @@ class RotaryEmbedding(nn.Module):
|
|||||||
self.cos_sin_cache, self.is_neox_style)
|
self.cos_sin_cache, self.is_neox_style)
|
||||||
return query, key
|
return query, key
|
||||||
|
|
||||||
|
def extra_repr(self) -> str:
|
||||||
|
s = f"head_size={self.head_size}, rotary_dim={self.rotary_dim}"
|
||||||
|
s += f", max_position_embeddings={self.max_position_embeddings}"
|
||||||
|
s += f", base={self.base}, is_neox_style={self.is_neox_style}"
|
||||||
|
return s
|
||||||
|
|
||||||
|
|
||||||
class LinearScalingRotaryEmbedding(RotaryEmbedding):
|
class LinearScalingRotaryEmbedding(RotaryEmbedding):
|
||||||
"""RotaryEmbedding extended with linear scaling.
|
"""RotaryEmbedding extended with linear scaling.
|
||||||
|
|||||||
@ -105,6 +105,14 @@ class VocabParallelEmbedding(torch.nn.Module):
|
|||||||
output = tensor_model_parallel_all_reduce(output_parallel)
|
output = tensor_model_parallel_all_reduce(output_parallel)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
def extra_repr(self) -> str:
|
||||||
|
s = f"num_embeddings={self.num_embeddings_per_partition}"
|
||||||
|
s += f", embedding_dim={self.embedding_dim}"
|
||||||
|
s += f", org_vocab_size={self.org_vocab_size}"
|
||||||
|
s += f', num_embeddings_padded={self.num_embeddings_padded}'
|
||||||
|
s += f', tp_size={self.tp_size}'
|
||||||
|
return s
|
||||||
|
|
||||||
|
|
||||||
class ParallelLMHead(VocabParallelEmbedding):
|
class ParallelLMHead(VocabParallelEmbedding):
|
||||||
"""Parallelized LM head.
|
"""Parallelized LM head.
|
||||||
|
|||||||
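
Taken together, these extra_repr hooks make a plain print of the model show per-layer configuration. A rough usage sketch (the model name is illustrative, and the attribute chain simply mirrors the test added above; it may differ across vLLM versions):

    from vllm import LLM

    llm = LLM(model="facebook/opt-125m", dtype="half")
    # Printing the underlying nn.Module now includes head_size, hidden_size, eps,
    # tp_size, etc. for Attention, RMSNorm, the parallel linear layers, and so on.
    print(llm.llm_engine.model_executor.driver_worker.model_runner.model)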