[Misc] Bump up transformers to v4.39.0 & Remove StarCoder2Config (#3551)

Co-authored-by: Roy <jasonailu87@gmail.com>
Co-authored-by: Roger Meier <r.meier@siemens.com>
Woosuk Kwon 2024-03-21 07:58:12 -07:00 committed by GitHub
parent 865732342b
commit c188ecb080
6 changed files with 3 additions and 76 deletions

View File

@@ -7,7 +7,7 @@ ray >= 2.9
 sentencepiece # Required for LLaMA tokenizer.
 numpy
 tokenizers>=0.15.0
-transformers >= 4.38.0 # Required for Gemma.
+transformers >= 4.39.0 # Required for StarCoder2.
 fastapi
 uvicorn[standard]
 pydantic >= 2.0 # Required for OpenAI server.

View File

@@ -5,7 +5,7 @@ ray >= 2.9
 sentencepiece # Required for LLaMA tokenizer.
 numpy
 torch == 2.1.2
-transformers >= 4.38.0 # Required for Gemma.
+transformers >= 4.39.0 # Required for StarCoder2.
 xformers == 0.0.23.post1 # Required for CUDA 12.1.
 fastapi
 uvicorn[standard]
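
Both requirements files now require transformers >= 4.39.0, the first release that ships StarCoder2 support upstream. A quick environment sanity check, not part of this commit and shown only as a hedged sketch:

import transformers
from packaging import version

# This commit assumes transformers >= 4.39.0, the first release with Starcoder2Config.
assert version.parse(transformers.__version__) >= version.parse("4.39.0")

# On a new-enough installation the import resolves without any fallback.
from transformers import Starcoder2Config
print(Starcoder2Config().model_type)  # "starcoder2"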

View File

@@ -22,6 +22,7 @@ from typing import List, Optional, Tuple
 import torch
 from torch import nn
+from transformers import Starcoder2Config
 
 from vllm.model_executor.input_metadata import InputMetadata
 from vllm.model_executor.sampling_metadata import SamplingMetadata
@@ -42,13 +43,6 @@ from vllm.model_executor.weight_utils import (default_weight_loader,
                                               hf_model_weights_iterator)
 from vllm.sequence import SamplerOutput
 
-try:
-    from transformers import Starcoder2Config
-except ImportError:
-    # fallback to PretrainedConfig
-    # NOTE: Please install transformers from source or use transformers>=4.39.0
-    from transformers import PretrainedConfig as Starcoder2Config
-
 KVCache = Tuple[torch.Tensor, torch.Tensor]

View File

@@ -9,7 +9,6 @@ _CONFIG_REGISTRY = {
     "mpt": MPTConfig,
     "RefinedWeb": RWConfig, # For tiiuae/falcon-40b(-instruct)
     "RefinedWebModel": RWConfig, # For tiiuae/falcon-7b(-instruct)
-    "starcoder2": Starcoder2Config,
     "jais": JAISConfig,
 }
@@ -18,15 +17,6 @@ def get_config(model: str,
                trust_remote_code: bool,
                revision: Optional[str] = None,
                code_revision: Optional[str] = None) -> PretrainedConfig:
-    # FIXME(woosuk): This is a temporary fix for StarCoder2.
-    # Remove this when the model is supported by HuggingFace transformers.
-    if "bigcode" in model and "starcoder2" in model:
-        config_class = _CONFIG_REGISTRY["starcoder2"]
-        config = config_class.from_pretrained(model,
-                                              revision=revision,
-                                              code_revision=code_revision)
-        return config
-
     try:
         config = AutoConfig.from_pretrained(
             model,
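
With the registry entry and the temporary bigcode/starcoder2 special case removed, StarCoder2 checkpoints now take the generic AutoConfig path like every other architecture. A hedged sketch of the behaviour this relies on (assumes transformers >= 4.39.0 and network access; the checkpoint name is only an example):

from transformers import AutoConfig, Starcoder2Config

# AutoConfig maps model_type "starcoder2" to the upstream class, so neither a
# vLLM-local config class nor a model-name check is needed anymore.
config = AutoConfig.from_pretrained("bigcode/starcoder2-3b")
assert isinstance(config, Starcoder2Config)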

View File

@@ -4,13 +4,11 @@ from vllm.transformers_utils.configs.mpt import MPTConfig
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
 from vllm.transformers_utils.configs.falcon import RWConfig
-from vllm.transformers_utils.configs.starcoder2 import Starcoder2Config
 from vllm.transformers_utils.configs.jais import JAISConfig
 
 __all__ = [
     "ChatGLMConfig",
     "MPTConfig",
     "RWConfig",
-    "Starcoder2Config",
     "JAISConfig",
 ]

View File

@ -1,55 +0,0 @@
from transformers import PretrainedConfig
class Starcoder2Config(PretrainedConfig):
model_type = "starcoder2"
keys_to_ignore_at_inference = ["past_key_values"]
def __init__(
self,
vocab_size=49152,
hidden_size=3072,
intermediate_size=12288,
num_hidden_layers=30,
num_attention_heads=24,
num_key_value_heads=2,
hidden_act="gelu_pytorch_tanh",
max_position_embeddings=4096,
initializer_range=0.018042,
norm_epsilon=1e-5,
use_cache=True,
bos_token_id=50256,
eos_token_id=50256,
rope_theta=10000.0,
sliding_window=None,
attention_dropout=0.0,
residual_dropout=0.0,
embedding_dropout=0.0,
use_bias=True,
**kwargs,
):
self.vocab_size = vocab_size
self.max_position_embeddings = max_position_embeddings
self.hidden_size = hidden_size
self.intermediate_size = intermediate_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.sliding_window = sliding_window
self.use_bias = use_bias
self.num_key_value_heads = num_key_value_heads
self.hidden_act = hidden_act
self.initializer_range = initializer_range
self.norm_epsilon = norm_epsilon
self.use_cache = use_cache
self.rope_theta = rope_theta
self.attention_dropout = attention_dropout
self.residual_dropout = residual_dropout
self.embedding_dropout = embedding_dropout
super().__init__(
bos_token_id=bos_token_id,
eos_token_id=eos_token_id,
**kwargs,
)
if self.architectures is None:
self.architectures = ['Starcoder2ForCausalLM']
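
The deleted module duplicated the upstream configuration; after the version bump the same class and fields come directly from transformers. A small usage sketch, assuming transformers >= 4.39.0 (the parameter values below are illustrative, not taken from this commit):

from transformers import Starcoder2Config

# Same attribute surface as the removed vLLM copy (hidden_size, num_key_value_heads,
# sliding_window, use_bias, ...), now provided by the upstream class.
cfg = Starcoder2Config(hidden_size=3072, num_key_value_heads=2, sliding_window=4096)
print(cfg.model_type, cfg.hidden_size, cfg.sliding_window)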