[Misc] Bump up transformers to v4.39.0 & Remove StarCoder2Config (#3551)
Co-authored-by: Roy <jasonailu87@gmail.com>
Co-authored-by: Roger Meier <r.meier@siemens.com>
parent 865732342b
commit c188ecb080
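Starting with transformers v4.39.0, `Starcoder2Config` ships upstream, so the vendored copy in vllm/transformers_utils/configs and the import-time fallback can both be dropped. A quick way to confirm the prerequisite (a minimal sketch, assuming transformers >= 4.39.0 is installed in the current environment):

    import transformers
    from transformers import Starcoder2Config  # available upstream since v4.39.0

    print(transformers.__version__)         # expect 4.39.0 or newer
    print(Starcoder2Config().model_type)    # "starcoder2"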
@@ -7,7 +7,7 @@ ray >= 2.9
 sentencepiece  # Required for LLaMA tokenizer.
 numpy
 tokenizers>=0.15.0
-transformers >= 4.38.0  # Required for Gemma.
+transformers >= 4.39.0  # Required for StarCoder2.
 fastapi
 uvicorn[standard]
 pydantic >= 2.0  # Required for OpenAI server.
@@ -5,7 +5,7 @@ ray >= 2.9
 sentencepiece  # Required for LLaMA tokenizer.
 numpy
 torch == 2.1.2
-transformers >= 4.38.0  # Required for Gemma.
+transformers >= 4.39.0  # Required for StarCoder2.
 xformers == 0.0.23.post1  # Required for CUDA 12.1.
 fastapi
 uvicorn[standard]
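Both requirements files raise the transformers floor in lockstep; the new pin still satisfies the old Gemma requirement, since 4.39.0 is newer than 4.38.0. An existing environment can be brought up to date with a single command (assuming pip manages the environment):

    pip install "transformers>=4.39.0"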
@@ -22,6 +22,7 @@ from typing import List, Optional, Tuple
 
 import torch
 from torch import nn
+from transformers import Starcoder2Config
 
 from vllm.model_executor.input_metadata import InputMetadata
 from vllm.model_executor.sampling_metadata import SamplingMetadata
@@ -42,13 +43,6 @@ from vllm.model_executor.weight_utils import (default_weight_loader,
                                               hf_model_weights_iterator)
 from vllm.sequence import SamplerOutput
 
-try:
-    from transformers import Starcoder2Config
-except ImportError:
-    # fallback to PretrainedConfig
-    # NOTE: Please install transformers from source or use transformers>=4.39.0
-    from transformers import PretrainedConfig as Starcoder2Config
-
 KVCache = Tuple[torch.Tensor, torch.Tensor]
 
 
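The model file now imports `Starcoder2Config` unconditionally at the top of the module, so the try/except shim that silently fell back to `PretrainedConfig` is deleted. In practice this means a too-old transformers fails fast with an ImportError instead of surfacing later as missing config attributes. A small smoke test of the upstream class (a sketch, assuming transformers >= 4.39.0):

    from transformers import Starcoder2Config  # raises ImportError on transformers < 4.39.0

    cfg = Starcoder2Config(num_hidden_layers=2)  # shrunken config, e.g. for a unit test
    assert cfg.num_key_value_heads == 2          # upstream default, same as the vendored class below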
@@ -9,7 +9,6 @@ _CONFIG_REGISTRY = {
     "mpt": MPTConfig,
     "RefinedWeb": RWConfig,  # For tiiuae/falcon-40b(-instruct)
     "RefinedWebModel": RWConfig,  # For tiiuae/falcon-7b(-instruct)
-    "starcoder2": Starcoder2Config,
     "jais": JAISConfig,
 }
 
@@ -18,15 +17,6 @@ def get_config(model: str,
                trust_remote_code: bool,
                revision: Optional[str] = None,
                code_revision: Optional[str] = None) -> PretrainedConfig:
-    # FIXME(woosuk): This is a temporary fix for StarCoder2.
-    # Remove this when the model is supported by HuggingFace transformers.
-    if "bigcode" in model and "starcoder2" in model:
-        config_class = _CONFIG_REGISTRY["starcoder2"]
-        config = config_class.from_pretrained(model,
-                                              revision=revision,
-                                              code_revision=code_revision)
-        return config
-
     try:
         config = AutoConfig.from_pretrained(
             model,
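With the registry entry and the bigcode special case removed, get_config falls straight through to AutoConfig, which resolves the starcoder2 model type natively in transformers >= 4.39.0. A sketch of the new resolution path (bigcode/starcoder2-3b is an illustrative checkpoint id assumed here, not taken from this diff):

    from transformers import AutoConfig, Starcoder2Config

    # AutoConfig maps model_type "starcoder2" to the upstream Starcoder2Config,
    # so no vLLM-side registry lookup or FIXME branch is needed anymore.
    config = AutoConfig.from_pretrained("bigcode/starcoder2-3b")
    assert isinstance(config, Starcoder2Config)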
@@ -4,13 +4,11 @@ from vllm.transformers_utils.configs.mpt import MPTConfig
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
 from vllm.transformers_utils.configs.falcon import RWConfig
-from vllm.transformers_utils.configs.starcoder2 import Starcoder2Config
 from vllm.transformers_utils.configs.jais import JAISConfig
 
 __all__ = [
     "ChatGLMConfig",
     "MPTConfig",
     "RWConfig",
-    "Starcoder2Config",
     "JAISConfig",
 ]
@@ -1,55 +0,0 @@
-from transformers import PretrainedConfig
-
-
-class Starcoder2Config(PretrainedConfig):
-    model_type = "starcoder2"
-    keys_to_ignore_at_inference = ["past_key_values"]
-
-    def __init__(
-        self,
-        vocab_size=49152,
-        hidden_size=3072,
-        intermediate_size=12288,
-        num_hidden_layers=30,
-        num_attention_heads=24,
-        num_key_value_heads=2,
-        hidden_act="gelu_pytorch_tanh",
-        max_position_embeddings=4096,
-        initializer_range=0.018042,
-        norm_epsilon=1e-5,
-        use_cache=True,
-        bos_token_id=50256,
-        eos_token_id=50256,
-        rope_theta=10000.0,
-        sliding_window=None,
-        attention_dropout=0.0,
-        residual_dropout=0.0,
-        embedding_dropout=0.0,
-        use_bias=True,
-        **kwargs,
-    ):
-        self.vocab_size = vocab_size
-        self.max_position_embeddings = max_position_embeddings
-        self.hidden_size = hidden_size
-        self.intermediate_size = intermediate_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.sliding_window = sliding_window
-        self.use_bias = use_bias
-        self.num_key_value_heads = num_key_value_heads
-        self.hidden_act = hidden_act
-        self.initializer_range = initializer_range
-        self.norm_epsilon = norm_epsilon
-        self.use_cache = use_cache
-        self.rope_theta = rope_theta
-        self.attention_dropout = attention_dropout
-        self.residual_dropout = residual_dropout
-        self.embedding_dropout = embedding_dropout
-
-        super().__init__(
-            bos_token_id=bos_token_id,
-            eos_token_id=eos_token_id,
-            **kwargs,
-        )
-        if self.architectures is None:
-            self.architectures = ['Starcoder2ForCausalLM']
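The deleted file vendored the config ahead of upstream support; its defaults are expected to match what transformers >= 4.39.0 now ships, which is what makes the deletion safe. A spot-check of that equivalence (a sketch, assuming transformers >= 4.39.0; attribute names and values taken from the deleted class above):

    from transformers import Starcoder2Config

    cfg = Starcoder2Config()
    assert cfg.vocab_size == 49152
    assert cfg.hidden_size == 3072
    assert cfg.num_hidden_layers == 30
    assert cfg.num_attention_heads == 24
    assert cfg.num_key_value_heads == 2
    assert cfg.hidden_act == "gelu_pytorch_tanh"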