vllm/vllm/model_executor/models
2024-02-05 17:38:02 -08:00
..
__init__.py Add Internlm2 (#2666) 2024-02-01 09:27:40 -08:00
aquila.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
baichuan.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
bloom.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
chatglm.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
decilm.py Added DeciLM-7b and DeciLM-7b-instruct (#2062) 2023-12-19 02:29:33 -08:00
deepseek.py Add fused top-K softmax kernel for MoE (#2769) 2024-02-05 17:38:02 -08:00
falcon.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
gpt2.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
gpt_bigcode.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
gpt_j.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
gpt_neox.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
internlm2.py Add Internlm2 (#2666) 2024-02-01 09:27:40 -08:00
internlm.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
llama.py [Experimental] Add multi-LoRA support (#1804) 2024-01-23 15:26:37 -08:00
mistral.py [Experimental] Add multi-LoRA support (#1804) 2024-01-23 15:26:37 -08:00
mixtral_quant.py Add quantized mixtral support (#2673) 2024-01-30 16:34:10 -08:00
mixtral.py Add fused top-K softmax kernel for MoE (#2769) 2024-02-05 17:38:02 -08:00
mpt.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
opt.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
phi.py Address Phi modeling update 2 (#2428) 2024-01-12 12:16:49 -08:00
qwen2.py fix names and license for Qwen2 (#2589) 2024-01-24 22:37:51 -08:00
qwen.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00
stablelm.py Support for Stable LM 2 (#2598) 2024-01-26 12:45:19 -08:00
yi.py Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221) 2024-01-03 11:30:22 -08:00