| .. |
|
__init__.py
|
Change the name to vLLM (#150)
|
2023-06-17 03:07:40 -07:00 |
|
cache_engine.py
|
[Kernel] Support sliding window in flash attention backend (#9403)
|
2024-10-20 10:57:52 -07:00 |
|
cpu_embedding_model_runner.py
|
[Bugfix][Hardware][CPU] Fix broken encoder-decoder CPU runner (#10218)
|
2024-11-11 12:37:58 +00:00 |
|
cpu_enc_dec_model_runner.py
|
[Bugfix][Hardware][CPU] Fix broken encoder-decoder CPU runner (#10218)
|
2024-11-11 12:37:58 +00:00 |
|
cpu_model_runner.py
|
[Hardware][CPU] Add embedding models support for CPU backend (#10193)
|
2024-11-11 08:54:28 +00:00 |
|
cpu_worker.py
|
[Hardware][CPU] Add embedding models support for CPU backend (#10193)
|
2024-11-11 08:54:28 +00:00 |
|
embedding_model_runner.py
|
[0/N] Rename MultiModalInputs to MultiModalKwargs (#10040)
|
2024-11-09 11:31:02 +08:00 |
|
enc_dec_model_runner.py
|
[0/N] Rename MultiModalInputs to MultiModalKwargs (#10040)
|
2024-11-09 11:31:02 +08:00 |
|
hpu_model_runner.py
|
[0/N] Rename MultiModalInputs to MultiModalKwargs (#10040)
|
2024-11-09 11:31:02 +08:00 |
|
hpu_worker.py
|
[Hardware][Intel-Gaudi] Add Intel Gaudi (HPU) inference backend (#6143)
|
2024-11-06 01:09:10 -08:00 |
|
model_runner_base.py
|
[2/N] executor pass the complete config to worker/modelrunner (#9938)
|
2024-11-02 07:35:05 -07:00 |
|
model_runner.py
|
[0/N] Rename MultiModalInputs to MultiModalKwargs (#10040)
|
2024-11-09 11:31:02 +08:00 |
|
multi_step_model_runner.py
|
[2/N] executor pass the complete config to worker/modelrunner (#9938)
|
2024-11-02 07:35:05 -07:00 |
|
multi_step_tpu_worker.py
|
[TPU] Implement multi-step scheduling (#8489)
|
2024-09-14 16:58:31 -07:00 |
|
multi_step_worker.py
|
[2/N] executor pass the complete config to worker/modelrunner (#9938)
|
2024-11-02 07:35:05 -07:00 |
|
neuron_model_runner.py
|
[0/N] Rename MultiModalInputs to MultiModalKwargs (#10040)
|
2024-11-09 11:31:02 +08:00 |
|
neuron_worker.py
|
[2/N] executor pass the complete config to worker/modelrunner (#9938)
|
2024-11-02 07:35:05 -07:00 |
|
openvino_model_runner.py
|
[0/N] Rename MultiModalInputs to MultiModalKwargs (#10040)
|
2024-11-09 11:31:02 +08:00 |
|
openvino_worker.py
|
[2/N] executor pass the complete config to worker/modelrunner (#9938)
|
2024-11-02 07:35:05 -07:00 |
|
tpu_model_runner.py
|
[3/N] model runner pass the whole config to model (#9958)
|
2024-11-02 12:08:49 -07:00 |
|
tpu_worker.py
|
[2/N] executor pass the complete config to worker/modelrunner (#9938)
|
2024-11-02 07:35:05 -07:00 |
|
utils.py
|
[Doc] Compatibility matrix for mutual exclusive features (#8512)
|
2024-10-11 11:18:50 -07:00 |
|
worker_base.py
|
[2/N] executor pass the complete config to worker/modelrunner (#9938)
|
2024-11-02 07:35:05 -07:00 |
|
worker.py
|
[Misc] Consolidate ModelConfig code related to HF config (#10104)
|
2024-11-07 06:00:21 +00:00 |
|
xpu_model_runner.py
|
[0/N] Rename MultiModalInputs to MultiModalKwargs (#10040)
|
2024-11-09 11:31:02 +08:00 |
|
xpu_worker.py
|
[2/N] executor pass the complete config to worker/modelrunner (#9938)
|
2024-11-02 07:35:05 -07:00 |