| File | Last commit message | Last commit date |
| --- | --- | --- |
| .. | | |
| __init__.py | Change the name to vLLM (#150) | 2023-06-17 03:07:40 -07:00 |
| cache_engine.py | [Model] Jamba support (#4115) | 2024-07-02 23:11:29 +00:00 |
| cpu_enc_dec_model_runner.py | [Hardware][CPU] Cross-attention and Encoder-Decoder models support on CPU backend (#9089) | 2024-10-07 06:50:35 +00:00 |
| cpu_model_runner.py | [Core][Frontend] Add Support for Inference Time mm_processor_kwargs (#9131) | 2024-10-08 14:12:56 +00:00 |
| cpu_worker.py | [Hardware][CPU] Support AWQ for CPU backend (#7515) | 2024-10-09 10:28:08 -06:00 |
| embedding_model_runner.py | [Model] PP support for embedding models and update docs (#9090) | 2024-10-06 16:35:27 +08:00 |
| enc_dec_model_runner.py | [misc] add forward context for attention (#9029) | 2024-10-03 12:09:42 -07:00 |
| model_runner_base.py | [MISC] Skip dumping inputs when unpicklable (#8744) | 2024-09-24 06:10:03 +00:00 |
| model_runner.py | [torch.compile] integration with compilation control (#9058) | 2024-10-10 12:39:36 -07:00 |
| multi_step_model_runner.py | [Bugfix] Fix PP for Multi-Step (#8887) | 2024-09-28 08:52:46 -07:00 |
| multi_step_tpu_worker.py | [TPU] Implement multi-step scheduling (#8489) | 2024-09-14 16:58:31 -07:00 |
| multi_step_worker.py | [Core] Multi-Step + Single Step Prefills via Chunked Prefill code path (#8378) | 2024-09-27 13:32:07 -07:00 |
| neuron_model_runner.py | [Core][Frontend] Add Support for Inference Time mm_processor_kwargs (#9131) | 2024-10-08 14:12:56 +00:00 |
| neuron_worker.py | [Bugfix] neuron: enable tensor parallelism (#7562) | 2024-08-26 15:13:13 -07:00 |
| openvino_model_runner.py | [Core][Frontend] Add Support for Inference Time mm_processor_kwargs (#9131) | 2024-10-08 14:12:56 +00:00 |
| openvino_worker.py | [OpenVINO] Enable GPU support for OpenVINO vLLM backend (#8192) | 2024-10-02 17:50:01 -04:00 |
| tpu_model_runner.py | [misc] hide best_of from engine (#9261) | 2024-10-10 21:30:44 -07:00 |
| tpu_worker.py | [torch.compile] use empty tensor instead of None for profiling (#8875) | 2024-09-27 08:11:32 -07:00 |
| utils.py | [Model] Add support for the multi-modal Llama 3.2 model (#8811) | 2024-09-25 13:29:32 -07:00 |
| worker_base.py | [Core] Logprobs support in Multi-step (#7652) | 2024-08-29 19:19:08 -07:00 |
| worker.py | [CI/Build] Avoid CUDA initialization (#8534) | 2024-09-18 10:38:11 +00:00 |
| xpu_model_runner.py | [Intel GPU] Fix xpu decode input (#9145) | 2024-10-08 03:51:14 +00:00 |
| xpu_worker.py | [Hardware][Intel GPU] Add intel GPU pipeline parallel support. (#7810) | 2024-08-27 10:07:02 -07:00 |