| .. |
|
__init__.py
|
Change the name to vLLM (#150)
|
2023-06-17 03:07:40 -07:00 |
|
cache_engine.py
|
[Model] Jamba support (#4115)
|
2024-07-02 23:11:29 +00:00 |
|
cpu_model_runner.py
|
[Misc] Raise error when using encoder/decoder model with cpu backend (#8355)
|
2024-09-12 05:45:24 +00:00 |
|
cpu_worker.py
|
[Hardware][Intel] Support compressed-tensor W8A8 for CPU backend (#7257)
|
2024-09-11 09:46:46 -07:00 |
|
embedding_model_runner.py
|
[VLM] Refactor MultiModalConfig initialization and profiling (#7530)
|
2024-08-17 13:30:55 -07:00 |
|
enc_dec_model_runner.py
|
[Encoder decoder] Add cuda graph support during decoding for encoder-decoder models (#7631)
|
2024-09-17 07:35:01 -07:00 |
|
model_runner_base.py
|
[Misc] Don't dump contents of kvcache tensors on errors (#8527)
|
2024-09-17 12:24:29 -07:00 |
|
model_runner.py
|
[Encoder decoder] Add cuda graph support during decoding for encoder-decoder models (#7631)
|
2024-09-17 07:35:01 -07:00 |
|
multi_step_model_runner.py
|
[Core] *Prompt* logprobs support in Multi-step (#8199)
|
2024-09-18 08:38:43 -07:00 |
|
multi_step_tpu_worker.py
|
[TPU] Implement multi-step scheduling (#8489)
|
2024-09-14 16:58:31 -07:00 |
|
multi_step_worker.py
|
[Core] Optimize Async + Multi-step (#8050)
|
2024-09-03 18:50:29 +00:00 |
|
neuron_model_runner.py
|
[Neuron] Adding support for adding/ overriding neuron configuration a… (#8062)
|
2024-09-04 16:33:43 -07:00 |
|
neuron_worker.py
|
[Bugfix] neuron: enable tensor parallelism (#7562)
|
2024-08-26 15:13:13 -07:00 |
|
openvino_model_runner.py
|
[Core] Logprobs support in Multi-step (#7652)
|
2024-08-29 19:19:08 -07:00 |
|
openvino_worker.py
|
[Core] Logprobs support in Multi-step (#7652)
|
2024-08-29 19:19:08 -07:00 |
|
tpu_model_runner.py
|
[TPU] Implement multi-step scheduling (#8489)
|
2024-09-14 16:58:31 -07:00 |
|
tpu_worker.py
|
[TPU][Bugfix] Use XLA rank for persistent cache path (#8137)
|
2024-09-03 18:35:33 -07:00 |
|
utils.py
|
[Encoder decoder] Add cuda graph support during decoding for encoder-decoder models (#7631)
|
2024-09-17 07:35:01 -07:00 |
|
worker_base.py
|
[Core] Logprobs support in Multi-step (#7652)
|
2024-08-29 19:19:08 -07:00 |
|
worker.py
|
[CI/Build] Avoid CUDA initialization (#8534)
|
2024-09-18 10:38:11 +00:00 |
|
xpu_model_runner.py
|
[Core] Logprobs support in Multi-step (#7652)
|
2024-08-29 19:19:08 -07:00 |
|
xpu_worker.py
|
[Hardware][Intel GPU] Add intel GPU pipeline parallel support. (#7810)
|
2024-08-27 10:07:02 -07:00 |