| .. |
|
__init__.py
|
Change the name to vLLM (#150)
|
2023-06-17 03:07:40 -07:00 |
|
cache_engine.py
|
[Model] Jamba support (#4115)
|
2024-07-02 23:11:29 +00:00 |
|
cpu_model_runner.py
|
[Misc] Consolidate and optimize logic for building padded tensors (#6541)
|
2024-07-20 04:17:24 +00:00 |
|
cpu_worker.py
|
[CORE] Adding support for insertion of soft-tuned prompts (#4645)
|
2024-07-09 13:26:36 -07:00 |
|
embedding_model_runner.py
|
[CORE] Adding support for insertion of soft-tuned prompts (#4645)
|
2024-07-09 13:26:36 -07:00 |
|
model_runner_base.py
|
[Misc] Add a wrapper for torch.inference_mode (#6618)
|
2024-07-21 18:43:11 -07:00 |
|
model_runner.py
|
[misc] add start loading models for users information (#6670)
|
2024-07-22 20:08:02 -07:00 |
|
neuron_model_runner.py
|
[Misc] Consolidate and optimize logic for building padded tensors (#6541)
|
2024-07-20 04:17:24 +00:00 |
|
neuron_worker.py
|
[Core] Pipeline Parallel Support (#4412)
|
2024-07-02 10:58:08 -07:00 |
|
openvino_model_runner.py
|
[vlm] Remove vision language config. (#6089)
|
2024-07-03 22:14:16 +00:00 |
|
openvino_worker.py
|
[core][distributed] support n layers % pp size != 0 (#6115)
|
2024-07-03 16:40:31 -07:00 |
|
tpu_model_runner.py
|
[TPU] Refactor TPU worker & model runner (#6506)
|
2024-07-18 01:34:16 -07:00 |
|
tpu_worker.py
|
[TPU] Refactor TPU worker & model runner (#6506)
|
2024-07-18 01:34:16 -07:00 |
|
worker_base.py
|
[Misc] Add a wrapper for torch.inference_mode (#6618)
|
2024-07-21 18:43:11 -07:00 |
|
worker.py
|
[Misc] Use torch.Tensor for type annotation (#6505)
|
2024-07-17 13:01:10 +00:00 |
|
xpu_model_runner.py
|
[Misc] Consolidate and optimize logic for building padded tensors (#6541)
|
2024-07-20 04:17:24 +00:00 |
|
xpu_worker.py
|
[CORE] Adding support for insertion of soft-tuned prompts (#4645)
|
2024-07-09 13:26:36 -07:00 |