| .. |
|
__init__.py
|
Change the name to vLLM (#150)
|
2023-06-17 03:07:40 -07:00 |
|
cache_engine.py
|
[Model] Jamba support (#4115)
|
2024-07-02 23:11:29 +00:00 |
|
cpu_model_runner.py
|
[VLM] Refactor MultiModalConfig initialization and profiling (#7530)
|
2024-08-17 13:30:55 -07:00 |
|
cpu_worker.py
|
[VLM] Refactor MultiModalConfig initialization and profiling (#7530)
|
2024-08-17 13:30:55 -07:00 |
|
embedding_model_runner.py
|
[VLM] Refactor MultiModalConfig initialization and profiling (#7530)
|
2024-08-17 13:30:55 -07:00 |
|
enc_dec_model_runner.py
|
[VLM] Refactor MultiModalConfig initialization and profiling (#7530)
|
2024-08-17 13:30:55 -07:00 |
|
model_runner_base.py
|
[core] Multi Step Scheduling (#7000)
|
2024-08-19 13:52:13 -07:00 |
|
model_runner.py
|
[VLM] Refactor MultiModalConfig initialization and profiling (#7530)
|
2024-08-17 13:30:55 -07:00 |
|
multi_step_model_runner.py
|
[core] Multi Step Scheduling (#7000)
|
2024-08-19 13:52:13 -07:00 |
|
multi_step_worker.py
|
[core] Multi Step Scheduling (#7000)
|
2024-08-19 13:52:13 -07:00 |
|
neuron_model_runner.py
|
[Bugfix] update neuron for version > 0.5.0 (#7175)
|
2024-08-15 09:44:14 -07:00 |
|
neuron_worker.py
|
[Bugfix] update neuron for version > 0.5.0 (#7175)
|
2024-08-15 09:44:14 -07:00 |
|
openvino_model_runner.py
|
[Bugfix] Fix broadcasting logic for multi_modal_kwargs (#6836)
|
2024-07-31 10:38:45 +08:00 |
|
openvino_worker.py
|
[core][distributed] support n layers % pp size != 0 (#6115)
|
2024-07-03 16:40:31 -07:00 |
|
tpu_model_runner.py
|
[TPU] Remove redundant input tensor cloning (#7660)
|
2024-08-19 15:55:04 -07:00 |
|
tpu_worker.py
|
[TPU] Skip creating empty tensor (#7630)
|
2024-08-17 14:22:46 -07:00 |
|
utils.py
|
[VLM] Refactor MultiModalConfig initialization and profiling (#7530)
|
2024-08-17 13:30:55 -07:00 |
|
worker_base.py
|
[core] Multi Step Scheduling (#7000)
|
2024-08-19 13:52:13 -07:00 |
|
worker.py
|
[Core] Optimize SPMD architecture with delta + serialization optimization (#7109)
|
2024-08-18 17:57:20 -07:00 |
|
xpu_model_runner.py
|
[Hardware] [Intel GPU] refactor xpu worker/executor (#7686)
|
2024-08-20 09:54:10 -07:00 |
|
xpu_worker.py
|
[Hardware] [Intel GPU] refactor xpu worker/executor (#7686)
|
2024-08-20 09:54:10 -07:00 |