| .. |
|
__init__.py
|
Change the name to vLLM (#150)
|
2023-06-17 03:07:40 -07:00 |
|
cache_engine.py
|
[Model] Jamba support (#4115)
|
2024-07-02 23:11:29 +00:00 |
|
cpu_model_runner.py
|
[vlm] Remove vision language config. (#6089)
|
2024-07-03 22:14:16 +00:00 |
|
cpu_worker.py
|
[Hardware][Intel CPU] Adding intel openmp tunings in Docker file (#6008)
|
2024-07-04 15:22:12 -07:00 |
|
embedding_model_runner.py
|
[vlm] Remove vision language config. (#6089)
|
2024-07-03 22:14:16 +00:00 |
|
model_runner_base.py
|
[Model] Jamba support (#4115)
|
2024-07-02 23:11:29 +00:00 |
|
model_runner.py
|
[VLM] Calculate maximum number of multi-modal tokens by model (#6121)
|
2024-07-04 16:37:23 -07:00 |
|
neuron_model_runner.py
|
[Core] Dynamic image size support for VLMs (#5276)
|
2024-07-02 20:34:00 -07:00 |
|
neuron_worker.py
|
[Core] Pipeline Parallel Support (#4412)
|
2024-07-02 10:58:08 -07:00 |
|
openvino_model_runner.py
|
[vlm] Remove vision language config. (#6089)
|
2024-07-03 22:14:16 +00:00 |
|
openvino_worker.py
|
[core][distributed] support n layers % pp size != 0 (#6115)
|
2024-07-03 16:40:31 -07:00 |
|
tpu_model_runner.py
|
[vlm] Remove vision language config. (#6089)
|
2024-07-03 22:14:16 +00:00 |
|
tpu_worker.py
|
[core][distributed] support n layers % pp size != 0 (#6115)
|
2024-07-03 16:40:31 -07:00 |
|
worker_base.py
|
[Model] Jamba support (#4115)
|
2024-07-02 23:11:29 +00:00 |
|
worker.py
|
[core][distributed] support n layers % pp size != 0 (#6115)
|
2024-07-03 16:40:31 -07:00 |
|
xpu_model_runner.py
|
[VLM] Calculate maximum number of multi-modal tokens by model (#6121)
|
2024-07-04 16:37:23 -07:00 |
|
xpu_worker.py
|
[core][distributed] support n layers % pp size != 0 (#6115)
|
2024-07-03 16:40:31 -07:00 |