| .. |
|
__init__.py
|
Add distributed model executor abstraction (#3191)
|
2024-03-11 11:03:45 -07:00 |
|
cpu_executor.py
|
[LoRA] Add support for pinning lora adapters in the LRU cache (#5603)
|
2024-06-21 15:42:46 -07:00 |
|
distributed_gpu_executor.py
|
[Core] Pipeline Parallel Support (#4412)
|
2024-07-02 10:58:08 -07:00 |
|
executor_base.py
|
[Core] Pipeline Parallel Support (#4412)
|
2024-07-02 10:58:08 -07:00 |
|
gpu_executor.py
|
[Core] Pipeline Parallel Support (#4412)
|
2024-07-02 10:58:08 -07:00 |
|
multiproc_gpu_executor.py
|
[Core] Pipeline Parallel Support (#4412)
|
2024-07-02 10:58:08 -07:00 |
|
multiproc_worker_utils.py
|
[Core][Doc] Default to multiprocessing for single-node distributed case (#5230)
|
2024-06-11 11:10:41 -07:00 |
|
neuron_executor.py
|
[Core] Refactor Worker and ModelRunner to consolidate control plane communication (#5408)
|
2024-06-25 20:30:03 -07:00 |
|
openvino_executor.py
|
[Hardware][Intel] OpenVINO vLLM backend (#5379)
|
2024-06-28 13:50:16 +00:00 |
|
ray_gpu_executor.py
|
[Core] Pipeline Parallel Support (#4412)
|
2024-07-02 10:58:08 -07:00 |
|
ray_utils.py
|
[Hardware][Intel GPU] Add Intel GPU(XPU) inference backend (#3814)
|
2024-06-17 11:01:25 -07:00 |
|
ray_xpu_executor.py
|
[Hardware][Intel GPU] Add Intel GPU(XPU) inference backend (#3814)
|
2024-06-17 11:01:25 -07:00 |
|
tpu_executor.py
|
[Hardware][TPU] Refactor TPU backend (#5831)
|
2024-06-25 15:25:52 -07:00 |
|
xpu_executor.py
|
[Hardware][Intel GPU] Add Intel GPU(XPU) inference backend (#3814)
|
2024-06-17 11:01:25 -07:00 |