vllm/worker at 18b296fdb2248e8a65bf005e7193ebd523b875b6 - vllm

History

youkaichao 18b296fdb2 [core] remove beam search from the core (#9105 )		2024-10-07 05:47:04 +00:00
..
__init__.py	Change the name to vLLM (#150 )	2023-06-17 03:07:40 -07:00
cache_engine.py	[Model] Jamba support (#4115 )	2024-07-02 23:11:29 +00:00
cpu_model_runner.py	[Bugfix][Hardware][CPU] Fix CPU model input for decode (#9044 )	2024-10-06 19:14:27 -07:00
cpu_worker.py	[Hardware][Intel] Support compressed-tensor W8A8 for CPU backend (#7257 )	2024-09-11 09:46:46 -07:00
embedding_model_runner.py	[Model] PP support for embedding models and update docs (#9090 )	2024-10-06 16:35:27 +08:00
enc_dec_model_runner.py	[misc] add forward context for attention (#9029 )	2024-10-03 12:09:42 -07:00
model_runner_base.py	[MISC] Skip dumping inputs when unpicklable (#8744 )	2024-09-24 06:10:03 +00:00
model_runner.py	[Misc] Move registry to its own file (#9064 )	2024-10-04 10:01:37 +00:00
multi_step_model_runner.py	[Bugfix] Fix PP for Multi-Step (#8887 )	2024-09-28 08:52:46 -07:00
multi_step_tpu_worker.py	[TPU] Implement multi-step scheduling (#8489 )	2024-09-14 16:58:31 -07:00
multi_step_worker.py	[Core] Multi-Step + Single Step Prefills via Chunked Prefill code path (#8378 )	2024-09-27 13:32:07 -07:00
neuron_model_runner.py	[Hardware][Neuron] Add on-device sampling support for Neuron (#8746 )	2024-10-04 16:42:20 -07:00
neuron_worker.py	[Bugfix] neuron: enable tensor parallelism (#7562 )	2024-08-26 15:13:13 -07:00
openvino_model_runner.py	[OpenVINO] Enable GPU support for OpenVINO vLLM backend (#8192 )	2024-10-02 17:50:01 -04:00
openvino_worker.py	[OpenVINO] Enable GPU support for OpenVINO vLLM backend (#8192 )	2024-10-02 17:50:01 -04:00
tpu_model_runner.py	[core] remove beam search from the core (#9105 )	2024-10-07 05:47:04 +00:00
tpu_worker.py	[torch.compile] use empty tensor instead of None for profiling (#8875 )	2024-09-27 08:11:32 -07:00
utils.py	[Model] Add support for the multi-modal Llama 3.2 model (#8811 )	2024-09-25 13:29:32 -07:00
worker_base.py	[Core] Logprobs support in Multi-step (#7652 )	2024-08-29 19:19:08 -07:00
worker.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
xpu_model_runner.py	[torch.compile] use empty tensor instead of None for profiling (#8875 )	2024-09-27 08:11:32 -07:00
xpu_worker.py	[Hardware][Intel GPU] Add intel GPU pipeline parallel support. (#7810 )	2024-08-27 10:07:02 -07:00