diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 784c0caf..83dfbc2e 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -153,7 +153,7 @@ class LLMEngine:
                     placement_group=placement_group,
                     placement_group_capture_child_tasks=True),
                 **ray_remote_kwargs,
-            )(RayWorker).remote()
+            )(RayWorker).remote(self.model_config.trust_remote_code)
             self.workers.append(worker)
 
         # Initialize torch distributed process group for the workers.
diff --git a/vllm/engine/ray_utils.py b/vllm/engine/ray_utils.py
index f085f922..2982f0f9 100644
--- a/vllm/engine/ray_utils.py
+++ b/vllm/engine/ray_utils.py
@@ -11,7 +11,11 @@ try:
         """Ray wrapper for vllm.worker.Worker, allowing Worker to be
         lazliy initialized after Ray sets CUDA_VISIBLE_DEVICES."""
 
-        def __init__(self) -> None:
+        def __init__(self, init_cached_hf_modules=False) -> None:
+            if init_cached_hf_modules:
+                # pylint: disable=import-outside-toplevel
+                from transformers.dynamic_module_utils import init_hf_modules
+                init_hf_modules()
             self.worker = None
 
         def init_worker(self, worker_init_fn):
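
For context, below is a minimal standalone sketch (not vLLM code; the LazyWorker actor name is made up) of the pattern this patch introduces: when trust_remote_code is set, the Ray actor calls transformers' init_hf_modules() in its constructor, so the dynamic-modules cache is set up in the actor process before the lazily created worker tries to import any remotely downloaded model code. It assumes ray and transformers are installed.

import ray


@ray.remote
class LazyWorker:
    """Toy stand-in for RayWorker; the real class lives in vllm/engine/ray_utils.py."""

    def __init__(self, init_cached_hf_modules: bool = False) -> None:
        if init_cached_hf_modules:
            # init_hf_modules() creates the Hugging Face dynamic-modules cache
            # directory and puts it on sys.path, so code downloaded for
            # trust_remote_code models can be imported inside this Ray process.
            from transformers.dynamic_module_utils import init_hf_modules
            init_hf_modules()
        self.worker = None

    def init_worker(self, worker_init_fn):
        # Lazy initialization: the actual worker is built only after Ray has
        # assigned CUDA_VISIBLE_DEVICES to this actor.
        self.worker = worker_init_fn()


if __name__ == "__main__":
    ray.init()
    # Mirrors the llm_engine.py side of the patch: forward trust_remote_code
    # into the actor constructor.
    trust_remote_code = True
    worker = LazyWorker.remote(trust_remote_code)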