[Core] Move ray_utils.py from engine to executor package (#4347)
This commit is contained in:
parent
96e90fdeb3
commit
479d69fad0
@ -3,8 +3,8 @@
|
|||||||
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
|
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
|
||||||
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
||||||
from vllm.engine.llm_engine import LLMEngine
|
from vllm.engine.llm_engine import LLMEngine
|
||||||
from vllm.engine.ray_utils import initialize_ray_cluster
|
|
||||||
from vllm.entrypoints.llm import LLM
|
from vllm.entrypoints.llm import LLM
|
||||||
|
from vllm.executor.ray_utils import initialize_ray_cluster
|
||||||
from vllm.model_executor.models import ModelRegistry
|
from vllm.model_executor.models import ModelRegistry
|
||||||
from vllm.outputs import CompletionOutput, RequestOutput
|
from vllm.outputs import CompletionOutput, RequestOutput
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
|
|||||||
@ -10,7 +10,7 @@ from transformers import PreTrainedTokenizer
|
|||||||
from vllm.config import ModelConfig
|
from vllm.config import ModelConfig
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||||
from vllm.engine.llm_engine import LLMEngine
|
from vllm.engine.llm_engine import LLMEngine
|
||||||
from vllm.engine.ray_utils import initialize_ray_cluster, ray
|
from vllm.executor.ray_utils import initialize_ray_cluster, ray
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.lora.request import LoRARequest
|
from vllm.lora.request import LoRARequest
|
||||||
from vllm.outputs import RequestOutput
|
from vllm.outputs import RequestOutput
|
||||||
|
|||||||
@ -15,8 +15,8 @@ from vllm.engine.output_processor.interfaces import (
|
|||||||
SequenceGroupOutputProcessor)
|
SequenceGroupOutputProcessor)
|
||||||
from vllm.engine.output_processor.stop_checker import StopChecker
|
from vllm.engine.output_processor.stop_checker import StopChecker
|
||||||
from vllm.engine.output_processor.util import create_output_by_sequence_group
|
from vllm.engine.output_processor.util import create_output_by_sequence_group
|
||||||
from vllm.engine.ray_utils import initialize_ray_cluster
|
|
||||||
from vllm.executor.executor_base import ExecutorBase
|
from vllm.executor.executor_base import ExecutorBase
|
||||||
|
from vllm.executor.ray_utils import initialize_ray_cluster
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.lora.request import LoRARequest
|
from vllm.lora.request import LoRARequest
|
||||||
from vllm.outputs import RequestOutput
|
from vllm.outputs import RequestOutput
|
||||||
|
|||||||
@ -5,8 +5,8 @@ from collections import defaultdict
|
|||||||
from itertools import islice, repeat
|
from itertools import islice, repeat
|
||||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
|
||||||
|
|
||||||
from vllm.engine.ray_utils import RayWorkerWrapper, ray
|
|
||||||
from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase
|
from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase
|
||||||
|
from vllm.executor.ray_utils import RayWorkerWrapper, ray
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.lora.request import LoRARequest
|
from vllm.lora.request import LoRARequest
|
||||||
from vllm.sequence import SamplerOutput, SequenceGroupMetadata
|
from vllm.sequence import SamplerOutput, SequenceGroupMetadata
|
||||||
@ -74,7 +74,7 @@ class RayGPUExecutor(ExecutorBase):
|
|||||||
|
|
||||||
# The driver dummy worker does not actually use any resources.
|
# The driver dummy worker does not actually use any resources.
|
||||||
# It holds the resource for the driver worker.
|
# It holds the resource for the driver worker.
|
||||||
self.driver_dummy_worker: RayWorkerWrapper = None
|
self.driver_dummy_worker: Optional[RayWorkerWrapper] = None
|
||||||
# The remaining workers are the actual ray actors.
|
# The remaining workers are the actual ray actors.
|
||||||
self.workers: List[RayWorkerWrapper] = []
|
self.workers: List[RayWorkerWrapper] = []
|
||||||
|
|
||||||
@ -318,6 +318,7 @@ class RayGPUExecutor(ExecutorBase):
|
|||||||
driver_worker_output = self.driver_worker.execute_method(
|
driver_worker_output = self.driver_worker.execute_method(
|
||||||
method, *driver_args, **driver_kwargs)
|
method, *driver_args, **driver_kwargs)
|
||||||
else:
|
else:
|
||||||
|
assert self.driver_dummy_worker is not None
|
||||||
driver_worker_output = ray.get(
|
driver_worker_output = ray.get(
|
||||||
self.driver_dummy_worker.execute_method.remote(
|
self.driver_dummy_worker.execute_method.remote(
|
||||||
method, *driver_args, **driver_kwargs))
|
method, *driver_args, **driver_kwargs))
|
||||||
@ -353,8 +354,9 @@ class RayGPUExecutor(ExecutorBase):
|
|||||||
# a dummy value for now. It will be fixed soon.
|
# a dummy value for now. It will be fixed soon.
|
||||||
with InputNode() as input_data:
|
with InputNode() as input_data:
|
||||||
forward_dag = MultiOutputNode([
|
forward_dag = MultiOutputNode([
|
||||||
worker.execute_model_compiled_dag_remote.bind(input_data)
|
worker.execute_model_compiled_dag_remote.
|
||||||
for worker in self.workers
|
bind( # type: ignore[attr-defined]
|
||||||
|
input_data) for worker in self.workers
|
||||||
])
|
])
|
||||||
return forward_dag.experimental_compile()
|
return forward_dag.experimental_compile()
|
||||||
|
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from vllm.config import TokenizerPoolConfig
|
from vllm.config import TokenizerPoolConfig
|
||||||
from vllm.engine.ray_utils import ray
|
from vllm.executor.ray_utils import ray
|
||||||
from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
|
from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
|
||||||
BaseTokenizerGroup)
|
BaseTokenizerGroup)
|
||||||
from vllm.transformers_utils.tokenizer_group.tokenizer_group import (
|
from vllm.transformers_utils.tokenizer_group.tokenizer_group import (
|
||||||
|
|||||||
@ -6,7 +6,7 @@ from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
|
|||||||
from transformers import PreTrainedTokenizer
|
from transformers import PreTrainedTokenizer
|
||||||
|
|
||||||
from vllm.config import TokenizerPoolConfig
|
from vllm.config import TokenizerPoolConfig
|
||||||
from vllm.engine.ray_utils import ray
|
from vllm.executor.ray_utils import ray
|
||||||
from vllm.lora.request import LoRARequest
|
from vllm.lora.request import LoRARequest
|
||||||
from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
|
from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
|
||||||
BaseTokenizerGroup)
|
BaseTokenizerGroup)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user