[MISC] rename CudaMemoryProfiler to DeviceMemoryProfiler (#8703)
This commit is contained in:
parent 8ca5051b9a
commit ca2b628b3c
@@ -757,7 +757,7 @@ def is_pin_memory_available() -> bool:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
class CudaMemoryProfiler:
|
class DeviceMemoryProfiler:
|
||||||
|
|
||||||
def __init__(self, device: Optional[torch.types.Device] = None):
|
def __init__(self, device: Optional[torch.types.Device] = None):
|
||||||
self.device = device
|
self.device = device
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ from vllm.prompt_adapter.worker_manager import (
|
|||||||
LRUCacheWorkerPromptAdapterManager)
|
LRUCacheWorkerPromptAdapterManager)
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
|
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
|
||||||
from vllm.utils import (CudaMemoryProfiler, PyObjectCache, async_tensor_h2d,
|
from vllm.utils import (DeviceMemoryProfiler, PyObjectCache, async_tensor_h2d,
|
||||||
flatten_2d_lists, is_hip, is_pin_memory_available,
|
flatten_2d_lists, is_hip, is_pin_memory_available,
|
||||||
supports_dynamo)
|
supports_dynamo)
|
||||||
from vllm.worker.model_runner_base import (
|
from vllm.worker.model_runner_base import (
|
||||||
@@ -1012,7 +1012,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
|
|||||||
|
|
||||||
def load_model(self) -> None:
|
def load_model(self) -> None:
|
||||||
logger.info("Starting to load model %s...", self.model_config.model)
|
logger.info("Starting to load model %s...", self.model_config.model)
|
||||||
with CudaMemoryProfiler() as m:
|
with DeviceMemoryProfiler() as m:
|
||||||
self.model = get_model(model_config=self.model_config,
|
self.model = get_model(model_config=self.model_config,
|
||||||
device_config=self.device_config,
|
device_config=self.device_config,
|
||||||
load_config=self.load_config,
|
load_config=self.load_config,
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
|
|||||||
MultiModalInputs, MultiModalRegistry)
|
MultiModalInputs, MultiModalRegistry)
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
|
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
|
||||||
from vllm.utils import CudaMemoryProfiler, make_tensor_with_pad
|
from vllm.utils import DeviceMemoryProfiler, make_tensor_with_pad
|
||||||
from vllm.worker.model_runner import AttentionMetadata, SamplingMetadata
|
from vllm.worker.model_runner import AttentionMetadata, SamplingMetadata
|
||||||
from vllm.worker.model_runner_base import (
|
from vllm.worker.model_runner_base import (
|
||||||
ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase,
|
ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase,
|
||||||
@@ -391,7 +391,7 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
|
|||||||
self.model: nn.Module # Set after init_Model
|
self.model: nn.Module # Set after init_Model
|
||||||
|
|
||||||
def load_model(self) -> None:
|
def load_model(self) -> None:
|
||||||
with CudaMemoryProfiler() as m:
|
with DeviceMemoryProfiler() as m:
|
||||||
self.model = get_model(
|
self.model = get_model(
|
||||||
model_config=self.model_config,
|
model_config=self.model_config,
|
||||||
device_config=self.device_config,
|
device_config=self.device_config,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user