[Bugfix] More type hint fixes for py 3.8 (#4039)

2024-04-12 21:07:04 -07:00 · 2024-04-12 21:07:04 -07:00 · 5c2e66e487
commit 5c2e66e487
parent 546e721168
4 changed files with 8 additions and 8 deletions
--- a/vllm/executor/executor_base.py
+++ b/vllm/executor/executor_base.py
@ -39,7 +39,7 @@ class ExecutorBase(ABC):
        ExecutorBase may require modification of the result, e.g. to ensure the
        selected cache sizes are compatible with all workers.

-        Returns a tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
+        Returns a Tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
        are blocks that are "active" on the device and can be appended to.
        num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be
        appended to.
--- a/vllm/worker/cpu_worker.py
+++ b/vllm/worker/cpu_worker.py
@ -1,5 +1,5 @@
 """A CPU worker class."""
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Tuple

 import torch
 import torch.distributed
@ -157,7 +157,7 @@ class CPUWorker(LoraNotSupportedWorkerBase):
    def load_model(self):
        self.model_runner.load_model()

-    def determine_num_available_blocks(self) -> tuple[int, int]:
+    def determine_num_available_blocks(self) -> Tuple[int, int]:
        """Determine the number of blocks available for the KV cache.

        This determines how many KV blocks can fit into the configured CPU
--- a/vllm/worker/neuron_worker.py
+++ b/vllm/worker/neuron_worker.py
@ -1,5 +1,5 @@
 """A Neuron worker class."""
-from typing import List, Optional
+from typing import List, Optional, Tuple

 import torch
 import torch.distributed
@ -40,7 +40,7 @@ class NeuronWorker(LoraNotSupportedWorkerBase):
    def load_model(self):
        self.model_runner.load_model()

-    def determine_num_available_blocks(self) -> tuple[int, int]:
+    def determine_num_available_blocks(self) -> Tuple[int, int]:
        """Determine the number of available KV blocks.

        Swapping is not yet supported, so always return num_cpu_blocks=0.
--- a/vllm/worker/worker_base.py
+++ b/vllm/worker/worker_base.py
@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Dict, List
+from typing import Dict, List, Tuple

 from vllm.lora.request import LoRARequest
 from vllm.sequence import SamplerOutput, SequenceGroupMetadata
@ -18,14 +18,14 @@ class WorkerBase(ABC):
        raise NotImplementedError

    @abstractmethod
-    def determine_num_available_blocks(self) -> tuple[int, int]:
+    def determine_num_available_blocks(self) -> Tuple[int, int]:
        """Determine the number of available blocks for the GPU KV cache and
        swappable CPU KV cache.

        The implementation may run profiling or other heuristics to determine
        the size of caches.

-        Returns a tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
+        Returns a Tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
        are blocks that are "active" on the device and can be appended to.
        num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be
        appended to.