[TPU][Bugfix] Use XLA rank for persistent cache path (#8137)
This commit is contained in:
parent
d4db9f53c8
commit
61f4a93d14
@ -59,7 +59,7 @@ First, install the dependencies:
|
|||||||
$ export DATE="20240828"
|
$ export DATE="20240828"
|
||||||
$ export TORCH_VERSION="2.5.0"
|
$ export TORCH_VERSION="2.5.0"
|
||||||
$ pip install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-${TORCH_VERSION}.dev${DATE}-cp310-cp310-linux_x86_64.whl
|
$ pip install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-${TORCH_VERSION}.dev${DATE}-cp310-cp310-linux_x86_64.whl
|
||||||
$ pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TORCH_VERSION}.dev${DATE}-cp310-cp310-linux_x86_64.whl
|
$ pip install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TORCH_VERSION}.dev${DATE}-cp310-cp310-linux_x86_64.whl
|
||||||
|
|
||||||
$ # Install JAX and Pallas.
|
$ # Install JAX and Pallas.
|
||||||
$ pip install torch_xla[tpu] -f https://storage.googleapis.com/libtpu-releases/index.html
|
$ pip install torch_xla[tpu] -f https://storage.googleapis.com/libtpu-releases/index.html
|
||||||
|
|||||||
@ -102,8 +102,9 @@ class TPUWorker(LoraNotSupportedWorkerBase, LocalOrDistributedWorkerBase):
|
|||||||
# NOTE(woosuk): Set per-rank cache path since different ranks
|
# NOTE(woosuk): Set per-rank cache path since different ranks
|
||||||
# can have slightly different XLA graphs.
|
# can have slightly different XLA graphs.
|
||||||
world_size = self.parallel_config.world_size
|
world_size = self.parallel_config.world_size
|
||||||
|
rank = xr.global_ordinal()
|
||||||
per_rank_path = os.path.join(envs.VLLM_XLA_CACHE_PATH,
|
per_rank_path = os.path.join(envs.VLLM_XLA_CACHE_PATH,
|
||||||
f"tp{world_size}_rank{self.rank}")
|
f"tp{world_size}_rank{rank}")
|
||||||
xr.initialize_cache(per_rank_path, readonly=False)
|
xr.initialize_cache(per_rank_path, readonly=False)
|
||||||
|
|
||||||
def load_model(self):
|
def load_model(self):
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user