core.resharding.copy_services.gloo_copy_service

Module Contents

Classes

SendOp

Simple container describing a single send operation.

RecvOp

Simple container describing a single receive operation.

GlooCopyService

CopyService implementation that routes refit traffic over a CPU/Gloo process group instead of NCCL.

Data

API

core.resharding.copy_services.gloo_copy_service.logger

`getLogger(…)`

class core.resharding.copy_services.gloo_copy_service.SendOp

Simple container describing a single send operation.

task_id: int | None#

None

tensor: torch.Tensor#

None

dest_rank: int#

None

class core.resharding.copy_services.gloo_copy_service.RecvOp

Simple container describing a single receive operation.

task_id: int | None#

None

tensor: torch.Tensor#

None

src_rank: int#

None

class core.resharding.copy_services.gloo_copy_service.GlooCopyService(group=None)

Bases: core.resharding.copy_services.base.CopyService

CopyService implementation that routes refit traffic over a CPU/Gloo process group instead of NCCL.

Initialization

submit_send(src_tensor: torch.Tensor, dest_rank: int, task_id: Optional[int] = None)
submit_recv(dest_tensor: torch.Tensor, src_rank: int, task_id: Optional[int] = None)
run()