# Source code for nemo_run.core.packaging.hybrid
import os
import shlex
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict

from invoke.context import Context

from nemo_run.core.packaging.base import Packager
# [docs]
@dataclass(kw_only=True)
class HybridPackager(Packager):
    """
    A packager that combines multiple other packagers into one final archive.

    Each subpackager is mapped to a target directory name, which will become
    the top-level folder under which that packager's content is placed.

    If `extract_at_root` is True, the contents of each sub-packager are extracted
    directly at the root of the final archive (i.e. without being nested in a subfolder).
    """

    # Maps a folder name inside the final archive to the packager that fills it.
    sub_packagers: Dict[str, Packager] = field(default_factory=dict)
    # When True, sub-archives are unpacked at the archive root instead of per-key folders.
    extract_at_root: bool = False

    def package(self, path: Path, job_dir: str, name: str) -> str:
        """
        Build ``<job_dir>/<name>.tar.gz`` from the output of every sub-packager.

        Each sub-packager is asked to produce ``<name>_<folder_name>``; its archive
        is unpacked into a scratch directory under ``job_dir``, appended to an
        uncompressed tar, and then deleted. The combined tar is gzipped last.

        Args:
            path: Passed through unchanged to each sub-packager's ``package``.
            job_dir: Directory that receives the final archive and all scratch files.
            name: Base name of the final archive (without the ``.tar.gz`` suffix).

        Returns:
            The path of the final archive. If that file already exists it is
            returned immediately and no sub-packager is invoked.

        Note:
            All work is delegated to ``tar``/``gzip``/``rm``/``mv`` through
            ``Context.run``; errors raised by ``Context.run`` are not caught here.
            ``folder_name`` is inserted verbatim into a sed-style replacement, so
            keys containing ``,``, ``&`` or ``\\`` are not supported.
        """
        final_tar_gz = os.path.join(job_dir, f"{name}.tar.gz")
        if os.path.exists(final_tar_gz):
            return final_tar_gz

        ctx = Context()
        # Every path below is shell-quoted: the commands run through a shell, so
        # spaces or metacharacters in job_dir/name would otherwise split or inject.
        quote = shlex.quote

        # Create an empty tar to append packaged files from each sub-packager
        tmp_tar = final_tar_gz + ".tmp"
        ctx.run(f"tar -cf {quote(tmp_tar)} --files-from /dev/null")

        for folder_name, packager in self.sub_packagers.items():
            subarchive_path = str(packager.package(path, job_dir, f"{name}_{folder_name}"))

            # Start from a clean scratch folder so leftovers from an earlier,
            # interrupted run cannot leak into this archive.
            tmp_extract_dir = os.path.join(job_dir, f"__extract_{folder_name}")
            ctx.run(f"rm -rf {quote(tmp_extract_dir)}")
            os.makedirs(tmp_extract_dir, exist_ok=True)
            ctx.run(f"tar -xf {quote(subarchive_path)} -C {quote(tmp_extract_dir)}")

            # If extract_at_root is True then add files directly to the archive root.
            # Otherwise, add them under a subfolder named after the key.
            if self.extract_at_root:
                ctx.run(f"tar -rf {quote(tmp_tar)} -C {quote(tmp_extract_dir)} .")
            else:
                if os.uname().sysname == "Darwin":
                    # BSD tar uses -s with a chosen delimiter (a comma here).
                    # The first rule renames an entry that is exactly ".",
                    # the second renames entries starting with "./".
                    rules = (f",^\\.$,{folder_name},", f",^\\./,{folder_name}/,")
                    transform_option = " ".join(f"-s {quote(rule)}" for rule in rules)
                else:
                    # GNU tar applies --transform to symlink targets by default,
                    # which would corrupt links; the trailing "S" flag limits the
                    # rewrite to member names and hard-link targets.
                    transform_option = "--transform=" + quote(f"s,^,{folder_name}/,S")
                ctx.run(
                    f"tar {transform_option} -rf {quote(tmp_tar)} -C {quote(tmp_extract_dir)} ."
                )

            ctx.run(f"rm -rf {quote(tmp_extract_dir)}")
            ctx.run(f"rm {quote(subarchive_path)}")

        # Finally, compress the combined tar. Write to a side file and rename it,
        # so a failed gzip never leaves a truncated archive at the final path
        # (which the early-return check above would then treat as complete).
        partial_tar_gz = final_tar_gz + ".partial"
        ctx.run(f"gzip -c {quote(tmp_tar)} > {quote(partial_tar_gz)}")
        ctx.run(f"mv {quote(partial_tar_gz)} {quote(final_tar_gz)}")
        ctx.run(f"rm {quote(tmp_tar)}")
        return final_tar_gz