Train an ESM2 model on UR data.
Source code in bionemo/esm2/scripts/finetune_esm2.py
def finetune_esm2_entrypoint() -> Tuple[Path, Callback | None, nl.Trainer]:
    """Train an ESM2 model on UR data."""
    parser = get_parser()
    args = parser.parse_args()

    # Validate arguments
    if args.lora_checkpoint_path and not args.lora_finetune:
        raise ValueError("Argument --lora-checkpoint-path cannot be set without --lora-finetune.")
    if args.precision not in get_args(PrecisionTypes):
        raise ValueError(
            f"Precision {args.precision} not supported. Supported precisions are: {get_args(PrecisionTypes)}"
        )
    if args.task_type not in ["classification", "regression"]:
        raise ValueError(
            f"Task type {args.task_type} not supported. Supported task types are: classification, regression"
        )
    if args.dataset_class not in SUPPORTED_DATASETS:
        raise ValueError(
            f"Dataset class {args.dataset_class} not supported. "
            f"Supported dataset classes are: {list(SUPPORTED_DATASETS)}"
        )
    if args.config_class not in SUPPORTED_CONFIGS:
        raise ValueError(
            f"Config class {args.config_class} not supported. "
            f"Supported config classes are: {list(SUPPORTED_CONFIGS)}"
        )
    if args.min_seq_length is not None and args.dataset_class == "InMemorySingleValueDataset":
        raise ValueError("Argument --min-seq-length cannot be set when using InMemorySingleValueDataset.")

    # Return the (results path, checkpoint callback, trainer) tuple promised by the annotation.
    return train_model(**vars(args))
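For context, a minimal sketch of driving this entrypoint programmatically by populating sys.argv, as the installed console script would. The --task-type, --dataset-class, and --precision flags correspond to the validation checks above; the data flags and all paths are hypothetical placeholders, and the real parser (see get_parser) may require additional arguments.

    import sys

    from bionemo.esm2.scripts.finetune_esm2 import finetune_esm2_entrypoint

    # Simulate a command-line invocation; parse_args() reads sys.argv[1:].
    sys.argv = [
        "finetune_esm2",
        "--task-type", "regression",                      # must be classification or regression
        "--dataset-class", "InMemorySingleValueDataset",  # must be a key of SUPPORTED_DATASETS
        "--precision", "bf16-mixed",                      # must be in get_args(PrecisionTypes)
        "--train-data-path", "/path/to/train.csv",        # hypothetical flag and path
        "--valid-data-path", "/path/to/valid.csv",        # hypothetical flag and path
    ]

    # Runs the argument validation above, then train_model(**vars(args)).
    results_path, checkpoint_callback, trainer = finetune_esm2_entrypoint()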