
Finetune ESM2

finetune_esm2_entrypoint()

Fine-tune an ESM2 model on UR data.
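
Because the entrypoint reads its configuration from sys.argv via get_parser(), it can also be driven programmatically by patching the argument vector. The following is a minimal sketch: the flag spellings --task-type, --dataset-class, and --precision are assumptions inferred from the attribute names validated in the source below, and get_parser() remains the authoritative source for the real flag set.

import sys
from unittest import mock

from bionemo.esm2.scripts.finetune_esm2 import finetune_esm2_entrypoint

# Hypothetical flag spellings inferred from the validated args.* attributes;
# "bf16-mixed" is assumed to be a member of PrecisionTypes. The data and
# checkpoint flags a real run requires are omitted here.
argv = [
    "finetune_esm2",
    "--task-type", "regression",
    "--dataset-class", "InMemorySingleValueDataset",
    "--precision", "bf16-mixed",
]
with mock.patch.object(sys, "argv", argv):
    ckpt_path, metrics_callback, trainer = finetune_esm2_entrypoint()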

Source code in bionemo/esm2/scripts/finetune_esm2.py, lines 466-491:

def finetune_esm2_entrypoint() -> Tuple[Path, Callback | None, nl.Trainer]:
    """Train an ESM2 model on UR data."""
    parser = get_parser()
    args = parser.parse_args()

    # Validate arguments: fail fast on unsupported or incompatible settings.
    if args.lora_checkpoint_path and not args.lora_finetune:
        raise ValueError("Argument --lora-checkpoint-path cannot be set unless --lora-finetune is enabled.")
    if args.precision not in get_args(PrecisionTypes):
        raise ValueError(
            f"Precision {args.precision} not supported. Supported precisions are: {get_args(PrecisionTypes)}"
        )
    if args.task_type not in ["classification", "regression"]:
        raise ValueError(
            f"Task type {args.task_type} not supported. Supported task types are: classification, regression"
        )
    if args.dataset_class not in SUPPORTED_DATASETS:
        raise ValueError(
            f"Dataset class {args.dataset_class} not supported. Supported dataset classes are: {SUPPORTED_DATASETS.keys()}"
        )
    if args.config_class not in SUPPORTED_CONFIGS:
        raise ValueError(
            f"Config class {args.config_class} not supported. Supported config classes are: {SUPPORTED_CONFIGS.keys()}"
        )
    if args.min_seq_length is not None and args.dataset_class == "InMemorySingleValueDataset":
        raise ValueError("Arguments --min-seq-length cannot be set when using InMemorySingleValueDataset.")

    # Every parsed argument maps one-to-one onto a keyword parameter of train_model().
    return train_model(**vars(args))
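
The checks above reject an inconsistent configuration before any trainer or model is constructed. The first check is a dependent-flag pattern that can be reproduced in isolation; the standalone sketch below illustrates the same idea with plain argparse, using flag names that mirror the entrypoint but not the BioNeMo parser itself.

import argparse

# Standalone sketch of the dependent-flag check: --lora-checkpoint-path is
# rejected unless its parent switch, --lora-finetune, is enabled. Flag names
# mirror the entrypoint above; this is not the BioNeMo parser.
parser = argparse.ArgumentParser()
parser.add_argument("--lora-finetune", action="store_true")
parser.add_argument("--lora-checkpoint-path", type=str, default=None)
args = parser.parse_args(["--lora-checkpoint-path", "/tmp/adapter.ckpt"])

if args.lora_checkpoint_path and not args.lora_finetune:
    # Running this snippet raises the ValueError, mirroring the entrypoint.
    raise ValueError("Argument --lora-checkpoint-path cannot be set unless --lora-finetune is enabled.")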