Important

You are viewing the NeMo 2.0 documentation. This release introduces significant changes to the API and a new library, NeMo Run. We are currently porting all features from NeMo 1.0 to 2.0. For documentation on previous versions or features not yet available in 2.0, please refer to the NeMo 24.07 documentation.

Task Decontamination

Base Class

class nemo_curator.tasks.DownstreamTask#
class nemo_curator.tasks.import_task(task_path)#

Module

class nemo_curator.TaskDecontamination(
tasks: DownstreamTask | Iterable[DownstreamTask],
text_field='text',
max_ngram_size=13,
max_matches=10,
min_document_length=200,
remove_char_each_side=200,
max_splits=10,
removed_dir=None,
)#
prepare_task_ngram_count() → dict

Computes a dictionary whose keys are all the n-grams found in each task, with every value set to 0.

Tasks

class nemo_curator.tasks.Race(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.Squad(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.ArcEasy(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.ArcChallenge(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.OpenBookQA(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.BoolQ(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.Copa(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.RTE(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.MultiRC(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.WSC(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.CB(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.ANLI(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.Record(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.COQA(file_path, min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.TriviaQA(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.Quac(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.WebQA(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.Drop(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.WiC(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.MMLU(path=None, min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.BigBenchHard(path=None, min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.BigBenchLight(path=None, min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.Multilingual(path=None, min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.PIQA(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.Winogrande(min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.Lambada(file_path, min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.NumDasc(n, file_path, min_ngram_size=8, max_ngram_size=13)#
class nemo_curator.tasks.StoryCloze(file_path, min_ngram_size=8, max_ngram_size=13)#