This module function consolidates the multiple DFP pipeline modules relevant to the inference/training process into a single module.
| Parameter | Type | Description | Example Value | Default Value |
|---|---|---|---|---|
| `cache_dir` | string | Directory used for caching intermediate results. | `"/tmp/cache"` | |
| `timestamp_column_name` | string | Name of the column containing timestamps. | `"timestamp"` | |
| `pre_filter_options` | dictionary | Options for pre-filtering control messages. | See Below | |
| `batching_options` | dictionary | Options for batching files. | See Below | |
| `user_splitting_options` | dictionary | Options for splitting data by user. | See Below | |
| `supported_loaders` | dictionary | Supported data loaders for different file types. | - | |
pre_filter_options
| Parameter | Type | Description | Example Value | Default Value |
|---|---|---|---|---|
| `enable_task_filtering` | boolean | Enables filtering based on task type. | `true` | |
| `filter_task_type` | string | The task type to be used as a filter. | `"task_a"` | |
| `enable_data_filtering` | boolean | Enables filtering based on data type. | `true` | |
| `filter_data_type` | string | The data type to be used as a filter. | `"type_a"` | |
batching_options
| Parameter | Type | Description | Example Value | Default Value |
|---|---|---|---|---|
| `end_time` | string | End time of the time range to process. | `"2022-01-01T00:00:00Z"` | |
| `iso_date_regex_pattern` | string | ISO date regex pattern. | `"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z"` | |
| `parser_kwargs` | dictionary | Keyword arguments to pass to the parser. | `{}` | |
| `period` | string | Time period to batch the data. | `"1D"` | |
| `sampling_rate_s` | float | Sampling rate in seconds. | `1.0` | |
| `start_time` | string | Start time of the time range to process. | `"2021-01-01T00:00:00Z"` | |
user_splitting_options
| Parameter | Type | Description | Example Value | Default Value |
|---|---|---|---|---|
| `fallback_username` | string | Fallback user to use if no model is found for a user. | `"generic"` | |
| `include_generic` | boolean | Include generic models in the results. | `true` | |
| `include_individual` | boolean | Include individual models in the results. | `true` | |
| `only_users` | list | List of users to include in the results. | `["user_a", "user_b"]` | |
| `skip_users` | list | List of users to exclude from the results. | `["user_c"]` | |
| `userid_column_name` | string | Column name for the user ID. | `"user_id"` | |
{
"cache_dir": "/tmp/cache",
"timestamp_column_name": "timestamp",
"pre_filter_options": {
"enable_task_filtering": true,
"filter_task_type": "task_a",
"enable_data_filtering": true,
"filter_data_type": "type_a"
},
"batching_options": {
"end_time": "2022-01-01T00:00:00Z",
"iso_date_regex_pattern": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z",
"parser_kwargs": {},
"period": "1D",
"sampling_rate_s": 1.0,
"start_time": "2021-01-01T00:00:00Z"
},
"user_splitting_options": {
"fallback_username": "generic",
"include_generic": true,
"include_individual": true,
"only_users": [
"user_a",
"user_b"
],
"skip_users": [
"user_c"
],
"userid_column_name": "user_id"
},
"supported_loaders": {}
}