dfp_preproc
#
This module function consolidates multiple DFP pipeline modules relevant to the inference/training process into a single module.
Configurable Parameters#
Parameter |
Type |
Description |
Example Value |
Default Value |
---|---|---|---|---|
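`cache_dir` |
string |
Directory used for caching intermediate results. |
`/tmp/cache` |
|
`timestamp_column_name` |
string |
Name of the column containing timestamps. |
`timestamp` |
|
`pre_filter_options` |
dictionary |
Options for pre-filtering control messages. |
See `pre_filter_options` below |
|
`batching_options` |
dictionary |
Options for batching files. |
See `batching_options` below |
|
`user_splitting_options` |
dictionary |
Options for splitting data by user. |
See `user_splitting_options` below |
|
`supported_loaders` |
dictionary |
Supported data loaders for different file types. |
- |
|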
pre_filter_options
#
Parameter |
Type |
Description |
Example Value |
Default Value |
---|---|---|---|---|
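`enable_task_filtering` |
boolean |
Enables filtering based on task type. |
`true` |
|
`filter_task_type` |
string |
The task type to be used as a filter. |
`task_a` |
|
`enable_data_filtering` |
boolean |
Enables filtering based on data type. |
`true` |
|
`filter_data_type` |
string |
The data type to be used as a filter. |
`type_a` |
|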
batching_options
#
Parameter |
Type |
Description |
Example Value |
Default Value |
---|---|---|---|---|
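`end_time` |
string |
End time of the time range to process. |
`2022-01-01T00:00:00Z` |
|
`iso_date_regex_pattern` |
string |
ISO date regex pattern. |
`\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z` |
|
`parser_kwargs` |
dictionary |
Keyword arguments to pass to the parser. |
`{}` |
|
`period` |
string |
Time period to batch the data. |
`1D` |
|
`sampling_rate_s` |
float |
Sampling rate in seconds. |
`1.0` |
|
`start_time` |
string |
Start time of the time range to process. |
`2021-01-01T00:00:00Z` |
|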
user_splitting_options
#
Parameter |
Type |
Description |
Example Value |
Default Value |
---|---|---|---|---|
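`fallback_username` |
string |
Fallback user to use if no model is found for a user. |
`generic` |
|
`include_generic` |
boolean |
Include generic models in the results. |
`true` |
|
`include_individual` |
boolean |
Include individual models in the results. |
`true` |
|
`only_users` |
list |
List of users to include in the results. |
`["user_a", "user_b"]` |
|
`skip_users` |
list |
List of users to exclude from the results. |
`["user_c"]` |
|
`userid_column_name` |
string |
Column name for the user ID. |
`user_id` |
|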
Example JSON Configuration#
{
"cache_dir": "/tmp/cache",
"timestamp_column_name": "timestamp",
"pre_filter_options": {
"enable_task_filtering": true,
"filter_task_type": "task_a",
"enable_data_filtering": true,
"filter_data_type": "type_a"
},
"batching_options": {
"end_time": "2022-01-01T00:00:00Z",
"iso_date_regex_pattern": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z",
"parser_kwargs": {},
"period": "1D",
"sampling_rate_s": 1.0,
"start_time": "2021-01-01T00:00:00Z"
},
"user_splitting_options": {
"fallback_username": "generic",
"include_generic": true,
"include_individual": true,
"only_users": [
"user_a",
"user_b"
],
"skip_users": [
"user_c"
],
"userid_column_name": "user_id"
},
"supported_loaders": {}
}