This module function consolidates the DFP pipeline modules used in the inference process into a single module.
Parameter |
Type |
Description |
Example Value |
Default Value |
---|---|---|---|---|
batching_options |
dictionary |
Options for batching files. |
See below |
|
cache_dir |
string |
Directory used for caching intermediate results. |
“/tmp/cache” |
|
detection_criteria |
dictionary |
Criteria for filtering detections. |
- |
|
inference_options |
dictionary |
Options for configuring the inference process. |
See below |
|
preprocessing_options |
dictionary |
Options for preprocessing data. |
- |
|
stream_aggregation_options |
dictionary |
Options for aggregating data by stream. |
See below |
|
timestamp_column_name |
string |
Name of the column containing timestamps. |
“timestamp” |
|
user_splitting_options |
dictionary |
Options for splitting data by user. |
See below |
|
write_to_file_options |
dictionary |
Options for writing results to a file. |
- |
|
batching_options
Parameter |
Type |
Description |
Example Value |
Default Value |
---|---|---|---|---|
end_time |
string |
End time of the time range to process. |
“2022-01-01T00:00:00Z” |
|
iso_date_regex_pattern |
string |
ISO date regex pattern. |
“\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z” |
|
parser_kwargs |
dict |
Keyword arguments to pass to the parser. |
- |
|
period |
string |
Time period to batch the data. |
“1D” |
|
sampling_rate_s |
float |
Sampling rate in seconds. |
1.0 |
|
start_time |
string |
Start time of the time range to process. |
“2021-01-01T00:00:00Z” |
|
user_splitting_options
Parameter |
Type |
Description |
Example Value |
Default Value |
---|---|---|---|---|
fallback_username |
string |
Fallback user to use if no model is found for a user. |
“generic_user” |
|
include_generic |
boolean |
Include generic models in the results. |
true |
|
include_individual |
boolean |
Include individual models in the results. |
true |
|
only_users |
list |
List of users to include in the results. |
[“user_a”, “user_b”] |
|
skip_users |
list |
List of users to exclude from the results. |
[“user_c”] |
|
userid_column_name |
string |
Column name for the user ID. |
“user_id” |
|
stream_aggregation_options
Parameter |
Type |
Description |
Example Value |
Default Value |
---|---|---|---|---|
|
string |
The user ID to use if the user ID is not found |
“batch” |
|
|
int |
Minimum history to trigger a new training event |
1 |
|
|
int |
Maximum history to include in a new training event |
0 |
|
timestamp_column_name |
string |
Name of the column containing timestamps |
“timestamp” |
|
|
string |
Lookback timespan for training data in a new training event |
“60d” |
|
cache_to_disk |
bool |
Whether or not to cache streaming data to disk |
false |
|
|
string |
Directory to use for caching streaming data |
“./.cache” |
|
inference_options
Parameter |
Type |
Description |
Example Value |
Default Value |
---|---|---|---|---|
model_name_formatter |
string |
Formatter for model names |
“user_{username}_model” |
|
fallback_username |
string |
Fallback user to use if no model is found for a user |
“generic_user” |
|
timestamp_column_name |
string |
Name of the timestamp column |
“timestamp” |
|
{
"timestamp_column_name": "timestamp",
"cache_dir": "/tmp/cache",
"batching_options": {
"end_time": "2022-01-01T00:00:00Z",
"iso_date_regex_pattern": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z",
"parser_kwargs": {},
"period": "1D",
"sampling_rate_s": 1.0,
"start_time": "2021-01-01T00:00:00Z"
},
"user_splitting_options": {
"fallback_username": "generic",
"include_generic": true,
"include_individual": true,
"only_users": [
"user_a",
"user_b"
],
"skip_users": [
"user_c"
],
"userid_column_name": "user_id"
},
"stream_aggregation_options": {
"timestamp_column_name": "timestamp",
"cache_mode": "MEMORY",
"trigger_on_min_history": true,
"aggregation_span": "1D",
"trigger_on_min_increment": true,
"cache_to_disk": false
},
"preprocessing_options": {},
"inference_options": {
"model_name_formatter": "{model_name}",
"fallback_username": "generic",
"timestamp_column_name": "timestamp"
},
"detection_criteria": {},
"write_to_file_options": {}
}