series_monthly_decompose_anomalies_fl()
Applies to: ✅ Microsoft Fabric ✅ Azure Data Explorer ✅ Azure Monitor ✅ Microsoft Sentinel
Detect anomalous points in a daily series with monthly seasonality.
The function series_monthly_decompose_anomalies_fl()
is a user-defined function (UDF) that detects anomalies in multiple time series that have monthly seasonality. The function is built on top of series_decompose_anomalies(). The challenge is that the length of a month is variable between 28 to 31 days, so building a baseline by using series_decompose_anomalies() out of the box detects fixed seasonality thus fails to match spikes or other patterns that occur in the 1st or other day in each month.
Syntax
series_monthly_decompose_anomalies_fl(
threshold)
Learn more about syntax conventions.
Parameters
Name | Type | Required | Description |
---|---|---|---|
threshold | real |
Anomaly threshold. Default is 1.5. |
Function definition
You can define the function by either embedding its code as a query-defined function, or creating it as a stored function in your database, as follows:
Define the function using the following let statement. No permissions are required.
Important
A let statement can't run on its own. It must be followed by a tabular expression statement. To run a working example of series_clean_anomalies_fl()
, see Example.
let series_monthly_decompose_anomalies_fl=(tbl:(_key:string, _date:datetime, _val:real), threshold:real=1.5)
{
let _tbl=materialize(tbl
| extend _year=getyear(_date), _dom = dayofmonth(_date), _moy=monthofyear(_date), _doy=dayofyear(_date)
| extend _vdoy = 31*(_moy-1)+_dom // virtual day of year (assuming all months have 31 days)
);
let median_tbl = _tbl | summarize p50=percentiles(_val, 50) by _key, _dom;
let keys = _tbl | summarize by _key | extend dummy=1;
let years = _tbl | summarize by _year | extend dummy=1;
let vdoys = range _vdoy from 0 to 31*12-1 step 1 | extend _moy=_vdoy/31+1, _vdom=_vdoy%31+1, _vdoy=_vdoy+1 | extend dummy=1
| join kind=fullouter years on dummy | join kind=fullouter keys on dummy | project-away dummy, dummy1, dummy2;
vdoys
| join kind=leftouter _tbl on _key, _year, _vdoy
| project-away _key1, _year1, _moy1, _vdoy1
| extend _adoy=31*12*_year+_doy, _vadoy = 31*12*_year+_vdoy
| partition by _key (as T
| where _vadoy >= toscalar(T | summarize (_adoy, _vadoy)=arg_min(_adoy, _vadoy) | project _vadoy) and
_vadoy <= toscalar(T | summarize (_adoy, _vadoy)=arg_max(_adoy, _vadoy) | project _vadoy)
)
| join kind=inner median_tbl on _key, $left._vdom == $right._dom
| extend _vval = coalesce(_val, p50)
//| order by _key asc, _vadoy asc // for debugging
| make-series _vval=avg(_vval), _date=any(_date) default=datetime(null) on _vadoy step 1 by _key
| extend (anomalies, score, baseline) = series_decompose_anomalies(_vval, threshold, 31)
| mv-expand _date to typeof(datetime), _vval to typeof(real), _vadoy to typeof(long), anomalies to typeof(int), score to typeof(real), baseline to typeof(real)
| project-away _vadoy
| project-rename _val=_vval
| where isnotnull(_date)
};
// Write your query to use the function here.
Example
The input table must contain _key
, _date
and _val
columns. The query builds a set of time series of _val
per each _key
and adds anomalies, score and baseline columns.
To use a query-defined function, invoke it after the embedded function definition.
let series_monthly_decompose_anomalies_fl=(tbl:(_key:string, _date:datetime, _val:real), threshold:real=1.5)
{
let _tbl=materialize(tbl
| extend _year=getyear(_date), _dom = dayofmonth(_date), _moy=monthofyear(_date), _doy=dayofyear(_date)
| extend _vdoy = 31*(_moy-1)+_dom // virtual day of year (assuming all months have 31 days)
);
let median_tbl = _tbl | summarize p50=percentiles(_val, 50) by _key, _dom;
let keys = _tbl | summarize by _key | extend dummy=1;
let years = _tbl | summarize by _year | extend dummy=1;
let vdoys = range _vdoy from 0 to 31*12-1 step 1 | extend _moy=_vdoy/31+1, _vdom=_vdoy%31+1, _vdoy=_vdoy+1 | extend dummy=1
| join kind=fullouter years on dummy | join kind=fullouter keys on dummy | project-away dummy, dummy1, dummy2;
vdoys
| join kind=leftouter _tbl on _key, _year, _vdoy
| project-away _key1, _year1, _moy1, _vdoy1
| extend _adoy=31*12*_year+_doy, _vadoy = 31*12*_year+_vdoy
| partition by _key (as T
| where _vadoy >= toscalar(T | summarize (_adoy, _vadoy)=arg_min(_adoy, _vadoy) | project _vadoy) and
_vadoy <= toscalar(T | summarize (_adoy, _vadoy)=arg_max(_adoy, _vadoy) | project _vadoy)
)
| join kind=inner median_tbl on _key, $left._vdom == $right._dom
| extend _vval = coalesce(_val, p50)
//| order by _key asc, _vadoy asc // for debugging
| make-series _vval=avg(_vval), _date=any(_date) default=datetime(null) on _vadoy step 1 by _key
| extend (anomalies, score, baseline) = series_decompose_anomalies(_vval, threshold, 31)
| mv-expand _date to typeof(datetime), _vval to typeof(real), _vadoy to typeof(long), anomalies to typeof(int), score to typeof(real), baseline to typeof(real)
| project-away _vadoy
| project-rename _val=_vval
| where isnotnull(_date)
};
demo_monthly_ts
| project _key=key, _date=ts, _val=val
| invoke series_monthly_decompose_anomalies_fl()
| project-rename key=_key, ts=_date, val=_val
| render anomalychart with(anomalycolumns=anomalies, xcolumn=ts, ycolumns=val)
Output
Series A with monthly anomalies:
Series B with monthly anomalies: