series_monthly_decompose_anomalies_fl()

Článek
08/12/2024

Applies to: ✅ Microsoft Fabric ✅ Azure Data Explorer ✅ Azure Monitor ✅ Microsoft Sentinel

Detect anomalous points in a daily series with monthly seasonality.

The function series_monthly_decompose_anomalies_fl() is a user-defined function (UDF) that detects anomalies in multiple time series that have monthly seasonality. The function is built on top of series_decompose_anomalies(). The challenge is that the length of a month is variable between 28 to 31 days, so building a baseline by using series_decompose_anomalies() out of the box detects fixed seasonality thus fails to match spikes or other patterns that occur in the 1st or other day in each month.

Syntax

series_monthly_decompose_anomalies_fl(threshold)

Learn more about syntax conventions.

Parameters

Name	Type	Required	Description
threshold	`real`		Anomaly threshold. Default is 1.5.

Function definition

You can define the function by either embedding its code as a query-defined function, or creating it as a stored function in your database, as follows:

Query-defined
Stored

Define the function using the following let statement. No permissions are required.

Important

A let statement can't run on its own. It must be followed by a tabular expression statement. To run a working example of series_clean_anomalies_fl(), see Example.

let series_monthly_decompose_anomalies_fl=(tbl:(_key:string, _date:datetime, _val:real), threshold:real=1.5)
{
    let _tbl=materialize(tbl
    | extend _year=getyear(_date), _dom = dayofmonth(_date), _moy=monthofyear(_date), _doy=dayofyear(_date)
    | extend _vdoy = 31*(_moy-1)+_dom                  //  virtual day of year (assuming all months have 31 days)
    );
    let median_tbl = _tbl | summarize p50=percentiles(_val, 50) by _key, _dom;
    let keys = _tbl | summarize by _key | extend dummy=1;
    let years = _tbl | summarize by _year | extend dummy=1;
    let vdoys = range _vdoy from 0 to 31*12-1 step 1 | extend _moy=_vdoy/31+1, _vdom=_vdoy%31+1, _vdoy=_vdoy+1 | extend dummy=1
    | join kind=fullouter years on dummy | join kind=fullouter keys on dummy | project-away dummy, dummy1, dummy2;
    vdoys
    | join kind=leftouter _tbl on _key, _year, _vdoy
    | project-away _key1, _year1, _moy1, _vdoy1
    | extend _adoy=31*12*_year+_doy, _vadoy = 31*12*_year+_vdoy
    | partition by _key (as T
        | where _vadoy >= toscalar(T | summarize (_adoy, _vadoy)=arg_min(_adoy, _vadoy) | project _vadoy) and 
          _vadoy <= toscalar(T | summarize (_adoy, _vadoy)=arg_max(_adoy, _vadoy) | project _vadoy)
    )
    | join kind=inner median_tbl on _key, $left._vdom == $right._dom
    | extend _vval = coalesce(_val, p50)
    //| order by _key asc, _vadoy asc     //  for debugging
    | make-series _vval=avg(_vval), _date=any(_date) default=datetime(null) on _vadoy step 1 by _key
    | extend (anomalies, score, baseline) = series_decompose_anomalies(_vval, threshold, 31)
    | mv-expand _date to typeof(datetime), _vval to typeof(real), _vadoy to typeof(long), anomalies to typeof(int), score to typeof(real), baseline to typeof(real)
    | project-away _vadoy
    | project-rename _val=_vval
    | where isnotnull(_date)
};
// Write your query to use the function here.

Define the stored function once using the following .create function. Database User permissions are required.

Important

You must run this code to create the function before you can use the function as shown in the Example.

.create-or-alter function with (folder = "Packages\\Series", docstring = "Anomaly Detection for daily time series with monthly seasonality")
series_monthly_decompose_anomalies_fl(tbl:(_key:string, _date:datetime, _val:real), threshold:real=1.5)
{
    let _tbl=materialize(tbl
    | extend _year=getyear(_date), _dom = dayofmonth(_date), _moy=monthofyear(_date), _doy=dayofyear(_date)
    | extend _vdoy = 31*(_moy-1)+_dom                  //  virtual day of year (assuming all months have 31 days)
    );
    let median_tbl = _tbl | summarize p50=percentiles(_val, 50) by _key, _dom;
    let keys = _tbl | summarize by _key | extend dummy=1;
    let years = _tbl | summarize by _year | extend dummy=1;
    let vdoys = range _vdoy from 0 to 31*12-1 step 1 | extend _moy=_vdoy/31+1, _vdom=_vdoy%31+1, _vdoy=_vdoy+1 | extend dummy=1
    | join kind=fullouter years on dummy | join kind=fullouter keys on dummy | project-away dummy, dummy1, dummy2;
    vdoys
    | join kind=leftouter _tbl on _key, _year, _vdoy
    | project-away _key1, _year1, _moy1, _vdoy1
    | extend _adoy=31*12*_year+_doy, _vadoy = 31*12*_year+_vdoy
    | partition by _key (as T
        | where _vadoy >= toscalar(T | summarize (_adoy, _vadoy)=arg_min(_adoy, _vadoy) | project _vadoy) and 
          _vadoy <= toscalar(T | summarize (_adoy, _vadoy)=arg_max(_adoy, _vadoy) | project _vadoy)
    )
    | join kind=inner median_tbl on _key, $left._vdom == $right._dom
    | extend _vval = coalesce(_val, p50)
    //| order by _key asc, _vadoy asc     //  for debugging
    | make-series _vval=avg(_vval), _date=any(_date) default=datetime(null) on _vadoy step 1 by _key
    | extend (anomalies, score, baseline) = series_decompose_anomalies(_vval, threshold, 31)
    | mv-expand _date to typeof(datetime), _vval to typeof(real), _vadoy to typeof(long), anomalies to typeof(int), score to typeof(real), baseline to typeof(real)
    | project-away _vadoy
    | project-rename _val=_vval
    | where isnotnull(_date)
}

Example

The input table must contain _key, _date and _val columns. The query builds a set of time series of _val per each _key and adds anomalies, score and baseline columns.

Query-defined
Stored

To use a query-defined function, invoke it after the embedded function definition.

Run the query

let series_monthly_decompose_anomalies_fl=(tbl:(_key:string, _date:datetime, _val:real), threshold:real=1.5)
{
    let _tbl=materialize(tbl
    | extend _year=getyear(_date), _dom = dayofmonth(_date), _moy=monthofyear(_date), _doy=dayofyear(_date)
    | extend _vdoy = 31*(_moy-1)+_dom                  //  virtual day of year (assuming all months have 31 days)
    );
    let median_tbl = _tbl | summarize p50=percentiles(_val, 50) by _key, _dom;
    let keys = _tbl | summarize by _key | extend dummy=1;
    let years = _tbl | summarize by _year | extend dummy=1;
    let vdoys = range _vdoy from 0 to 31*12-1 step 1 | extend _moy=_vdoy/31+1, _vdom=_vdoy%31+1, _vdoy=_vdoy+1 | extend dummy=1
    | join kind=fullouter years on dummy | join kind=fullouter keys on dummy | project-away dummy, dummy1, dummy2;
    vdoys
    | join kind=leftouter _tbl on _key, _year, _vdoy
    | project-away _key1, _year1, _moy1, _vdoy1
    | extend _adoy=31*12*_year+_doy, _vadoy = 31*12*_year+_vdoy
    | partition by _key (as T
        | where _vadoy >= toscalar(T | summarize (_adoy, _vadoy)=arg_min(_adoy, _vadoy) | project _vadoy) and 
          _vadoy <= toscalar(T | summarize (_adoy, _vadoy)=arg_max(_adoy, _vadoy) | project _vadoy)
    )
    | join kind=inner median_tbl on _key, $left._vdom == $right._dom
    | extend _vval = coalesce(_val, p50)
    //| order by _key asc, _vadoy asc     //  for debugging
    | make-series _vval=avg(_vval), _date=any(_date) default=datetime(null) on _vadoy step 1 by _key
    | extend (anomalies, score, baseline) = series_decompose_anomalies(_vval, threshold, 31)
    | mv-expand _date to typeof(datetime), _vval to typeof(real), _vadoy to typeof(long), anomalies to typeof(int), score to typeof(real), baseline to typeof(real)
    | project-away _vadoy
    | project-rename _val=_vval
    | where isnotnull(_date)
};
demo_monthly_ts
| project _key=key, _date=ts, _val=val
| invoke series_monthly_decompose_anomalies_fl()
| project-rename key=_key, ts=_date, val=_val
| render anomalychart with(anomalycolumns=anomalies, xcolumn=ts, ycolumns=val)

Important

For this example to run successfully, you must first run the Function definition code to store the function.

demo_monthly_ts
| project _key=key, _date=ts, _val=val
| invoke series_monthly_decompose_anomalies_fl()
| project-rename key=_key, ts=_date, val=_val
| render anomalychart with(anomalycolumns=anomalies, xcolumn=ts, ycolumns=val)

Output

Series A with monthly anomalies:

Series B with monthly anomalies:

Sdílet prostřednictvím

series_monthly_decompose_anomalies_fl()

Syntax

Parameters

Function definition

Example

Váš názor

Další materiály