Azure Monitor Baseline Alerts
Download AlertsGlossaryGitHubGitHub IssuesToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeBack to homepage

accounts

NameTypeDescription
AzureOpenAIContextTokensCacheMatchRateMetricPercentage of the prompt tokens hit the cache, avaiable for PTU-managed.
AzureOpenAIProvisionedManagedUtilizationV2MetricUtilization % for a provisoned-managed deployment, calculated as (PTUs consumed / PTUs deployed) x 100.
AzureOpenAITimeToResponseMetricRecommended latency (responsiveness) measure for streaming requests. Time in milliseconds.

Dashboards:

Click a tab to view the dashboard template

{
  "__inputs": [],
  "__elements": {},
  "__requires": [
    {
      "type": "panel",
      "id": "bargauge",
      "name": "Bar gauge",
      "version": ""
    },
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "9.5.12"
    },
    {
      "type": "datasource",
      "id": "grafana-azure-monitor-datasource",
      "name": "Azure Monitor",
      "version": "1.0.0"
    }
  ],
  "title": "Accounts",
  "editable": true,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "title": "AzureOpenAIContextTokensCacheMatchRate",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 75
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 1,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.CognitiveServices/accounts'\r\n| where MetricName has 'AzureOpenAIContextTokensCacheMatchRate'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "AzureOpenAIProvisionedManagedUtilizationV2",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 80
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "id": 2,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.CognitiveServices/accounts'\r\n| where MetricName has 'AzureOpenAIProvisionedManagedUtilizationV2'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "AzureOpenAITimeToResponse",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 200
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 8
      },
      "id": 3,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.CognitiveServices/accounts'\r\n| where MetricName has 'AzureOpenAITimeToResponse'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    }
  ],
  "refresh": "",
  "schemaVersion": 38,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": [
      {
        "current": {},
        "hide": 0,
        "includeAll": false,
        "label": "Datasource",
        "multi": false,
        "name": "ds",
        "options": [],
        "query": "grafana-azure-monitor-datasource",
        "queryValue": "",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "type": "datasource"
      },
      {
        "current": {},
        "datasource": {
          "type": "grafana-azure-monitor-datasource",
          "uid": "${ds}"
        },
        "definition": "",
        "hide": 0,
        "includeAll": false,
        "label": "Subscription",
        "multi": false,
        "name": "sub",
        "options": [],
        "query": {
          "azureLogAnalytics": {
            "query": "",
            "resources": []
          },
          "queryType": "Azure Subscriptions",
          "refId": "A"
        },
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 0,
        "type": "query"
      }
    ]
  },
  "time": {
    "from": "now-6h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "version": null
}



AzureOpenAIContextTokensCacheMatchRate - Metric Alert

Percentage of the prompt tokens hit the cache, avaiable for PTU-managed.

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT1M
metricNameAzureOpenAIContextTokensCacheMatchRate
metricNamespaceMicrosoft.CognitiveServices/accounts
operatorGreaterThan
severity2
threshold75
timeAggregationTotal
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Percentage of the prompt tokens hit the cache, avaiable for PTU-managed.",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 2,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "75",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Total",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "AzureOpenAIContextTokensCacheMatchRate",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2020-06-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Percentage of the prompt tokens hit the cache, avaiable for PTU-managed.'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 2

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 75

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Total'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'AzureOpenAIContextTokensCacheMatchRate'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}



AzureOpenAIProvisionedManagedUtilizationV2 - Metric Alert

Utilization % for a provisoned-managed deployment, calculated as (PTUs consumed / PTUs deployed) x 100.

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT1M
metricNameAzureOpenAIProvisionedManagedUtilizationV2
metricNamespaceMicrosoft.CognitiveServices/accounts
operatorGreaterThan
severity2
threshold80
timeAggregationTotal
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Utilization % for a provisoned-managed deployment, calculated as (PTUs consumed / PTUs deployed) x 100.",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 2,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "80",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Total",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "AzureOpenAIProvisionedManagedUtilizationV2",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2020-06-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Utilization % for a provisoned-managed deployment, calculated as (PTUs consumed / PTUs deployed) x 100.'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 2

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 80

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Total'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'AzureOpenAIProvisionedManagedUtilizationV2'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}



AzureOpenAITimeToResponse - Metric Alert

Recommended latency (responsiveness) measure for streaming requests. Time in milliseconds.

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT1M
metricNameAzureOpenAITimeToResponse
metricNamespaceMicrosoft.CognitiveServices/accounts
operatorGreaterThan
severity2
threshold200
timeAggregationTotal
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Recommended latency (responsiveness) measure for streaming requests. Time in milliseconds.",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 2,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "200",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Total",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "AzureOpenAITimeToResponse",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2020-06-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Recommended latency (responsiveness) measure for streaming requests. Time in milliseconds.'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 2

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 200

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Total'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'AzureOpenAITimeToResponse'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}