accounts
Name | Type | Description |
---|---|---|
AzureOpenAIContextTokensCacheMatchRate | Metric | Percentage of the prompt tokens hit the cache, avaiable for PTU-managed. |
AzureOpenAIProvisionedManagedUtilizationV2 | Metric | Utilization % for a provisoned-managed deployment, calculated as (PTUs consumed / PTUs deployed) x 100. |
AzureOpenAITimeToResponse | Metric | Recommended latency (responsiveness) measure for streaming requests. Time in milliseconds. |
Dashboards:
Click a tab to view the dashboard template
{
"__inputs": [],
"__elements": {},
"__requires": [
{
"type": "panel",
"id": "bargauge",
"name": "Bar gauge",
"version": ""
},
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.5.12"
},
{
"type": "datasource",
"id": "grafana-azure-monitor-datasource",
"name": "Azure Monitor",
"version": "1.0.0"
}
],
"title": "Accounts",
"editable": true,
"links": [],
"liveNow": false,
"panels": [
{
"title": "AzureOpenAIContextTokensCacheMatchRate",
"datasource": {
"type": "grafana-azure-monitor-datasource",
"uid": "${ds}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "#808080",
"value": null
},
{
"color": "dark-green",
"value": 0
},
{
"color": "dark-red",
"value": 75
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"displayMode": "basic",
"minVizHeight": 10,
"minVizWidth": 0,
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": true
},
"showUnfilled": true,
"valueMode": "color"
},
"pluginVersion": "9.5.12",
"targets": [
{
"azureLogAnalytics": {
"query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.CognitiveServices/accounts'\r\n| where MetricName has 'AzureOpenAIContextTokensCacheMatchRate'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
"resources": [
"/subscriptions/$sub"
]
},
"azureMonitor": {
"allowedTimeGrainsMs": [],
"timeGrain": "auto"
},
"datasource": {
"type": "grafana-azure-monitor-datasource",
"uid": "${ds}"
},
"queryType": "Azure Log Analytics",
"refId": "A"
}
],
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"_ResourceId": true
},
"indexByName": {},
"renameByName": {}
}
}
],
"type": "bargauge"
},
{
"title": "AzureOpenAIProvisionedManagedUtilizationV2",
"datasource": {
"type": "grafana-azure-monitor-datasource",
"uid": "${ds}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "#808080",
"value": null
},
{
"color": "dark-green",
"value": 0
},
{
"color": "dark-red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"displayMode": "basic",
"minVizHeight": 10,
"minVizWidth": 0,
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": true
},
"showUnfilled": true,
"valueMode": "color"
},
"pluginVersion": "9.5.12",
"targets": [
{
"azureLogAnalytics": {
"query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.CognitiveServices/accounts'\r\n| where MetricName has 'AzureOpenAIProvisionedManagedUtilizationV2'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
"resources": [
"/subscriptions/$sub"
]
},
"azureMonitor": {
"allowedTimeGrainsMs": [],
"timeGrain": "auto"
},
"datasource": {
"type": "grafana-azure-monitor-datasource",
"uid": "${ds}"
},
"queryType": "Azure Log Analytics",
"refId": "A"
}
],
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"_ResourceId": true
},
"indexByName": {},
"renameByName": {}
}
}
],
"type": "bargauge"
},
{
"title": "AzureOpenAITimeToResponse",
"datasource": {
"type": "grafana-azure-monitor-datasource",
"uid": "${ds}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "#808080",
"value": null
},
{
"color": "dark-green",
"value": 0
},
{
"color": "dark-red",
"value": 200
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"displayMode": "basic",
"minVizHeight": 10,
"minVizWidth": 0,
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": true
},
"showUnfilled": true,
"valueMode": "color"
},
"pluginVersion": "9.5.12",
"targets": [
{
"azureLogAnalytics": {
"query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.CognitiveServices/accounts'\r\n| where MetricName has 'AzureOpenAITimeToResponse'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
"resources": [
"/subscriptions/$sub"
]
},
"azureMonitor": {
"allowedTimeGrainsMs": [],
"timeGrain": "auto"
},
"datasource": {
"type": "grafana-azure-monitor-datasource",
"uid": "${ds}"
},
"queryType": "Azure Log Analytics",
"refId": "A"
}
],
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"_ResourceId": true
},
"indexByName": {},
"renameByName": {}
}
}
],
"type": "bargauge"
}
],
"refresh": "",
"schemaVersion": 38,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"hide": 0,
"includeAll": false,
"label": "Datasource",
"multi": false,
"name": "ds",
"options": [],
"query": "grafana-azure-monitor-datasource",
"queryValue": "",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {},
"datasource": {
"type": "grafana-azure-monitor-datasource",
"uid": "${ds}"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "Subscription",
"multi": false,
"name": "sub",
"options": [],
"query": {
"azureLogAnalytics": {
"query": "",
"resources": []
},
"queryType": "Azure Subscriptions",
"refId": "A"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"version": null
}
AzureOpenAIContextTokensCacheMatchRate - Metric Alert
Percentage of the prompt tokens hit the cache, avaiable for PTU-managed.
Properties:
criterionType | StaticThresholdCriterion |
evaluationFrequency | PT1M |
metricName | AzureOpenAIContextTokensCacheMatchRate |
metricNamespace | Microsoft.CognitiveServices/accounts |
operator | GreaterThan |
severity | 2 |
threshold | 75 |
timeAggregation | Total |
windowSize | PT5M |
References:
Templates:
{
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"alertName": {
"type": "string",
"minLength": 1,
"metadata": {
"description": "Name of the alert"
}
},
"alertDescription": {
"type": "string",
"defaultValue": "Percentage of the prompt tokens hit the cache, avaiable for PTU-managed.",
"metadata": {
"description": "Description of alert"
}
},
"targetResourceId": {
"type": "string",
"minLength": 1,
"metadata": {
"description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
}
},
"targetResourceRegion": {
"type": "string",
"metadata": {
"description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
}
},
"targetResourceType": {
"type": "string",
"minLength": 1,
"metadata": {
"description": "Resource type of target resources to be monitored."
}
},
"isEnabled": {
"type": "bool",
"defaultValue": true,
"metadata": {
"description": "Specifies whether the alert is enabled"
}
},
"alertSeverity": {
"type": "int",
"defaultValue": 2,
"allowedValues": [
0,
1,
2,
3,
4
],
"metadata": {
"description": "Severity of alert {0,1,2,3,4}"
}
},
"operator": {
"type": "string",
"defaultValue": "GreaterThan",
"allowedValues": [
"Equals",
"GreaterThan",
"GreaterThanOrEqual",
"LessThan",
"LessThanOrEqual"
],
"metadata": {
"description": "Operator comparing the current value with the threshold value."
}
},
"threshold": {
"type": "string",
"defaultValue": "75",
"metadata": {
"description": "The threshold value at which the alert is activated."
}
},
"timeAggregation": {
"type": "string",
"defaultValue": "Total",
"allowedValues": [
"Average",
"Minimum",
"Maximum",
"Total",
"Count"
],
"metadata": {
"description": "How the data that is collected should be combined over time."
}
},
"windowSize": {
"type": "string",
"defaultValue": "PT5M",
"allowedValues": [
"PT1M",
"PT5M",
"PT15M",
"PT30M",
"PT1H",
"PT6H",
"PT12H",
"PT24H",
"PT1D"
],
"metadata": {
"description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
}
},
"evaluationFrequency": {
"type": "string",
"defaultValue": "PT1M",
"allowedValues": [
"PT1M",
"PT5M",
"PT15M",
"PT30M",
"PT1H"
],
"metadata": {
"description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
}
},
"currentDateTimeUtcNow": {
"type": "string",
"defaultValue": "[utcNow()]",
"metadata": {
"description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
}
},
"telemetryOptOut": {
"type": "string",
"defaultValue": "No",
"allowedValues": [
"Yes",
"No"
],
"metadata": {
"description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
}
}
},
"variables": {
"pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
"varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
},
"resources": [
{
"type": "Microsoft.Insights/metricAlerts",
"apiVersion": "2018-03-01",
"name": "[parameters('alertName')]",
"location": "global",
"tags": {
"_deployed_by_amba": true
},
"properties": {
"description": "[parameters('alertDescription')]",
"scopes": "[variables('varTargetResourceId')]",
"targetResourceType": "[parameters('targetResourceType')]",
"targetResourceRegion": "[parameters('targetResourceRegion')]",
"severity": "[parameters('alertSeverity')]",
"enabled": "[parameters('isEnabled')]",
"evaluationFrequency": "[parameters('evaluationFrequency')]",
"windowSize": "[parameters('windowSize')]",
"criteria": {
"odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
"allOf": [
{
"name": "1st criterion",
"metricName": "AzureOpenAIContextTokensCacheMatchRate",
"dimensions": [],
"operator": "[parameters('operator')]",
"threshold": "[parameters('threshold')]",
"timeAggregation": "[parameters('timeAggregation')]",
"criterionType": "StaticThresholdCriterion"
}
]
}
}
},
{
"condition": "[equals(parameters('telemetryOptOut'), 'No')]",
"apiVersion": "2020-06-01",
"name": "[variables('pidDeploymentName')]",
"type": "Microsoft.Resources/deployments",
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": []
}
}
}
]
}
@description('Name of the alert')
@minLength(1)
param alertName string
@description('Description of alert')
param alertDescription string = 'Percentage of the prompt tokens hit the cache, avaiable for PTU-managed.'
@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array
@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string
@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string
@description('Specifies whether the alert is enabled')
param isEnabled bool = true
@description('Severity of alert {0,1,2,3,4}')
@allowed([
0
1
2
3
4
])
param alertSeverity int = 2
@description('Operator comparing the current value with the threshold value.')
@allowed([
'Equals'
'GreaterThan'
'GreaterThanOrEqual'
'LessThan'
'LessThanOrEqual'
])
param operator string = 'GreaterThan'
@description('The threshold value at which the alert is activated.')
param threshold int = 75
@description('How the data that is collected should be combined over time.')
@allowed([
'Average'
'Minimum'
'Maximum'
'Total'
'Count'
])
param timeAggregation string = 'Total'
@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
'PT1M'
'PT5M'
'PT15M'
'PT30M'
'PT1H'
'PT6H'
'PT12H'
'PT24H'
'P1D'
])
param windowSize string = 'PT5M'
@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
'PT1M'
'PT5M'
'PT15M'
'PT30M'
'PT1H'
])
param evaluationFrequency string = 'PT1M'
@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()
@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
'Yes'
'No'
])
param telemetryOptOut string = 'No'
resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
name: alertName
location: 'global'
tags: {
_deployed_by_amba: 'true'
}
properties: {
description: alertDescription
scopes: targetResourceId
targetResourceType: targetResourceType
targetResourceRegion: targetResourceRegion
severity: alertSeverity
enabled: isEnabled
evaluationFrequency: evaluationFrequency
windowSize: windowSize
criteria: {
'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
allOf: [
{
name: '1st criterion'
metricName: 'AzureOpenAIContextTokensCacheMatchRate'
dimensions: [[]]
operator: operator
threshold: threshold
timeAggregation: timeAggregation
criterionType: 'StaticThresholdCriterion'
}
]
}
}
}
var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') {
name: ambaTelemetryPidName
tags: {
_deployed_by_amba: 'true'
}
properties: {
mode: 'Incremental'
template: {
'$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
contentVersion: '1.0.0.0'
resources: []
}
}
}
AzureOpenAIProvisionedManagedUtilizationV2 - Metric Alert
Utilization % for a provisoned-managed deployment, calculated as (PTUs consumed / PTUs deployed) x 100.
Properties:
criterionType | StaticThresholdCriterion |
evaluationFrequency | PT1M |
metricName | AzureOpenAIProvisionedManagedUtilizationV2 |
metricNamespace | Microsoft.CognitiveServices/accounts |
operator | GreaterThan |
severity | 2 |
threshold | 80 |
timeAggregation | Total |
windowSize | PT5M |
References:
Templates:
{
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"alertName": {
"type": "string",
"minLength": 1,
"metadata": {
"description": "Name of the alert"
}
},
"alertDescription": {
"type": "string",
"defaultValue": "Utilization % for a provisoned-managed deployment, calculated as (PTUs consumed / PTUs deployed) x 100.",
"metadata": {
"description": "Description of alert"
}
},
"targetResourceId": {
"type": "string",
"minLength": 1,
"metadata": {
"description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
}
},
"targetResourceRegion": {
"type": "string",
"metadata": {
"description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
}
},
"targetResourceType": {
"type": "string",
"minLength": 1,
"metadata": {
"description": "Resource type of target resources to be monitored."
}
},
"isEnabled": {
"type": "bool",
"defaultValue": true,
"metadata": {
"description": "Specifies whether the alert is enabled"
}
},
"alertSeverity": {
"type": "int",
"defaultValue": 2,
"allowedValues": [
0,
1,
2,
3,
4
],
"metadata": {
"description": "Severity of alert {0,1,2,3,4}"
}
},
"operator": {
"type": "string",
"defaultValue": "GreaterThan",
"allowedValues": [
"Equals",
"GreaterThan",
"GreaterThanOrEqual",
"LessThan",
"LessThanOrEqual"
],
"metadata": {
"description": "Operator comparing the current value with the threshold value."
}
},
"threshold": {
"type": "string",
"defaultValue": "80",
"metadata": {
"description": "The threshold value at which the alert is activated."
}
},
"timeAggregation": {
"type": "string",
"defaultValue": "Total",
"allowedValues": [
"Average",
"Minimum",
"Maximum",
"Total",
"Count"
],
"metadata": {
"description": "How the data that is collected should be combined over time."
}
},
"windowSize": {
"type": "string",
"defaultValue": "PT5M",
"allowedValues": [
"PT1M",
"PT5M",
"PT15M",
"PT30M",
"PT1H",
"PT6H",
"PT12H",
"PT24H",
"PT1D"
],
"metadata": {
"description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
}
},
"evaluationFrequency": {
"type": "string",
"defaultValue": "PT1M",
"allowedValues": [
"PT1M",
"PT5M",
"PT15M",
"PT30M",
"PT1H"
],
"metadata": {
"description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
}
},
"currentDateTimeUtcNow": {
"type": "string",
"defaultValue": "[utcNow()]",
"metadata": {
"description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
}
},
"telemetryOptOut": {
"type": "string",
"defaultValue": "No",
"allowedValues": [
"Yes",
"No"
],
"metadata": {
"description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
}
}
},
"variables": {
"pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
"varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
},
"resources": [
{
"type": "Microsoft.Insights/metricAlerts",
"apiVersion": "2018-03-01",
"name": "[parameters('alertName')]",
"location": "global",
"tags": {
"_deployed_by_amba": true
},
"properties": {
"description": "[parameters('alertDescription')]",
"scopes": "[variables('varTargetResourceId')]",
"targetResourceType": "[parameters('targetResourceType')]",
"targetResourceRegion": "[parameters('targetResourceRegion')]",
"severity": "[parameters('alertSeverity')]",
"enabled": "[parameters('isEnabled')]",
"evaluationFrequency": "[parameters('evaluationFrequency')]",
"windowSize": "[parameters('windowSize')]",
"criteria": {
"odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
"allOf": [
{
"name": "1st criterion",
"metricName": "AzureOpenAIProvisionedManagedUtilizationV2",
"dimensions": [],
"operator": "[parameters('operator')]",
"threshold": "[parameters('threshold')]",
"timeAggregation": "[parameters('timeAggregation')]",
"criterionType": "StaticThresholdCriterion"
}
]
}
}
},
{
"condition": "[equals(parameters('telemetryOptOut'), 'No')]",
"apiVersion": "2020-06-01",
"name": "[variables('pidDeploymentName')]",
"type": "Microsoft.Resources/deployments",
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": []
}
}
}
]
}
@description('Name of the alert')
@minLength(1)
param alertName string
@description('Description of alert')
param alertDescription string = 'Utilization % for a provisoned-managed deployment, calculated as (PTUs consumed / PTUs deployed) x 100.'
@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array
@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string
@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string
@description('Specifies whether the alert is enabled')
param isEnabled bool = true
@description('Severity of alert {0,1,2,3,4}')
@allowed([
0
1
2
3
4
])
param alertSeverity int = 2
@description('Operator comparing the current value with the threshold value.')
@allowed([
'Equals'
'GreaterThan'
'GreaterThanOrEqual'
'LessThan'
'LessThanOrEqual'
])
param operator string = 'GreaterThan'
@description('The threshold value at which the alert is activated.')
param threshold int = 80
@description('How the data that is collected should be combined over time.')
@allowed([
'Average'
'Minimum'
'Maximum'
'Total'
'Count'
])
param timeAggregation string = 'Total'
@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
'PT1M'
'PT5M'
'PT15M'
'PT30M'
'PT1H'
'PT6H'
'PT12H'
'PT24H'
'P1D'
])
param windowSize string = 'PT5M'
@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
'PT1M'
'PT5M'
'PT15M'
'PT30M'
'PT1H'
])
param evaluationFrequency string = 'PT1M'
@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()
@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
'Yes'
'No'
])
param telemetryOptOut string = 'No'
resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
name: alertName
location: 'global'
tags: {
_deployed_by_amba: 'true'
}
properties: {
description: alertDescription
scopes: targetResourceId
targetResourceType: targetResourceType
targetResourceRegion: targetResourceRegion
severity: alertSeverity
enabled: isEnabled
evaluationFrequency: evaluationFrequency
windowSize: windowSize
criteria: {
'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
allOf: [
{
name: '1st criterion'
metricName: 'AzureOpenAIProvisionedManagedUtilizationV2'
dimensions: [[]]
operator: operator
threshold: threshold
timeAggregation: timeAggregation
criterionType: 'StaticThresholdCriterion'
}
]
}
}
}
var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') {
name: ambaTelemetryPidName
tags: {
_deployed_by_amba: 'true'
}
properties: {
mode: 'Incremental'
template: {
'$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
contentVersion: '1.0.0.0'
resources: []
}
}
}
AzureOpenAITimeToResponse - Metric Alert
Recommended latency (responsiveness) measure for streaming requests. Time in milliseconds.
Properties:
criterionType | StaticThresholdCriterion |
evaluationFrequency | PT1M |
metricName | AzureOpenAITimeToResponse |
metricNamespace | Microsoft.CognitiveServices/accounts |
operator | GreaterThan |
severity | 2 |
threshold | 200 |
timeAggregation | Total |
windowSize | PT5M |
References:
Templates:
{
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"alertName": {
"type": "string",
"minLength": 1,
"metadata": {
"description": "Name of the alert"
}
},
"alertDescription": {
"type": "string",
"defaultValue": "Recommended latency (responsiveness) measure for streaming requests. Time in milliseconds.",
"metadata": {
"description": "Description of alert"
}
},
"targetResourceId": {
"type": "string",
"minLength": 1,
"metadata": {
"description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
}
},
"targetResourceRegion": {
"type": "string",
"metadata": {
"description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
}
},
"targetResourceType": {
"type": "string",
"minLength": 1,
"metadata": {
"description": "Resource type of target resources to be monitored."
}
},
"isEnabled": {
"type": "bool",
"defaultValue": true,
"metadata": {
"description": "Specifies whether the alert is enabled"
}
},
"alertSeverity": {
"type": "int",
"defaultValue": 2,
"allowedValues": [
0,
1,
2,
3,
4
],
"metadata": {
"description": "Severity of alert {0,1,2,3,4}"
}
},
"operator": {
"type": "string",
"defaultValue": "GreaterThan",
"allowedValues": [
"Equals",
"GreaterThan",
"GreaterThanOrEqual",
"LessThan",
"LessThanOrEqual"
],
"metadata": {
"description": "Operator comparing the current value with the threshold value."
}
},
"threshold": {
"type": "string",
"defaultValue": "200",
"metadata": {
"description": "The threshold value at which the alert is activated."
}
},
"timeAggregation": {
"type": "string",
"defaultValue": "Total",
"allowedValues": [
"Average",
"Minimum",
"Maximum",
"Total",
"Count"
],
"metadata": {
"description": "How the data that is collected should be combined over time."
}
},
"windowSize": {
"type": "string",
"defaultValue": "PT5M",
"allowedValues": [
"PT1M",
"PT5M",
"PT15M",
"PT30M",
"PT1H",
"PT6H",
"PT12H",
"PT24H",
"PT1D"
],
"metadata": {
"description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
}
},
"evaluationFrequency": {
"type": "string",
"defaultValue": "PT1M",
"allowedValues": [
"PT1M",
"PT5M",
"PT15M",
"PT30M",
"PT1H"
],
"metadata": {
"description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
}
},
"currentDateTimeUtcNow": {
"type": "string",
"defaultValue": "[utcNow()]",
"metadata": {
"description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
}
},
"telemetryOptOut": {
"type": "string",
"defaultValue": "No",
"allowedValues": [
"Yes",
"No"
],
"metadata": {
"description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
}
}
},
"variables": {
"pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
"varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
},
"resources": [
{
"type": "Microsoft.Insights/metricAlerts",
"apiVersion": "2018-03-01",
"name": "[parameters('alertName')]",
"location": "global",
"tags": {
"_deployed_by_amba": true
},
"properties": {
"description": "[parameters('alertDescription')]",
"scopes": "[variables('varTargetResourceId')]",
"targetResourceType": "[parameters('targetResourceType')]",
"targetResourceRegion": "[parameters('targetResourceRegion')]",
"severity": "[parameters('alertSeverity')]",
"enabled": "[parameters('isEnabled')]",
"evaluationFrequency": "[parameters('evaluationFrequency')]",
"windowSize": "[parameters('windowSize')]",
"criteria": {
"odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
"allOf": [
{
"name": "1st criterion",
"metricName": "AzureOpenAITimeToResponse",
"dimensions": [],
"operator": "[parameters('operator')]",
"threshold": "[parameters('threshold')]",
"timeAggregation": "[parameters('timeAggregation')]",
"criterionType": "StaticThresholdCriterion"
}
]
}
}
},
{
"condition": "[equals(parameters('telemetryOptOut'), 'No')]",
"apiVersion": "2020-06-01",
"name": "[variables('pidDeploymentName')]",
"type": "Microsoft.Resources/deployments",
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": []
}
}
}
]
}
@description('Name of the alert')
@minLength(1)
param alertName string
@description('Description of alert')
param alertDescription string = 'Recommended latency (responsiveness) measure for streaming requests. Time in milliseconds.'
@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array
@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string
@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string
@description('Specifies whether the alert is enabled')
param isEnabled bool = true
@description('Severity of alert {0,1,2,3,4}')
@allowed([
0
1
2
3
4
])
param alertSeverity int = 2
@description('Operator comparing the current value with the threshold value.')
@allowed([
'Equals'
'GreaterThan'
'GreaterThanOrEqual'
'LessThan'
'LessThanOrEqual'
])
param operator string = 'GreaterThan'
@description('The threshold value at which the alert is activated.')
param threshold int = 200
@description('How the data that is collected should be combined over time.')
@allowed([
'Average'
'Minimum'
'Maximum'
'Total'
'Count'
])
param timeAggregation string = 'Total'
@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
'PT1M'
'PT5M'
'PT15M'
'PT30M'
'PT1H'
'PT6H'
'PT12H'
'PT24H'
'P1D'
])
param windowSize string = 'PT5M'
@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
'PT1M'
'PT5M'
'PT15M'
'PT30M'
'PT1H'
])
param evaluationFrequency string = 'PT1M'
@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()
@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
'Yes'
'No'
])
param telemetryOptOut string = 'No'
resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
name: alertName
location: 'global'
tags: {
_deployed_by_amba: 'true'
}
properties: {
description: alertDescription
scopes: targetResourceId
targetResourceType: targetResourceType
targetResourceRegion: targetResourceRegion
severity: alertSeverity
enabled: isEnabled
evaluationFrequency: evaluationFrequency
windowSize: windowSize
criteria: {
'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
allOf: [
{
name: '1st criterion'
metricName: 'AzureOpenAITimeToResponse'
dimensions: [[]]
operator: operator
threshold: threshold
timeAggregation: timeAggregation
criterionType: 'StaticThresholdCriterion'
}
]
}
}
}
var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') {
name: ambaTelemetryPidName
tags: {
_deployed_by_amba: 'true'
}
properties: {
mode: 'Incremental'
template: {
'$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
contentVersion: '1.0.0.0'
resources: []
}
}
}