Azure Monitor Baseline Alerts
Download AlertsGlossaryGitHubGitHub IssuesToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeBack to homepage

batchAccounts

The Alert state column shows the state that the alert is deployed with by default. This default value maps to the following principles:

- Enabled: The alert is deployed as enabled by default and has to be considered a Must Have alert
- Disabled: The alert is deployed as disabled by default and has to be considered a Nice to Have alert. Customers can enable the alert up to their choice.
NameTypeAlert stateDescription
OfflineNodeCountMetricEnabledNumber of offline nodes
PreemptedNodeCountMetricEnabledNumber of preempted nodes
RebootingNodeCountMetricEnabledNumber of rebooting nodes
TaskFailEventMetricEnabledTotal number of tasks that have completed in a failed state
UnusableNodeCountMetricEnabledNumber of unusable nodes

Dashboards:

Click a tab to view the dashboard template

{
  "__inputs": [],
  "__elements": {},
  "__requires": [
    {
      "type": "panel",
      "id": "bargauge",
      "name": "Bar gauge",
      "version": ""
    },
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "9.5.12"
    },
    {
      "type": "datasource",
      "id": "grafana-azure-monitor-datasource",
      "name": "Azure Monitor",
      "version": "1.0.0"
    }
  ],
  "title": "Batch accounts",
  "editable": true,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "title": "OfflineNodeCount",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 0
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 1,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.Batch/batchAccounts'\r\n| where MetricName has 'OfflineNodeCount'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "PreemptedNodeCount",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 0
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "id": 2,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.Batch/batchAccounts'\r\n| where MetricName has 'PreemptedNodeCount'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "RebootingNodeCount",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 0
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 8
      },
      "id": 3,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.Batch/batchAccounts'\r\n| where MetricName has 'RebootingNodeCount'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "TaskFailEvent",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 0
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 8
      },
      "id": 4,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.Batch/batchAccounts'\r\n| where MetricName has 'TaskFailEvent'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "UnusableNodeCount",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 2.5
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 16
      },
      "id": 5,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.Batch/batchAccounts'\r\n| where MetricName has 'UnusableNodeCount'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    }
  ],
  "refresh": "",
  "schemaVersion": 38,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": [
      {
        "current": {},
        "hide": 0,
        "includeAll": false,
        "label": "Datasource",
        "multi": false,
        "name": "ds",
        "options": [],
        "query": "grafana-azure-monitor-datasource",
        "queryValue": "",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "type": "datasource"
      },
      {
        "current": {},
        "datasource": {
          "type": "grafana-azure-monitor-datasource",
          "uid": "${ds}"
        },
        "definition": "",
        "hide": 0,
        "includeAll": false,
        "label": "Subscription",
        "multi": false,
        "name": "sub",
        "options": [],
        "query": {
          "azureLogAnalytics": {
            "query": "",
            "resources": []
          },
          "queryType": "Azure Subscriptions",
          "refId": "A"
        },
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 0,
        "type": "query"
      }
    ]
  },
  "time": {
    "from": "now-6h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "version": null
}



OfflineNodeCount - Metric Alert

Number of offline nodes

Properties:

autoMitigatefalse
criterionTypeStaticThresholdCriterion
enabledtrue
evaluationFrequencyPT1M
metricNameOfflineNodeCount
metricNamespaceMicrosoft.Batch/batchAccounts
operatorGreaterThan
severity3
threshold0
timeAggregationTotal
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Number of offline nodes",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "0",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Total",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "P1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "OfflineNodeCount",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Number of offline nodes'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 0

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Total'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'OfflineNodeCount'
          dimensions: []
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
    "mode": "All",
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "0"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.Batch/batchAccounts"
          },
          {
            "field": "[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "name": "[concat(replace(field('fullName'),'/','-'), '-OfflineNodeCount')]",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.Batch/batchAccounts"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "OfflineNodeCount"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[field('id')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Total"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[if(contains(field('tags'), '_amba-OfflineNodeCount-threshold-Override_'), field('tags._amba-OfflineNodeCount-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[concat(parameters('resourceName'), '-OfflineNodeCount')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for Batch batchAccounts OfflineNodeCount",
                      "severity": "[parameters('severity')]",
                      "enabled": "[parameters('enabled')]",
                      "scopes": [
                        "[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[parameters('evaluationFrequency')]",
                      "windowSize": "[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "OfflineNodeCount",
                            "metricNamespace": "Microsoft.Batch/batchAccounts",
                            "metricName": "OfflineNodeCount",
                            "operator": "GreaterThan",
                            "threshold": "[parameters('threshold')]",
                            "timeAggregation": "Total",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[replace(field('fullName'),'/','-')]"
                },
                "resourceId": {
                  "value": "[field('id')]"
                },
                "severity": {
                  "value": "[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[if(contains(field('tags'), '_amba-OfflineNodeCount-threshold-Override_'), field('tags._amba-OfflineNodeCount-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }



PreemptedNodeCount - Metric Alert

Number of preempted nodes

Properties:

autoMitigatefalse
criterionTypeStaticThresholdCriterion
enabledtrue
evaluationFrequencyPT1M
metricNamePreemptedNodeCount
metricNamespaceMicrosoft.Batch/batchAccounts
operatorGreaterThan
severity1
threshold0
timeAggregationTotal
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Number of preempted nodes",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 1,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "0",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Total",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "P1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "PreemptedNodeCount",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Number of preempted nodes'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 1

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 0

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Total'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'PreemptedNodeCount'
          dimensions: []
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
    "mode": "All",
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "1"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "0"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.Batch/batchAccounts"
          },
          {
            "field": "[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "name": "[concat(replace(field('fullName'),'/','-'), '-PreemptedNodeCount')]",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.Batch/batchAccounts"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "PreemptedNodeCount"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[field('id')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Total"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[if(contains(field('tags'), '_amba-PreemptedNodeCount-threshold-Override_'), field('tags._amba-PreemptedNodeCount-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[concat(parameters('resourceName'), '-PreemptedNodeCount')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for Batch batchAccounts PreemptedNodeCount",
                      "severity": "[parameters('severity')]",
                      "enabled": "[parameters('enabled')]",
                      "scopes": [
                        "[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[parameters('evaluationFrequency')]",
                      "windowSize": "[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "PreemptedNodeCount",
                            "metricNamespace": "Microsoft.Batch/batchAccounts",
                            "metricName": "PreemptedNodeCount",
                            "operator": "GreaterThan",
                            "threshold": "[parameters('threshold')]",
                            "timeAggregation": "Total",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[replace(field('fullName'),'/','-')]"
                },
                "resourceId": {
                  "value": "[field('id')]"
                },
                "severity": {
                  "value": "[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[if(contains(field('tags'), '_amba-PreemptedNodeCount-threshold-Override_'), field('tags._amba-PreemptedNodeCount-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }



RebootingNodeCount - Metric Alert

Number of rebooting nodes

Properties:

autoMitigatefalse
criterionTypeStaticThresholdCriterion
enabledtrue
evaluationFrequencyPT1M
metricNameRebootingNodeCount
metricNamespaceMicrosoft.Batch/batchAccounts
operatorGreaterThan
severity1
threshold0
timeAggregationTotal
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Number of rebooting nodes",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 1,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "0",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Total",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "P1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "RebootingNodeCount",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Number of rebooting nodes'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 1

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 0

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Total'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'RebootingNodeCount'
          dimensions: []
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
    "mode": "All",
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "1"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "0"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.Batch/batchAccounts"
          },
          {
            "field": "[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "name": "[concat(replace(field('fullName'),'/','-'), '-RebootingNodeCount')]",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.Batch/batchAccounts"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "RebootingNodeCount"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[field('id')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Total"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[if(contains(field('tags'), '_amba-RebootingNodeCount-threshold-Override_'), field('tags._amba-RebootingNodeCount-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[concat(parameters('resourceName'), '-RebootingNodeCount')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for Batch batchAccounts RebootingNodeCount",
                      "severity": "[parameters('severity')]",
                      "enabled": "[parameters('enabled')]",
                      "scopes": [
                        "[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[parameters('evaluationFrequency')]",
                      "windowSize": "[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "RebootingNodeCount",
                            "metricNamespace": "Microsoft.Batch/batchAccounts",
                            "metricName": "RebootingNodeCount",
                            "operator": "GreaterThan",
                            "threshold": "[parameters('threshold')]",
                            "timeAggregation": "Total",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[replace(field('fullName'),'/','-')]"
                },
                "resourceId": {
                  "value": "[field('id')]"
                },
                "severity": {
                  "value": "[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[if(contains(field('tags'), '_amba-RebootingNodeCount-threshold-Override_'), field('tags._amba-RebootingNodeCount-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }



TaskFailEvent - Metric Alert

Total number of tasks that have completed in a failed state

Properties:

autoMitigatefalse
criterionTypeStaticThresholdCriterion
enabledtrue
evaluationFrequencyPT1M
metricNameTaskFailEvent
metricNamespaceMicrosoft.Batch/batchAccounts
operatorGreaterThan
severity3
threshold0
timeAggregationTotal
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Total number of tasks that have completed in a failed state",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "0",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Total",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "P1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "TaskFailEvent",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Total number of tasks that have completed in a failed state'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 0

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Total'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'TaskFailEvent'
          dimensions: []
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
    "mode": "All",
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "0"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.Batch/batchAccounts"
          },
          {
            "field": "[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "name": "[concat(replace(field('fullName'),'/','-'), '-TaskFailEvent')]",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.Batch/batchAccounts"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "TaskFailEvent"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[field('id')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Total"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[if(contains(field('tags'), '_amba-TaskFailEvent-threshold-Override_'), field('tags._amba-TaskFailEvent-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[concat(parameters('resourceName'), '-TaskFailEvent')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for Batch batchAccounts TaskFailEvent",
                      "severity": "[parameters('severity')]",
                      "enabled": "[parameters('enabled')]",
                      "scopes": [
                        "[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[parameters('evaluationFrequency')]",
                      "windowSize": "[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "TaskFailEvent",
                            "metricNamespace": "Microsoft.Batch/batchAccounts",
                            "metricName": "TaskFailEvent",
                            "operator": "GreaterThan",
                            "threshold": "[parameters('threshold')]",
                            "timeAggregation": "Total",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[replace(field('fullName'),'/','-')]"
                },
                "resourceId": {
                  "value": "[field('id')]"
                },
                "severity": {
                  "value": "[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[if(contains(field('tags'), '_amba-TaskFailEvent-threshold-Override_'), field('tags._amba-TaskFailEvent-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }



UnusableNodeCount - Metric Alert

Number of unusable nodes

Properties:

autoMitigatefalse
criterionTypeStaticThresholdCriterion
enabledtrue
evaluationFrequencyPT1M
metricNameUnusableNodeCount
metricNamespaceMicrosoft.Batch/batchAccounts
operatorGreaterThan
severity2
threshold2.5
timeAggregationTotal
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Number of unusable nodes",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 2,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "2",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Total",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "P1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "UnusableNodeCount",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Number of unusable nodes'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 2

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 2

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Total'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'UnusableNodeCount'
          dimensions: []
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
    "mode": "All",
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "2"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "2.5"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.Batch/batchAccounts"
          },
          {
            "field": "[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "name": "[concat(replace(field('fullName'),'/','-'), '-UnusableNodeCount')]",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.Batch/batchAccounts"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "UnusableNodeCount"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[field('id')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Total"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[if(contains(field('tags'), '_amba-UnusableNodeCount-threshold-Override_'), field('tags._amba-UnusableNodeCount-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[concat(parameters('resourceName'), '-UnusableNodeCount')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for Batch batchAccounts UnusableNodeCount",
                      "severity": "[parameters('severity')]",
                      "enabled": "[parameters('enabled')]",
                      "scopes": [
                        "[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[parameters('evaluationFrequency')]",
                      "windowSize": "[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "UnusableNodeCount",
                            "metricNamespace": "Microsoft.Batch/batchAccounts",
                            "metricName": "UnusableNodeCount",
                            "operator": "GreaterThan",
                            "threshold": "[parameters('threshold')]",
                            "timeAggregation": "Total",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[replace(field('fullName'),'/','-')]"
                },
                "resourceId": {
                  "value": "[field('id')]"
                },
                "severity": {
                  "value": "[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[if(contains(field('tags'), '_amba-UnusableNodeCount-threshold-Override_'), field('tags._amba-UnusableNodeCount-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }