Azure Monitor Baseline Alerts
Download AlertsGlossaryGitHubGitHub IssuesToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeBack to homepage

vaults

NameTypeDescription
Backup Health MonitoringUnknownDeploy RecoveryVault BackupHealthMonitor Alert
BackupHealthEventMetricThe count of health events pertaining to backup job health
RestoreHealthEventMetricThe count of health events pertaining to restore job health

Dashboards:

Click a tab to view the dashboard template

{
  "__inputs": [],
  "__elements": {},
  "__requires": [
    {
      "type": "panel",
      "id": "bargauge",
      "name": "Bar gauge",
      "version": ""
    },
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "9.5.12"
    },
    {
      "type": "datasource",
      "id": "grafana-azure-monitor-datasource",
      "name": "Azure Monitor",
      "version": "1.0.0"
    }
  ],
  "title": "Vaults",
  "editable": true,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "title": "BackupHealthEvent",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 0
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 1,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.RecoveryServices/vaults'\r\n| where MetricName has 'BackupHealthEvent'\r\n| summarize metric = avg(Count) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "RestoreHealthEvent",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 1
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "id": 2,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.RecoveryServices/vaults'\r\n| where MetricName has 'RestoreHealthEvent'\r\n| summarize metric = avg(Count) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    }
  ],
  "refresh": "",
  "schemaVersion": 38,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": [
      {
        "current": {},
        "hide": 0,
        "includeAll": false,
        "label": "Datasource",
        "multi": false,
        "name": "ds",
        "options": [],
        "query": "grafana-azure-monitor-datasource",
        "queryValue": "",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "type": "datasource"
      },
      {
        "current": {},
        "datasource": {
          "type": "grafana-azure-monitor-datasource",
          "uid": "${ds}"
        },
        "definition": "",
        "hide": 0,
        "includeAll": false,
        "label": "Subscription",
        "multi": false,
        "name": "sub",
        "options": [],
        "query": {
          "azureLogAnalytics": {
            "query": "",
            "resources": []
          },
          "queryType": "Azure Subscriptions",
          "refId": "A"
        },
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 0,
        "type": "query"
      }
    ]
  },
  "time": {
    "from": "now-6h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "version": null
}



Backup Health Monitoring - Unknown Alert

Deploy RecoveryVault BackupHealthMonitor Alert

Properties:

categorySite Recovery
operationNameMicrosoft.RecoveryServices/vaults/monitoringSettings.azureMonitorAlertSettings.alertsForAllJobFailures"
status
[
  "Enabled"
]

References:

Templates:

{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "Deploy_RecoveryVault_BackupHealthMonitor_Alert",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy RV Backup Health Monitoring Alerts",
    "description": "Policy to audit/update Recovery Vault Backup Health Alerting to Azure monitor alerts",
    "metadata": {
      "version": "1.0.0",
      "category": "Site Recovery",
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "modify",
          "audit",
          "disabled"
        ],
        "defaultValue": "modify"
      },
      "MonitorDisable": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Tag name to disable monitoring resource. Set to true if monitoring should be disabled"
        },
        "defaultValue": "MonitorDisable"
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.RecoveryServices/Vaults"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisable'), ']')]",
            "notEquals": "true"
          },
          {
            "field": "Microsoft.RecoveryServices/vaults/monitoringSettings.azureMonitorAlertSettings.alertsForAllJobFailures",
            "notEquals": "Enabled"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "conflictEffect": "audit",
          "operations": [
            {
              "operation": "addOrReplace",
              "field": "Microsoft.RecoveryServices/vaults/monitoringSettings.classicAlertSettings.alertsForCriticalOperations",
              "value": "Disabled"
            },
            {
              "operation": "addOrReplace",
              "field": "Microsoft.RecoveryServices/vaults/monitoringSettings.azureMonitorAlertSettings.alertsForAllJobFailures",
              "value": "Enabled"
            }
          ]
        }
      }
    }
  }
}



BackupHealthEvent - Metric Alert

The count of health events pertaining to backup job health

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT1H
metricNameBackupHealthEvent
metricNamespaceMicrosoft.RecoveryServices/vaults
operatorGreaterThan
severity3
threshold0
timeAggregationCount
windowSizeP1D

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "The count of health events pertaining to backup job health",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "0",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Count",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "P1D",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1H",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "BackupHealthEvent",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'The count of health events pertaining to backup job health'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 0

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Count'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'P1D'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1H'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'BackupHealthEvent'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "8f9b2819-9c51-4f30-91fd-195b7bc5d7a5",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy RecoveryServices vaults BackupHealthEvent Alert",
    "description": "Policy to Audit/Deploy RecoveryServices vaults BackupHealthEvent Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "RecoveryServices",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "P1D"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1H"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "0"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.RecoveryServices/vaults"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.RecoveryServices/vaults"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "BackupHealthEvent"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.RecoveryServices/vaults/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Count"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-BackupHealthEvent-threshold-Override_'), field('tags._amba-BackupHealthEvent-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-BackupHealthEvent')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for RecoveryServices vaults BackupHealthEvent",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "BackupHealthEvent",
                            "metricNamespace": "Microsoft.RecoveryServices/vaults",
                            "metricName": "BackupHealthEvent",
                            "operator": "GreaterThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Count",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-BackupHealthEvent-threshold-Override_'), field('tags._amba-BackupHealthEvent-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}



RestoreHealthEvent - Metric Alert

The count of health events pertaining to restore job health

Properties:

criterionTypeStaticThresholdCriterion
dimensions
[
  {
    "name": "healthstatus",
    "operator": "exclude",
    "values": [
      "healthy"
    ]
  }
]
evaluationFrequencyPT15M
metricNameRestoreHealthEvent
metricNamespaceMicrosoft.RecoveryServices/vaults
operatorGreaterThanOrEqual
severity2
threshold1
timeAggregationCount
windowSizePT15M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "The count of health events pertaining to restore job health",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 2,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThanOrEqual",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "1",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Count",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT15M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT15M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "RestoreHealthEvent",
              "dimensions": [{"operator": "exclude", "name": "healthstatus", "values": ["healthy"]}],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'The count of health events pertaining to restore job health'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 2

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThanOrEqual'

@description('The threshold value at which the alert is activated.')
param threshold int = 1

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Count'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT15M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT15M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'RestoreHealthEvent'
          dimensions: [
            {
              name: 'healthstatus'
              operator: 'exclude'
              values: ['healthy']
            }]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "805aa9c8-4d52-411d-9811-cc02b33952e3",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy RecoveryServices vaults RestoreHealthEvent Alert",
    "description": "Policy to Audit/Deploy RecoveryServices vaults RestoreHealthEvent Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "RecoveryServices",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "2"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT15M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT15M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "1"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.RecoveryServices/vaults"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.RecoveryServices/vaults"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "RestoreHealthEvent"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.RecoveryServices/vaults/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Count"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThanOrEqual"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-RestoreHealthEvent-threshold-Override_'), field('tags._amba-RestoreHealthEvent-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-RestoreHealthEvent')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for RecoveryServices vaults RestoreHealthEvent",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "RestoreHealthEvent",
                            "metricNamespace": "Microsoft.RecoveryServices/vaults",
                            "metricName": "RestoreHealthEvent",
                            "operator": "GreaterThanOrEqual",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Count",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-RestoreHealthEvent-threshold-Override_'), field('tags._amba-RestoreHealthEvent-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}