Azure Monitor Baseline Alerts
Download AlertsGlossaryGitHubGitHub IssuesToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeBack to homepage

managedClusters

NameTypeDescription
cluster_autoscaler_cluster_safe_to_autoscaleMetricDetermines whether or not cluster autoscaler will take action on the cluster
cluster_autoscaler_unschedulable_pods_countMetricNumber of pods that are currently unschedulable in the cluster
kube_node_status_allocatable_cpu_coresMetricTotal number of available cpu cores in a managed cluster
kube_node_status_allocatable_memory_bytesMetricTotal amount of available memory in a managed cluster
kube_node_status_conditionMetricStatuses for various node conditions
kube_pod_status_phaseMetricNumber of pods by phase
kube_pod_status_readyMetricNumber of pods in Ready state
node_cpu_usage_percentageMetricAggregated average CPU utilization measured in percentage across the cluster
node_disk_usage_percentageMetricDisk space used in percent by device
node_memory_rss_percentageMetricContainer RSS memory used in percent
node_memory_working_set_percentageMetricContainer working set memory used in percent

Dashboards:

Click a tab to view the dashboard template

{
  "__inputs": [],
  "__elements": {},
  "__requires": [
    {
      "type": "panel",
      "id": "bargauge",
      "name": "Bar gauge",
      "version": ""
    },
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "9.5.12"
    },
    {
      "type": "datasource",
      "id": "grafana-azure-monitor-datasource",
      "name": "Azure Monitor",
      "version": "1.0.0"
    }
  ],
  "title": "Managed clusters",
  "editable": true,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "title": "cluster_autoscaler_cluster_safe_to_autoscale",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {

                "color": "dark-red",                
                "value": 0
              },
              {

                "color": "dark-green",                
                "value": 1
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 1,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.ContainerService/managedClusters'\r\n| where MetricName has 'cluster_autoscaler_cluster_safe_to_autoscale'\r\n| summarize metric = avg(Average) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "cluster_autoscaler_unschedulable_pods_count",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 0
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "id": 2,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.ContainerService/managedClusters'\r\n| where MetricName has 'cluster_autoscaler_unschedulable_pods_count'\r\n| summarize metric = avg(Average) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "kube_node_status_allocatable_cpu_cores",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {

                "color": "dark-red",                
                "value": 0
              },
              {

                "color": "dark-green",                
                "value": 2
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 8
      },
      "id": 3,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.ContainerService/managedClusters'\r\n| where MetricName has 'kube_node_status_allocatable_cpu_cores'\r\n| summarize metric = avg(Average) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "kube_node_status_allocatable_memory_bytes",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {

                "color": "dark-red",                
                "value": 0
              },
              {

                "color": "dark-green",                
                "value": 2147483648
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 8
      },
      "id": 4,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.ContainerService/managedClusters'\r\n| where MetricName has 'kube_node_status_allocatable_memory_bytes'\r\n| summarize metric = avg(Average) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "kube_node_status_condition",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 0
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 16
      },
      "id": 5,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.ContainerService/managedClusters'\r\n| where MetricName has 'kube_node_status_condition'\r\n| summarize metric = avg(Total) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "kube_pod_status_phase",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 0
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 16
      },
      "id": 6,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.ContainerService/managedClusters'\r\n| where MetricName has 'kube_pod_status_phase'\r\n| summarize metric = avg(Average) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "kube_pod_status_ready",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {

                "color": "dark-red",                
                "value": 0
              },
              {

                "color": "dark-green",                
                "value": 1
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 24
      },
      "id": 7,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.ContainerService/managedClusters'\r\n| where MetricName has 'kube_pod_status_ready'\r\n| summarize metric = avg(Average) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "node_cpu_usage_percentage",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 95
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 24
      },
      "id": 8,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.ContainerService/managedClusters'\r\n| where MetricName has 'node_cpu_usage_percentage'\r\n| summarize metric = avg(Average) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "node_disk_usage_percentage",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 80
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 32
      },
      "id": 9,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.ContainerService/managedClusters'\r\n| where MetricName has 'node_disk_usage_percentage'\r\n| summarize metric = avg(Average) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "node_memory_rss_percentage",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 90
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 32
      },
      "id": 10,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.ContainerService/managedClusters'\r\n| where MetricName has 'node_memory_rss_percentage'\r\n| summarize metric = avg(Average) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    },
    {
      "title": "node_memory_working_set_percentage",
      "datasource": {
        "type": "grafana-azure-monitor-datasource",
        "uid": "${ds}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#808080",
                "value": null
              },
              {
                
                "color": "dark-green",                
                "value": 0
              },
              {
                
                "color": "dark-red",                
                "value": 100
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 40
      },
      "id": 11,
      "options": {
        "displayMode": "basic",
        "minVizHeight": 10,
        "minVizWidth": 0,
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": true
        },
        "showUnfilled": true,
        "valueMode": "color"
      },
      "pluginVersion": "9.5.12",
      "targets": [
        {
          "azureLogAnalytics": {

            "query": "AzureMetrics\r\n| where _ResourceId has 'Microsoft.ContainerService/managedClusters'\r\n| where MetricName has 'node_memory_working_set_percentage'\r\n| summarize metric = avg(Average) by _ResourceId, Resource",
            "resources": [
              "/subscriptions/$sub"
            ]
          },
          "azureMonitor": {
            "allowedTimeGrainsMs": [],
            "timeGrain": "auto"
          },
          "datasource": {
            "type": "grafana-azure-monitor-datasource",
            "uid": "${ds}"
          },
          "queryType": "Azure Log Analytics",
          "refId": "A"
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "_ResourceId": true
            },
            "indexByName": {},
            "renameByName": {}
          }
        }
      ],
      "type": "bargauge"
    }
  ],
  "refresh": "",
  "schemaVersion": 38,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": [
      {
        "current": {},
        "hide": 0,
        "includeAll": false,
        "label": "Datasource",
        "multi": false,
        "name": "ds",
        "options": [],
        "query": "grafana-azure-monitor-datasource",
        "queryValue": "",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "type": "datasource"
      },
      {
        "current": {},
        "datasource": {
          "type": "grafana-azure-monitor-datasource",
          "uid": "${ds}"
        },
        "definition": "",
        "hide": 0,
        "includeAll": false,
        "label": "Subscription",
        "multi": false,
        "name": "sub",
        "options": [],
        "query": {
          "azureLogAnalytics": {
            "query": "",
            "resources": []
          },
          "queryType": "Azure Subscriptions",
          "refId": "A"
        },
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 0,
        "type": "query"
      }
    ]
  },
  "time": {
    "from": "now-6h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "version": null
}



cluster_autoscaler_cluster_safe_to_autoscale - Metric Alert

Determines whether or not cluster autoscaler will take action on the cluster

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT1M
metricNamecluster_autoscaler_cluster_safe_to_autoscale
metricNamespaceMicrosoft.ContainerService/managedClusters
operatorLessThan
severity3
threshold1
timeAggregationAverage
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Determines whether or not cluster autoscaler will take action on the cluster",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "LessThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "1",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Average",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "cluster_autoscaler_cluster_safe_to_autoscale",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Determines whether or not cluster autoscaler will take action on the cluster'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'LessThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 1

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Average'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'cluster_autoscaler_cluster_safe_to_autoscale'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "49fd8bac-d061-459d-8d80-a048c4c8ba56",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy ContainerService managedClusters cluster_autoscaler_cluster_safe_to_autoscale Alert",
    "description": "Policy to Audit/Deploy ContainerService managedClusters cluster_autoscaler_cluster_safe_to_autoscale Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "ContainerService",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "1"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.ContainerService/managedClusters"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.ContainerService/managedClusters"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "cluster_autoscaler_cluster_safe_to_autoscale"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.ContainerService/managedClusters/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Average"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "LessThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-cluster_autoscaler_cluster_safe_to_autoscale-threshold-Override_'), field('tags._amba-cluster_autoscaler_cluster_safe_to_autoscale-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-cluster_autoscaler_cluster_safe_to_autoscale')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for ContainerService managedClusters cluster_autoscaler_cluster_safe_to_autoscale",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "cluster_autoscaler_cluster_safe_to_autoscale",
                            "metricNamespace": "Microsoft.ContainerService/managedClusters",
                            "metricName": "cluster_autoscaler_cluster_safe_to_autoscale",
                            "operator": "LessThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Average",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-cluster_autoscaler_cluster_safe_to_autoscale-threshold-Override_'), field('tags._amba-cluster_autoscaler_cluster_safe_to_autoscale-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}



cluster_autoscaler_unschedulable_pods_count - Metric Alert

Number of pods that are currently unschedulable in the cluster

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT1M
metricNamecluster_autoscaler_unschedulable_pods_count
metricNamespaceMicrosoft.ContainerService/managedClusters
operatorGreaterThan
severity3
threshold0
timeAggregationAverage
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Number of pods that are currently unschedulable in the cluster",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "0",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Average",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "cluster_autoscaler_unschedulable_pods_count",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Number of pods that are currently unschedulable in the cluster'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 0

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Average'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'cluster_autoscaler_unschedulable_pods_count'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "301ca0e3-fc88-4285-be99-a2a587c412f5",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy ContainerService managedClusters cluster_autoscaler_unschedulable_pods_count Alert",
    "description": "Policy to Audit/Deploy ContainerService managedClusters cluster_autoscaler_unschedulable_pods_count Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "ContainerService",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "0"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.ContainerService/managedClusters"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.ContainerService/managedClusters"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "cluster_autoscaler_unschedulable_pods_count"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.ContainerService/managedClusters/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Average"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-cluster_autoscaler_unschedulable_pods_count-threshold-Override_'), field('tags._amba-cluster_autoscaler_unschedulable_pods_count-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-cluster_autoscaler_unschedulable_pods_count')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for ContainerService managedClusters cluster_autoscaler_unschedulable_pods_count",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "cluster_autoscaler_unschedulable_pods_count",
                            "metricNamespace": "Microsoft.ContainerService/managedClusters",
                            "metricName": "cluster_autoscaler_unschedulable_pods_count",
                            "operator": "GreaterThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Average",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-cluster_autoscaler_unschedulable_pods_count-threshold-Override_'), field('tags._amba-cluster_autoscaler_unschedulable_pods_count-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}



kube_node_status_allocatable_cpu_cores - Metric Alert

Total number of available cpu cores in a managed cluster

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT1M
metricNamekube_node_status_allocatable_cpu_cores
metricNamespaceMicrosoft.ContainerService/managedClusters
operatorLessThan
severity3
threshold2
timeAggregationAverage
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Total number of available cpu cores in a managed cluster",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "LessThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "2",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Average",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "kube_node_status_allocatable_cpu_cores",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Total number of available cpu cores in a managed cluster'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'LessThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 2

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Average'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'kube_node_status_allocatable_cpu_cores'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "64a872f9-5ec6-4121-acad-edd12f4c3466",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy ContainerService managedClusters kube_node_status_allocatable_cpu_cores Alert",
    "description": "Policy to Audit/Deploy ContainerService managedClusters kube_node_status_allocatable_cpu_cores Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "ContainerService",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "2"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.ContainerService/managedClusters"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.ContainerService/managedClusters"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "kube_node_status_allocatable_cpu_cores"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.ContainerService/managedClusters/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Average"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "LessThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-kube_node_status_allocatable_cpu_cores-threshold-Override_'), field('tags._amba-kube_node_status_allocatable_cpu_cores-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-kube_node_status_allocatable_cpu_cores')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for ContainerService managedClusters kube_node_status_allocatable_cpu_cores",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "kube_node_status_allocatable_cpu_cores",
                            "metricNamespace": "Microsoft.ContainerService/managedClusters",
                            "metricName": "kube_node_status_allocatable_cpu_cores",
                            "operator": "LessThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Average",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-kube_node_status_allocatable_cpu_cores-threshold-Override_'), field('tags._amba-kube_node_status_allocatable_cpu_cores-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}



kube_node_status_allocatable_memory_bytes - Metric Alert

Total amount of available memory in a managed cluster

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT1M
metricNamekube_node_status_allocatable_memory_bytes
metricNamespaceMicrosoft.ContainerService/managedClusters
operatorLessThan
severity3
threshold2.147483648e+09
timeAggregationAverage
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Total amount of available memory in a managed cluster",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "LessThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "2147483648",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Average",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "kube_node_status_allocatable_memory_bytes",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Total amount of available memory in a managed cluster'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'LessThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 2147483648

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Average'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'kube_node_status_allocatable_memory_bytes'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "14fa63fb-7da9-4d2e-9de9-203c4a3e0401",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy ContainerService managedClusters kube_node_status_allocatable_memory_bytes Alert",
    "description": "Policy to Audit/Deploy ContainerService managedClusters kube_node_status_allocatable_memory_bytes Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "ContainerService",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "2147483648"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.ContainerService/managedClusters"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.ContainerService/managedClusters"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "kube_node_status_allocatable_memory_bytes"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.ContainerService/managedClusters/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Average"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "LessThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-kube_node_status_allocatable_memory_bytes-threshold-Override_'), field('tags._amba-kube_node_status_allocatable_memory_bytes-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-kube_node_status_allocatable_memory_bytes')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for ContainerService managedClusters kube_node_status_allocatable_memory_bytes",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "kube_node_status_allocatable_memory_bytes",
                            "metricNamespace": "Microsoft.ContainerService/managedClusters",
                            "metricName": "kube_node_status_allocatable_memory_bytes",
                            "operator": "LessThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Average",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-kube_node_status_allocatable_memory_bytes-threshold-Override_'), field('tags._amba-kube_node_status_allocatable_memory_bytes-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}



kube_node_status_condition - Metric Alert

Statuses for various node conditions

Properties:

criterionTypeStaticThresholdCriterion
dimensions
[
  {
    "name": "status2",
    "operator": "include",
    "values": [
      "notready",
      "unknown"
    ]
  }
]
evaluationFrequencyPT1M
metricNamekube_node_status_condition
metricNamespaceMicrosoft.ContainerService/managedClusters
operatorGreaterThan
severity3
threshold0
timeAggregationTotal
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Statuses for various node conditions",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "0",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Total",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "kube_node_status_condition",
              "dimensions": [{"operator": "include", "name": "status2", "values": ["notready", "unknown"]}],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Statuses for various node conditions'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 0

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Total'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'kube_node_status_condition'
          dimensions: [
            {
              name: 'status2'
              operator: 'include'
              values: ['notready','unknown']
            }]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "ebbbbb92-5208-4d52-b407-6bea8e4473b9",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy ContainerService managedClusters kube_node_status_condition Alert",
    "description": "Policy to Audit/Deploy ContainerService managedClusters kube_node_status_condition Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "ContainerService",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "0"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.ContainerService/managedClusters"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.ContainerService/managedClusters"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "kube_node_status_condition"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.ContainerService/managedClusters/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Total"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-kube_node_status_condition-threshold-Override_'), field('tags._amba-kube_node_status_condition-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-kube_node_status_condition')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for ContainerService managedClusters kube_node_status_condition",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "kube_node_status_condition",
                            "metricNamespace": "Microsoft.ContainerService/managedClusters",
                            "metricName": "kube_node_status_condition",
                            "operator": "GreaterThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Total",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-kube_node_status_condition-threshold-Override_'), field('tags._amba-kube_node_status_condition-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}



kube_pod_status_phase - Metric Alert

Number of pods by phase

Properties:

criterionTypeStaticThresholdCriterion
dimensions
[
  {
    "name": "phase",
    "operator": "include",
    "values": [
      "failed"
    ]
  }
]
evaluationFrequencyPT1M
metricNamekube_pod_status_phase
metricNamespaceMicrosoft.ContainerService/managedClusters
operatorGreaterThan
severity3
threshold0
timeAggregationAverage
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Number of pods by phase",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "0",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Average",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "kube_pod_status_phase",
              "dimensions": [{"operator": "include", "name": "phase", "values": ["failed"]}],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Number of pods by phase'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 0

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Average'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'kube_pod_status_phase'
          dimensions: [
            {
              name: 'phase'
              operator: 'include'
              values: ['failed']
            }]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "e38e6e11-ac88-4014-98c8-8e64f70b832a",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy ContainerService managedClusters kube_pod_status_phase Alert",
    "description": "Policy to Audit/Deploy ContainerService managedClusters kube_pod_status_phase Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "ContainerService",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "0"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.ContainerService/managedClusters"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.ContainerService/managedClusters"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "kube_pod_status_phase"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.ContainerService/managedClusters/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Average"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-kube_pod_status_phase-threshold-Override_'), field('tags._amba-kube_pod_status_phase-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-kube_pod_status_phase')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for ContainerService managedClusters kube_pod_status_phase",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "kube_pod_status_phase",
                            "metricNamespace": "Microsoft.ContainerService/managedClusters",
                            "metricName": "kube_pod_status_phase",
                            "operator": "GreaterThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Average",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-kube_pod_status_phase-threshold-Override_'), field('tags._amba-kube_pod_status_phase-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}



kube_pod_status_ready - Metric Alert

Number of pods in Ready state

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT1M
metricNamekube_pod_status_ready
metricNamespaceMicrosoft.ContainerService/managedClusters
operatorLessThan
severity2
threshold1
timeAggregationAverage
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Number of pods in Ready state",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 2,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "LessThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "1",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Average",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "kube_pod_status_ready",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Number of pods in Ready state'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 2

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'LessThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 1

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Average'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'kube_pod_status_ready'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "cebf00b7-7294-4cf7-bc50-108f999d0c67",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy ContainerService managedClusters kube_pod_status_ready Alert",
    "description": "Policy to Audit/Deploy ContainerService managedClusters kube_pod_status_ready Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "ContainerService",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "2"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "1"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.ContainerService/managedClusters"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.ContainerService/managedClusters"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "kube_pod_status_ready"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.ContainerService/managedClusters/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Average"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "LessThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-kube_pod_status_ready-threshold-Override_'), field('tags._amba-kube_pod_status_ready-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-kube_pod_status_ready')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for ContainerService managedClusters kube_pod_status_ready",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "kube_pod_status_ready",
                            "metricNamespace": "Microsoft.ContainerService/managedClusters",
                            "metricName": "kube_pod_status_ready",
                            "operator": "LessThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Average",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-kube_pod_status_ready-threshold-Override_'), field('tags._amba-kube_pod_status_ready-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}



node_cpu_usage_percentage - Metric Alert

Aggregated average CPU utilization measured in percentage across the cluster

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT5M
metricNamenode_cpu_usage_percentage
metricNamespaceMicrosoft.ContainerService/managedClusters
operatorGreaterThan
severity3
threshold95
timeAggregationAverage
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Aggregated average CPU utilization measured in percentage across the cluster",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "95",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Average",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "node_cpu_usage_percentage",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Aggregated average CPU utilization measured in percentage across the cluster'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 95

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Average'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT5M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'node_cpu_usage_percentage'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "1303e91d-bc80-4ec2-937e-1d179fc32b43",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy ContainerService managedClusters node_cpu_usage_percentage Alert",
    "description": "Policy to Audit/Deploy ContainerService managedClusters node_cpu_usage_percentage Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "ContainerService",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT5M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "95"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.ContainerService/managedClusters"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.ContainerService/managedClusters"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "node_cpu_usage_percentage"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.ContainerService/managedClusters/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Average"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-node_cpu_usage_percentage-threshold-Override_'), field('tags._amba-node_cpu_usage_percentage-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-node_cpu_usage_percentage')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for ContainerService managedClusters node_cpu_usage_percentage",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "node_cpu_usage_percentage",
                            "metricNamespace": "Microsoft.ContainerService/managedClusters",
                            "metricName": "node_cpu_usage_percentage",
                            "operator": "GreaterThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Average",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-node_cpu_usage_percentage-threshold-Override_'), field('tags._amba-node_cpu_usage_percentage-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}



node_disk_usage_percentage - Metric Alert

Disk space used in percent by device

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT1M
metricNamenode_disk_usage_percentage
metricNamespaceMicrosoft.ContainerService/managedClusters
operatorGreaterThan
severity2
threshold80
timeAggregationAverage
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Disk space used in percent by device",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 2,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "80",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Average",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "node_disk_usage_percentage",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Disk space used in percent by device'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 2

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 80

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Average'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'node_disk_usage_percentage'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "aa3c5697-5cca-4c16-a37e-94f1d580701e",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy ContainerService managedClusters node_disk_usage_percentage Alert",
    "description": "Policy to Audit/Deploy ContainerService managedClusters node_disk_usage_percentage Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "ContainerService",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "2"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "80"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.ContainerService/managedClusters"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.ContainerService/managedClusters"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "node_disk_usage_percentage"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.ContainerService/managedClusters/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Average"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-node_disk_usage_percentage-threshold-Override_'), field('tags._amba-node_disk_usage_percentage-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-node_disk_usage_percentage')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for ContainerService managedClusters node_disk_usage_percentage",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "node_disk_usage_percentage",
                            "metricNamespace": "Microsoft.ContainerService/managedClusters",
                            "metricName": "node_disk_usage_percentage",
                            "operator": "GreaterThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Average",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-node_disk_usage_percentage-threshold-Override_'), field('tags._amba-node_disk_usage_percentage-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}



node_memory_rss_percentage - Metric Alert

Container RSS memory used in percent

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT1M
metricNamenode_memory_rss_percentage
metricNamespaceMicrosoft.ContainerService/managedClusters
operatorGreaterThan
severity3
threshold90
timeAggregationAverage
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Container RSS memory used in percent",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "90",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Average",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT1M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "node_memory_rss_percentage",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Container RSS memory used in percent'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 90

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Average'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT1M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'node_memory_rss_percentage'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "e89e023e-117b-4db4-bfb2-853849e273f5",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy ContainerService managedClusters node_memory_rss_percentage Alert",
    "description": "Policy to Audit/Deploy ContainerService managedClusters node_memory_rss_percentage Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "ContainerService",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT1M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "90"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.ContainerService/managedClusters"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.ContainerService/managedClusters"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "node_memory_rss_percentage"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.ContainerService/managedClusters/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Average"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-node_memory_rss_percentage-threshold-Override_'), field('tags._amba-node_memory_rss_percentage-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-node_memory_rss_percentage')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for ContainerService managedClusters node_memory_rss_percentage",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "node_memory_rss_percentage",
                            "metricNamespace": "Microsoft.ContainerService/managedClusters",
                            "metricName": "node_memory_rss_percentage",
                            "operator": "GreaterThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Average",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-node_memory_rss_percentage-threshold-Override_'), field('tags._amba-node_memory_rss_percentage-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}



node_memory_working_set_percentage - Metric Alert

Container working set memory used in percent

Properties:

criterionTypeStaticThresholdCriterion
evaluationFrequencyPT5M
metricNamenode_memory_working_set_percentage
metricNamespaceMicrosoft.ContainerService/managedClusters
operatorGreaterThan
severity3
threshold100
timeAggregationAverage
windowSizePT5M

References:

Templates:

{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "alertName": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Name of the alert"
      }
    },
    "alertDescription": {
      "type": "string",
      "defaultValue": "Container working set memory used in percent",
      "metadata": {
        "description": "Description of alert"
      }
    },
    "targetResourceId": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name"
      }
    },
    "targetResourceRegion": {
      "type": "string",
      "metadata": {
        "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS"
      }
    },
    "targetResourceType": {
      "type": "string",
      "minLength": 1,
      "metadata": {
        "description": "Resource type of target resources to be monitored."
      }
    },
    "isEnabled": {
      "type": "bool",
      "defaultValue": true,
      "metadata": {
        "description": "Specifies whether the alert is enabled"
      }
    },
    "alertSeverity": {
      "type": "int",
      "defaultValue": 3,
      "allowedValues": [
        0,
        1,
        2,
        3,
        4
      ],
      "metadata": {
        "description": "Severity of alert {0,1,2,3,4}"
      }
    },
    "operator": {
      "type": "string",
      "defaultValue": "GreaterThan",
      "allowedValues": [
        "Equals",
        "GreaterThan",
        "GreaterThanOrEqual",
        "LessThan",
        "LessThanOrEqual"
      ],
      "metadata": {
        "description": "Operator comparing the current value with the threshold value."
      }
    },
    "threshold": {
      "type": "string",
      "defaultValue": "100",
      "metadata": {
        "description": "The threshold value at which the alert is activated."
      }
    },
    "timeAggregation": {
      "type": "string",
      "defaultValue": "Average",
      "allowedValues": [
        "Average",
        "Minimum",
        "Maximum",
        "Total",
        "Count"
      ],
      "metadata": {
        "description": "How the data that is collected should be combined over time."
      }
    },
    "windowSize": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H",
        "PT6H",
        "PT12H",
        "PT24H",
        "PT1D"
      ],
      "metadata": {
        "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
      }
    },
    "evaluationFrequency": {
      "type": "string",
      "defaultValue": "PT5M",
      "allowedValues": [
        "PT1M",
        "PT5M",
        "PT15M",
        "PT30M",
        "PT1H"
      ],
      "metadata": {
        "description": "how often the metric alert is evaluated represented in ISO 8601 duration format"
      }
    },
    "currentDateTimeUtcNow": {
      "type": "string",
      "defaultValue": "[utcNow()]",
      "metadata": {
          "description": "The current date and time using the utcNow function. Used for deployment name uniqueness"
      }
    },
    "telemetryOptOut": {
      "type": "string",
      "defaultValue": "No",
      "allowedValues": [
          "Yes",
          "No"
      ],
      "metadata": {
          "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry."
      }
    }
  },
  "variables": {
    "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]",
    "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]"
  },
  "resources": [
    {
      "type": "Microsoft.Insights/metricAlerts",
      "apiVersion": "2018-03-01",
      "name": "[parameters('alertName')]",
      "location": "global",
      "tags": {
        "_deployed_by_amba": true
      },
      "properties": {
        "description": "[parameters('alertDescription')]",
        "scopes": "[variables('varTargetResourceId')]",
        "targetResourceType": "[parameters('targetResourceType')]",
        "targetResourceRegion": "[parameters('targetResourceRegion')]",
        "severity": "[parameters('alertSeverity')]",
        "enabled": "[parameters('isEnabled')]",
        "evaluationFrequency": "[parameters('evaluationFrequency')]",
        "windowSize": "[parameters('windowSize')]",
        "criteria": {
          "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria",
          "allOf": [
            {
              "name": "1st criterion",
              "metricName": "node_memory_working_set_percentage",
              "dimensions": [],
              "operator": "[parameters('operator')]",
              "threshold": "[parameters('threshold')]",
              "timeAggregation": "[parameters('timeAggregation')]",
              "criterionType": "StaticThresholdCriterion"
            }
          ]
        }
      }
    },
    {
      "condition": "[equals(parameters('telemetryOptOut'), 'No')]",
      "apiVersion": "2023-07-01",
      "name": "[variables('pidDeploymentName')]",
      "type": "Microsoft.Resources/deployments",
      "properties": {
          "mode": "Incremental",
          "template": {
              "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
              "contentVersion": "1.0.0.0",
              "resources": []
          }
      }
    }
  ]
}
@description('Name of the alert')
@minLength(1)
param alertName string

@description('Description of alert')
param alertDescription string = 'Container working set memory used in percent'

@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name')
@minLength(1)
param targetResourceId array

@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS')
param targetResourceRegion string

@description('Resource type of target resources to be monitored.')
@minLength(1)
param targetResourceType string

@description('Specifies whether the alert is enabled')
param isEnabled bool = true

@description('Severity of alert {0,1,2,3,4}')
@allowed([
  0
  1
  2
  3
  4
])
param alertSeverity int = 3

@description('Operator comparing the current value with the threshold value.')
@allowed([
  'Equals'
  'GreaterThan'
  'GreaterThanOrEqual'
  'LessThan'
  'LessThanOrEqual'
])
param operator string = 'GreaterThan'

@description('The threshold value at which the alert is activated.')
param threshold int = 100

@description('How the data that is collected should be combined over time.')
@allowed([
  'Average'
  'Minimum'
  'Maximum'
  'Total'
  'Count'
])
param timeAggregation string = 'Average'

@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
  'PT6H'
  'PT12H'
  'PT24H'
  'P1D'
])
param windowSize string = 'PT5M'

@description('how often the metric alert is evaluated represented in ISO 8601 duration format')
@allowed([
  'PT1M'
  'PT5M'
  'PT15M'
  'PT30M'
  'PT1H'
])
param evaluationFrequency string = 'PT5M'

@description('"The current date and time using the utcNow function. Used for deployment name uniqueness')
param currentDateTimeUtcNow string = utcNow()

@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.')
@allowed([
  'Yes'
  'No'
])
param telemetryOptOut string = 'No'

resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = {
  name: alertName
  location: 'global'
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    description: alertDescription
    scopes: targetResourceId
    targetResourceType: targetResourceType
    targetResourceRegion: targetResourceRegion
    severity: alertSeverity
    enabled: isEnabled
    evaluationFrequency: evaluationFrequency
    windowSize: windowSize
    criteria: {
      'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria'
      allOf: [
        {
          name: '1st criterion'
          metricName: 'node_memory_working_set_percentage'
          dimensions: [[]]
          operator: operator
          threshold: threshold
          timeAggregation: timeAggregation
          criterionType: 'StaticThresholdCriterion'
        }
      ]
    }
  }
}

var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' =  if (telemetryOptOut == 'No') {
  name: ambaTelemetryPidName
  tags: {
    _deployed_by_amba: 'true'
  }
  properties: {
    mode: 'Incremental'
    template: {
      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
      contentVersion: '1.0.0.0'
      resources: []
    }
  }
}
{
  "type": "Microsoft.Authorization/policyDefinitions",
  "apiVersion": "2021-06-01",
  "name": "9ae2dfbf-d69b-4802-8497-8b7836bef5e9",
  "properties": {
    "policyType": "Custom",
    "mode": "All",
    "displayName": "Deploy ContainerService managedClusters node_memory_working_set_percentage Alert",
    "description": "Policy to Audit/Deploy ContainerService managedClusters node_memory_working_set_percentage Alert",
    "metadata": {
      "version": "1.0.0-preview",
      "category": "ContainerService",
      "preview": true,
      "source": "https://github.com/Azure/azure-monitor-baseline-alerts/",
      "alzCloudEnvironments": [
        "AzureCloud"
      ],
      "_deployed_by_amba": "True"
    },
    "parameters": {
      "severity": {
        "type": "String",
        "metadata": {
          "displayName": "Severity",
          "description": "Severity of the Alert"
        },
        "allowedValues": [
          "0",
          "1",
          "2",
          "3",
          "4"
        ],
        "defaultValue": "3"
      },
      "windowSize": {
        "type": "String",
        "metadata": {
          "displayName": "Window Size",
          "description": "Window size for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H",
          "PT6H",
          "PT12H",
          "P1D"
        ],
        "defaultValue": "PT5M"
      },
      "evaluationFrequency": {
        "type": "String",
        "metadata": {
          "displayName": "Evaluation Frequency",
          "description": "Evaluation frequency for the alert"
        },
        "allowedValues": [
          "PT1M",
          "PT5M",
          "PT15M",
          "PT30M",
          "PT1H"
        ],
        "defaultValue": "PT5M"
      },
      "autoMitigate": {
        "type": "String",
        "metadata": {
          "displayName": "Auto Mitigate",
          "description": "Auto Mitigate for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "enabled": {
        "type": "String",
        "metadata": {
          "displayName": "Alert State",
          "description": "Alert state for the alert"
        },
        "allowedValues": [
          "true",
          "false"
        ],
        "defaultValue": "true"
      },
      "threshold": {
        "type": "String",
        "metadata": {
          "displayName": "Threshold",
          "description": "Threshold for the alert"
        },
        "defaultValue": "100"
      },
      "effect": {
        "type": "String",
        "metadata": {
          "displayName": "Effect",
          "description": "Effect of the policy"
        },
        "allowedValues": [
          "deployIfNotExists",
          "disabled"
        ],
        "defaultValue": "deployIfNotExists"
      },
      "MonitorDisableTagName": {
        "type": "String",
        "metadata": {
          "displayName": "Monitoring disabled tag name",
          "description": "Tag name used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": "MonitorDisable"
      },
      "MonitorDisableTagValues": {
        "type": "Array",
        "metadata": {
          "displayName": "Monitoring disabled tag values(s)",
          "description": "Tag value(s) used to disable monitoring at the resource level. Set to true if monitoring should be disabled."
        },
        "defaultValue": [
          "true",
          "Test",
          "Dev",
          "Sandbox"
        ]
      }
    },
    "policyRule": {
      "if": {
        "allOf": [
          {
            "field": "type",
            "equals": "Microsoft.ContainerService/managedClusters"
          },
          {
            "field": "[[concat('tags[', parameters('MonitorDisableTagName'), ']')]",
            "notIn": "[[parameters('MonitorDisableTagValues')]"
          }
        ]
      },
      "then": {
        "effect": "[[parameters('effect')]",
        "details": {
          "roleDefinitionIds": [
            "/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
          ],
          "type": "Microsoft.Insights/metricAlerts",
          "existenceCondition": {
            "allOf": [
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricNamespace",
                "equals": "Microsoft.ContainerService/managedClusters"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].metricName",
                "equals": "node_memory_working_set_percentage"
              },
              {
                "field": "Microsoft.Insights/metricalerts/scopes[*]",
                "equals": "[[concat(subscription().id, '/resourceGroups/', resourceGroup().name, '/providers/Microsoft.ContainerService/managedClusters/', field('fullName'))]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/enabled",
                "equals": "[[parameters('enabled')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/evaluationFrequency",
                "equals": "[[parameters('evaluationFrequency')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/windowSize",
                "equals": "[[parameters('windowSize')]"
              },
              {
                "field": "Microsoft.Insights/metricalerts/severity",
                "equals": "[[parameters('severity')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/autoMitigate",
                "equals": "[[parameters('autoMitigate')]"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft-Azure-Monitor-SingleResourceMultipleMetricCriteria.allOf[*].timeAggregation",
                "equals": "Average"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.operator",
                "equals": "GreaterThan"
              },
              {
                "field": "Microsoft.Insights/metricAlerts/criteria.Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria.allOf[*].StaticThresholdCriterion.threshold",
                "equals": "[[if(contains(field('tags'), '_amba-node_memory_working_set_percentage-threshold-Override_'), field('tags._amba-node_memory_working_set_percentage-threshold-Override_'), parameters('threshold'))]"
              }
            ]
          },
          "deployment": {
            "properties": {
              "mode": "incremental",
              "template": {
                "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
                "contentVersion": "1.0.0.0",
                "parameters": {
                  "resourceName": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceName",
                      "description": "Name of the resource"
                    }
                  },
                  "resourceId": {
                    "type": "String",
                    "metadata": {
                      "displayName": "resourceId",
                      "description": "Resource ID of the resource emitting the metric that will be used for the comparison"
                    }
                  },
                  "severity": {
                    "type": "String"
                  },
                  "windowSize": {
                    "type": "String"
                  },
                  "evaluationFrequency": {
                    "type": "String"
                  },
                  "autoMitigate": {
                    "type": "String"
                  },
                  "enabled": {
                    "type": "String"
                  },
                  "threshold": {
                    "type": "String"
                  }
                },
                "variables": {},
                "resources": [
                  {
                    "type": "Microsoft.Insights/metricAlerts",
                    "apiVersion": "2018-03-01",
                    "name": "[[concat(parameters('resourceName'), '-node_memory_working_set_percentage')]",
                    "location": "global",
                    "tags": {
                      "_deployed_by_amba": true
                    },
                    "properties": {
                      "description": "Metric Alert for ContainerService managedClusters node_memory_working_set_percentage",
                      "severity": "[[parameters('severity')]",
                      "enabled": "[[parameters('enabled')]",
                      "scopes": [
                        "[[parameters('resourceId')]"
                      ],
                      "evaluationFrequency": "[[parameters('evaluationFrequency')]",
                      "windowSize": "[[parameters('windowSize')]",
                      "criteria": {
                        "allOf": [
                          {
                            "name": "node_memory_working_set_percentage",
                            "metricNamespace": "Microsoft.ContainerService/managedClusters",
                            "metricName": "node_memory_working_set_percentage",
                            "operator": "GreaterThan",
                            "threshold": "[[parameters('threshold')]",
                            "timeAggregation": "Average",
                            "criterionType": "StaticThresholdCriterion"
                          }
                        ],
                        "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria"
                      },
                      "autoMitigate": "[[parameters('autoMitigate')]",
                      "parameters": {
                        "severity": {
                          "value": "[[parameters('severity')]"
                        },
                        "windowSize": {
                          "value": "[[parameters('windowSize')]"
                        },
                        "evaluationFrequency": {
                          "value": "[[parameters('evaluationFrequency')]"
                        },
                        "autoMitigate": {
                          "value": "[[parameters('autoMitigate')]"
                        },
                        "enabled": {
                          "value": "[[parameters('enabled')]"
                        },
                        "threshold": {
                          "value": "[[parameters('threshold')]"
                        }
                      }
                    }
                  }
                ]
              },
              "parameters": {
                "resourceName": {
                  "value": "[[field('name')]"
                },
                "resourceId": {
                  "value": "[[field('id')]"
                },
                "severity": {
                  "value": "[[parameters('severity')]"
                },
                "windowSize": {
                  "value": "[[parameters('windowSize')]"
                },
                "evaluationFrequency": {
                  "value": "[[parameters('evaluationFrequency')]"
                },
                "autoMitigate": {
                  "value": "[[parameters('autoMitigate')]"
                },
                "enabled": {
                  "value": "[[parameters('enabled')]"
                },
                "threshold": {
                  "value": "[[if(contains(field('tags'), '_amba-node_memory_working_set_percentage-threshold-Override_'), field('tags._amba-node_memory_working_set_percentage-threshold-Override_'), parameters('threshold'))]"
                }
              }
            }
          }
        }
      }
    }
  }
}