Skip to content

Collector Config

The collector is configured with a TOML-formatted file. In Kubernetes deployments, this file is typically stored in a ConfigMap that is mounted into the collector pod. A default config can be generated by running ./collector config.

Global Config

This is the top-level configuration for the collector. The only required fields are endpoint and storage-dir.

# Ingestor URL to send collected telemetry.
endpoint = 'https://ingestor.adx-mon.svc.cluster.local'
# Path to the Kubernetes client config.
kube-config = '.kube/config'
# Skip TLS verification.
insecure-skip-verify = true
# Address to listen on for endpoints.
listen-addr = ':8080'
# Region is a location identifier.
region = 'eastus'
# Optional path to the TLS key file.
tls-key-file = '/etc/certs/collector.key'
# Optional path to the TLS cert bundle file.
tls-cert-file = '/etc/certs/collector.pem'
# Maximum number of connections to accept.
max-connections = 100
# Maximum number of samples to send in a single batch.
max-batch-size = 1000
# Maximum segment age in seconds.
max-segment-age-seconds = 30
# Maximum segment size in bytes.
max-segment-size = 52428800
# Maximum allowed size in bytes of all segments on disk.
max-disk-usage = 53687091200
# Interval, in milliseconds, to flush the WAL. (default 100)
wal-flush-interval-ms = 100
# Storage directory for the WAL and log cursors.
storage-dir = '/var/lib/adx-mon'
# Enable pprof endpoints.
enable-pprof = true
# Default to dropping all metrics. Only metrics matching a keep rule will be kept.
default-drop-metrics = false
# Global regexes of metrics to drop.
drop-metrics = [
  '^kube_pod_ips$',
  'etcd_grpc.*'
]
# Global regexes of metrics to keep.
keep-metrics = [
  'nginx.*'
]
# Attributes lifted from the Body field and added to Attributes.
lift-attributes = [
  'host'
]

# Global key/value pairs of labels to add to all metrics.
[add-labels]
  collectedBy = 'collector'

# Global labels to drop if they match a metrics regex in the format <metrics regex>=<label name>. These are dropped from all metrics collected by this agent.
[drop-labels]
  podname = '.*'

# Global regexes of metrics to keep if they have the given label and value. These are kept from all metrics collected by this agent.
[[keep-metrics-with-label-value]]
  # The regex to match the label name against.
  label-regex = 'owner'
  # The regex to match the label value against.  If the label value matches, the metric will be kept.
  value-regex = 'platform'

[[keep-metrics-with-label-value]]
  # The regex to match the label name against.
  label-regex = 'type'
  # The regex to match the label value against.  If the label value matches, the metric will be kept.
  value-regex = 'frontend|backend'

# Global labels to lift from the metric to top-level columns.
[[lift-labels]]
  # The name of the label to lift.
  name = 'Host'
  # The name of the column to lift the label to.
  column = ''

[[lift-labels]]
  # The name of the label to lift.
  name = 'cluster_name'
  # The name of the column to lift the label to.
  column = 'Cluster'

# Key/value pairs of attributes to add to all logs.
[add-attributes]
  cluster = 'cluster1'
  geo = 'eu'

Prometheus Scrape

Prometheus scrape discovers pods with the adx-mon/scrape annotation and also scrapes any statically defined scrape targets. It ships the scraped metrics to the configured ADX database.

# Defines a Prometheus-format endpoint scraper.
[prometheus-scrape]
  # Database to store metrics in.
  database = 'Metrics'
  # Scrape interval in seconds.
  scrape-interval = 10
  # Scrape timeout in seconds.
  scrape-timeout = 5
  # Disable metrics forwarding to endpoints.
  disable-metrics-forwarding = false
  # Disable discovery of Kubernetes pod targets.
  disable-discovery = false
  # Regexes of metrics to drop.
  drop-metrics = [
    '^kube_pod_ips$',
    'etcd_grpc.*'
  ]
  # Regexes of metrics to keep.
  keep-metrics = [
    'nginx.*'
  ]

  # Defines a static scrape target.
  [[prometheus-scrape.static-scrape-target]]
    # The regex to match the host name against.  If the hostname matches, the URL will be scraped.
    host-regex = '.*'
    # The URL to scrape.
    url = 'http://localhost:9090/metrics'
    # The namespace label to add for metrics scraped at this URL.
    namespace = 'monitoring'
    # The pod label to add for metrics scraped at this URL.
    pod = 'host-monitor'
    # The container label to add for metrics scraped at this URL.
    container = 'host-monitor'

  # Regexes of metrics to keep if they have the given label and value.
  [[prometheus-scrape.keep-metrics-with-label-value]]
    # The regex to match the label name against.
    label-regex = 'owner'
    # The regex to match the label value against.  If the label value matches, the metric will be kept.
    value-regex = 'platform'

  [[prometheus-scrape.keep-metrics-with-label-value]]
    # The regex to match the label name against.
    label-regex = 'type'
    # The regex to match the label value against.  If the label value matches, the metric will be kept.
    value-regex = 'frontend|backend'

Prometheus Remote Write

Prometheus remote write accepts metrics sent via the Prometheus remote write protocol. It ships the received metrics to the configured ADX database.

# Defines a Prometheus remote write endpoint.
[[prometheus-remote-write]]
  # Database to store metrics in.
  database = 'Metrics'
  # The path to listen on for Prometheus remote write requests.  Defaults to /receive.
  path = '/receive'
  # Regexes of metrics to drop.
  drop-metrics = [
    '^kube_pod_ips$',
    'etcd_grpc.*'
  ]
  # Regexes of metrics to keep.
  keep-metrics = [
    'nginx.*'
  ]

  # Key/value pairs of labels to add to all metrics.
  [prometheus-remote-write.add-labels]
    cluster = 'cluster1'

  # Labels to drop if they match a metrics regex in the format <metrics regex>=<label name>.
  [prometheus-remote-write.drop-labels]
    podname = '.*'

  # Regexes of metrics to keep if they have the given label and value.
  [[prometheus-remote-write.keep-metrics-with-label-value]]
    # The regex to match the label name against.
    label-regex = 'owner'
    # The regex to match the label value against.  If the label value matches, the metric will be kept.
    value-regex = 'platform'

  [[prometheus-remote-write.keep-metrics-with-label-value]]
    # The regex to match the label name against.
    label-regex = 'type'
    # The regex to match the label value against.  If the label value matches, the metric will be kept.
    value-regex = 'frontend|backend'

Otel Log

The Otel log endpoint accepts OTLP/HTTP logs from an OpenTelemetry sender. By default, this listens under the path /v1/logs.

# Defines an OpenTelemetry log endpoint. Accepts OTLP/HTTP.
[otel-log]
  # Attributes lifted from the Body field and added to Attributes.
  lift-attributes = [
    'host'
  ]

  # Key/value pairs of attributes to add to all logs.
  [otel-log.add-attributes]
    cluster = 'cluster1'
    geo = 'eu'

Otel Metrics

The Otel metrics endpoint accepts OTLP/HTTP and/or OTLP/gRPC metrics from an OpenTelemetry sender.

# Defines an OpenTelemetry metric endpoint. Accepts OTLP/HTTP and/or OTLP/gRPC.
[[otel-metric]]
  # Database to store metrics in.
  database = 'Metrics'
  # The path to listen on for OTLP/HTTP requests.
  path = '/v1/otlpmetrics'
  # The port to listen on for OTLP/gRPC requests.
  grpc-port = 4317
  # Regexes of metrics to drop.
  drop-metrics = [
    '^kube_pod_ips$',
    'etcd_grpc.*'
  ]
  # Regexes of metrics to keep.
  keep-metrics = [
    'nginx.*'
  ]

  # Key/value pairs of labels to add to all metrics.
  [otel-metric.add-labels]
    cluster = 'cluster1'

  # Labels to drop if they match a metrics regex in the format <metrics regex>=<label name>. These are dropped from all metrics collected by this agent.
  [otel-metric.drop-labels]
    podname = '.*'

  # Regexes of metrics to keep if they have the given label and value.
  [[otel-metric.keep-metrics-with-label-value]]
    # The regex to match the label name against.
    label-regex = 'owner'
    # The regex to match the label value against.  If the label value matches, the metric will be kept.
    value-regex = 'platform'

  [[otel-metric.keep-metrics-with-label-value]]
    # The regex to match the label name against.
    label-regex = 'type'
    # The regex to match the label value against.  If the label value matches, the metric will be kept.
    value-regex = 'frontend|backend'

Host Log

The host log config sets up file and journald log collection. By default, Kubernetes pods with the adx-mon/log-destination annotation will have their logs scraped and sent to the appropriate destinations.

# Defines a host log scraper.
[[host-log]]
  # Disable discovery of Kubernetes pod targets. Only one HostLog configuration can use Kubernetes discovery.
  disable-kube-discovery = false
  # Defines a list of transforms to apply to log lines.
  transforms = []

  # Key/value pairs of attributes to add to all logs.
  [host-log.add-attributes]
    cluster = 'cluster1'
    geo = 'eu'

  # Defines a tail file target.
  [[host-log.file-target]]
    # The path to the file to tail.
    file-path = '/var/log/nginx/access.log'
    # The type of log being output. This defines how timestamps and log messages are extracted from structured log types like docker json files. Options are: docker, plain.
    log-type = 'plain'
    # Database to store logs in.
    database = 'Logs'
    # Table to store logs in.
    table = 'NginxAccess'
    # Parsers to apply sequentially to the log line.
    parsers = []

  # Defines a second tail file target; this one applies the 'json' parser to each log line.
  [[host-log.file-target]]
    # The path to the file to tail.
    file-path = '/var/log/myservice/service.log'
    # The type of log being output. This defines how timestamps and log messages are extracted from structured log types like docker json files. Options are: docker, plain.
    log-type = 'plain'
    # Database to store logs in.
    database = 'Logs'
    # Table to store logs in.
    # NOTE(review): same table as the nginx access log target above — confirm this is intended and not a copy-paste.
    table = 'NginxAccess'
    # Parsers to apply sequentially to the log line.
    parsers = [
      'json'
    ]

  # Defines a journal target to scrape.
  [[host-log.journal-target]]
    # Matches for the journal reader based on journalctl MATCHES. To select a systemd unit, use the field _SYSTEMD_UNIT. (e.g. '_SYSTEMD_UNIT=avahi-daemon.service' for selecting logs from the avahi-daemon service.)
    matches = [
      '_SYSTEMD_UNIT=docker.service',
      '_TRANSPORT=journal'
    ]
    # Database to store logs in.
    database = 'Logs'
    # Table to store logs in.
    table = 'Docker'
    # Parsers to apply sequentially to the log line.
    parsers = []