Configuration file

M3 Coordinator and M3 Query share the same configuration options.

# The server listen address
listenAddress: <url>

# Options for emitting metrics
metrics:
  # Metrics scope
  scope:
    # Prefix prepended to metrics collected
    prefix: <string>
    # Reporting frequency of metrics collected
    reportingInterval: <duration>
    # Tags shared by metrics collected
    tags: <map of strings>
  # Configuration for a Prometheus reporter (if used)
  prometheus:
    # Metrics collection endpoint for application
    # Default = "/metrics"
    handlerPath: <string>
    # Listen address for metrics
    # Default = "0.0.0.0:7203"
    listenAddress: <url>
    # The default Prometheus type to use for Tally timers, valid options: [histogram, summary]
    timerType: <string>
    # If specified sets the default histogram buckets used by the reporter
    defaultHistogramBuckets:
      # Upper bound of the bucket
      upper: <float>
    # If specified sets the default summary objectives used by the reporter
    defaultSummaryObjectives:
      percentile: <float>
      allowedError: <float>
    # What to do with errors when listening on the specified listen address or registering a metric with Prometheus, valid options: [stderr, log, none]
    # Default = panic and stop execution of the goroutine
    onError: <string>
  # Metric sanitization type, valid options: [none, m3, prometheus]
  # Default = "none"
  sanitization: <string>
  # Metrics sampling rate. min=0.0, max=1.0
  samplingRate: <float>
  # Enable Go runtime metrics, valid options: [none, simple, moderate, detailed]
  # See https://github.com/m3db/m3/blob/master/src/x/instrument/extended.go#L39:L64 for more details
  extended: <string>

# Logging configuration
logging:
  # Log file location
  file: <string>
  # Error logging level
  level: <string>
  # Key-value pairs to send to logging
  fields: <map_of_strings>

# Enables tracing, if nothing configured, tracing is disabled
tracing:
  # Name for tracing service
  serviceName: <string>
  # Tracing backend to use, valid options: [jaeger, lightstep]
  backend: <string>
  # If using Jaeger, options to send to tracing backend
  jaeger:
  # If using Lightstep, options to send to tracing backend
  lightstep:

clusters:
    client:
      config:
        service:
          # Configures the etcd keyspace, valid options: [default_env, user_defined]
          # default_env is bare metal single node, user-defined matches a kubernetes-namespace/cluster-name pattern
          env: <string>
          # Availability zone, valid options: [user-defined, embedded]
          # user-defined should match the availability zone for your hosting provider
          zone: <string>
          # Service name
          service: <string>
          # Directory to store cached etcd data
          cacheDir: <string>
          # Identify the etcd hosts this node should connect to
          etcdClusters:
            # Availability zone, valid options: [user-defined, embedded]
            # user-defined should match the availability zone for your hosting provider
            - zone: <string>
              # Member nodes of the etcd cluster, in form url:port
              endpoints:
                - <url>
              # Keep alive header behavior
              keepAlive:
                # Enable keep alive
                enabled: <bool>
                # Duration of time after which if the client doesn't see any activity the client pings the server to see if transport is alive.
                period: <duration>
                # Add jittering to keep alive period to avoid a large number of clients sending keepalive probes at the same time.
                jitter: <duration>
                # Time the client waits for a response for the keep alive probe. If the response is not received in this time, the connection is closed.
                timeout: <duration>
              # TLS configuration
              tls:
                # Certificate path
                crtPath: <string>
                # Certificate authority path
                caCrtPath: <string>
                # Key store path
                keyPath: <string>
              autoSyncInterval: <duration>    
        # Seed node configuration, mostly used for single node setups
        seedNodes:
          # Path to key-value store directory
          rootDir: <string>
          initialAdvertisePeerUrls: <array_of_strings>
          advertiseClientUrls: <array_of_strings>
          listenPeerUrls: <array_of_strings>
          listenClientUrls: <array_of_strings>
          # A seed node for the cluster
          initialCluster:
            # Identifier for node
            - hostID: <string>
              # URL of node
              endpoint: <url>
              # Specify that this is an existing cluster, or leave blank if it is new, valid options: [existing]
              clusterState: <string>
          # Seed node client security configuration
          clientTransportSecurity:
            # Certificate authority path
            caFile: <string>
            # Certificate path
            certFile: <string>
            # Key store path
            keyFile: <string>
            trustedCaFile: <string>
            clientCertAuth: <bool>
            autoTls: <bool>
          # Seed node peer security configuration
          peerTransportSecurity:
            # Certificate authority path
            caFile: <string>
            # Certificate path
            certFile: <string>
            # Key store path
            keyFile: <string>
            trustedCaFile: <string>
            clientCertAuth: <bool>
            autoTls: <bool>
      # The consistency level for writing to a cluster, valid options: [none, one, majority, all]
      writeConsistencyLevel: <string>
      # The consistency level for reading from a cluster, valid options: [none, one, unstrict_majority, majority, unstrict_all, all]
      readConsistencyLevel: <string>
      # The timeout for writing data
      # Default = 10s
      writeTimeout: <duration>
      # The fetch timeout for any given query
      # Range = 30s to 5m
      fetchTimeout: <duration>
      # The cluster connect timeout    
      connectTimeout: <duration>
      # Configuration for retrying write operations
      writeRetry:
        # Defaults to 5s.
        initialBackoff: <duration>
        # Factor for exponential backoff
        backoffFactor: <float>
        # Maximum backoff time
        maxBackoff: <duration>
        # Maximum retry attempts
        maxRetries: <int>
        # Retry connection forever until the attempt succeeds, or the retry condition becomes false
        forever: <bool>        
        # Add randomness to wait intervals
        jitter: <bool>
      # Configuration for retrying fetch operations
      fetchRetry:
        initialBackoff: <duration>
        # Factor for exponential backoff
        backoffFactor: <float>
        # Maximum backoff time
        maxBackoff: <duration>
        # Maximum retry attempts
        maxRetries: <int>
        # Retry connection forever until the attempt succeeds, or the retry condition becomes false
        forever: <bool>        
        # Add randomness to wait intervals
        jitter: <bool>
      # The number of times a background check fails before a connection is taken out of consideration
      backgroundHealthCheckFailLimit: <int>
      # The factor of the host connect time when sleeping between a failed health check and the next check
      backgroundHealthCheckFailThrottleFactor: <float>

# Local embedded configuration if running embedded coordinator
local:
  # Describes the namespaces in a static cluster
  namespaces:
    # Name for the namespace
    namespace: <string>
    # The type of values stored by the namespace, valid options: [unaggregated, aggregated]
    type: <string>
    # How long to store metrics data
    retention: <duration>
    # Metrics sampling resolution
    resolution: <duration>
    # Configuration for downsampling options on an aggregated cluster namespace
    downsample:
      all: <bool>

# Configuration for the placement, namespaces and database management endpoints.
clusterManagement:
  # etcd client configuration
  etcd:
    # Configures the etcd keyspace, valid options: [default_env, user_defined]
    # default_env is bare metal single node, user-defined matches a kubernetes-namespace/cluster-name pattern
    env: <string>
    # Availability zone, valid options: [user-defined, embedded]
    # user-defined should match the availability zone for your hosting provider
    zone: <string>
    # Service name
    service: <string>
    # Directory to store cached etcd data
    cacheDir: <string>
    # Identify the etcd hosts this node should connect to
    etcdClusters:
      # Availability zone, valid options: [user-defined, embedded]
      # user-defined should match the availability zone for your hosting provider
      - zone: <string>
        # Member nodes of the etcd cluster, in form url:port
        endpoints:
          - <url>
        # Keep alive header behavior
        keepAlive:
          # Enable keep alive
          enabled: <bool>
          # Duration of time after which if the client doesn't see any activity the client pings the server to see if transport is alive.
          period: <duration>
          # Add jittering to keep alive period to avoid a large number of clients sending keepalive probes at the same time.
          jitter: <duration>
          # Time the client waits for a response for the keep alive probe. If the response is not received in this time, the connection is closed.
          timeout: <duration>
        # TLS configuration
        tls:
          # Certificate path
          crtPath: <string>
          # Certificate authority path
          caCrtPath: <string>
          # Key store path
          keyPath: <string>
        autoSyncInterval: <duration>    
    # M3 service discovery configuration
    m3sd:
      # Initialization timeout
      # Default = 5s
      initTimeout: <duration>
    # The revision that watch requests start from
    watchWithRevision: <int>
    # Changes permissions and mode of cache directory
    newDirectoryMode: <string>
    # Configuration for retrying connection operations
    retry:
      initialBackoff: <duration>
      # Factor for exponential backoff
      backoffFactor: <float>
      # Maximum backoff time
      maxBackoff: <duration>
      # Maximum retry attempts
      maxRetries: <int>
      # Retry connection forever until the attempt succeeds, or the retry condition becomes false
      forever: <bool>      
      # Add randomness to wait intervals
      jitter: <bool>
    # The timeout for etcd requests
    requestTimeout: <duration>
    # The timeout for a watchChan initialization
    # Default = 10s
    watchChanInitTimeout: <duration>
    # Frequency to check if a watch chan is no longer subscribed and should be closed
    # Default = 10s    
    watchChanCheckInterval: <duration>
    # The delay before resetting the etcd watch chan
    # Default = 10s  
    watchChanResetInterval: <duration>

# Storage filters for write, read and complete tags operations
# All have the same configuration, so only explained once
filter:
  read:
    # Specifies to use local only storage
    local_only: <string>
    # Specifies to use remote only storage
    remote_only: <string>
    # Specifies to use all storage
    allow_all: <string>
    # Specifies to use no storage
    allow_none: <string>
  write:
  completeTags:

# The RPC configuration for the coordinator for the GRPC server used for remote coordinator to coordinator calls
rpc:
  # Enable coordinator RPC for remote calls
  enabled: <bool>
  # The RPC server listen address
  listenAddress: <url>
  # Configuration settings for remote coordinator zones
  remotes:
    name: <string>
    # Remote listen addresses to call for remote coordinator calls in the zone
    remoteListenAddresses: <array of urls>
    # Overrides the default error behavior for this host, valid options: [fail, warning, container]
    # Default = warning
    errorBehavior: <string>
  # Overrides the default error behavior for all rpc hosts, valid options: [fail, warning, container]
  # Default = warning
  errorBehavior: <string>
  # Enable reflection on the GRPC server, useful for testing connectivity with grpcurl, etc.
  reflectionEnabled: <bool>


# Configures methods to contact remote coordinators to distribute M3 clusters across data centers
# Backend store for query service, valid options: [grpc, m3db, noop-etcd, prom-remote].
# Default = m3db
backend: <string>

# Configures Prometheus Remote backend when prom-remote backend is used.
prometheusRemoteBackend:
  # Array of Prometheus remote write compatible endpoints.
  # If storage policy is specified for endpoint only aggregated data matching policy will be sent to it.
  # Endpoints which do not specify storagePolicy will receive unaggregated writes.
  endpoints:
      # Unique endpoint name
    - name: <string>
      # HTTP URL of an endpoint that accepts Prometheus remote writes.
      address: <url>
      # Optional storage policy; when set, only aggregated data matching the policy is sent to this endpoint
      storagePolicy:
        # How long to store metrics data. This is only used to filter endpoints.
        retention: <duration>
        # Metrics sampling resolution. This is only used to filter endpoints.
        resolution: <duration>
        # Configuration for downsampling options on an aggregated endpoint
        # If not specified will default to all=true
        downsample:
          all: <bool>

# The worker pool policy for read requests
readWorkerPoolPolicy:
  # Worker pool automatically grows to capacity
  grow: <bool>
  # Static pool size, or initial size for dynamically growing pools
  size: <int>

# The worker pool policy for write requests
writeWorkerPoolPolicy:
  # Worker pool automatically grows to capacity
  grow: <bool>
  # Static pool size, or initial size for dynamically growing pools
  size: <int>

# Write forwarding to other Prometheus backends for mirroring, high availability etc
writeForwarding:
  # Forwarding options for prometheus write handler
  promRemoteWrite:
    maxConcurrency: <int>
    timeout: <duration>
    # Configuration for retrying connection operations
    retry:
      initialBackoff: <duration>
      # Factor for exponential backoff
      backoffFactor: <float>
      # Maximum backoff time
      maxBackoff: <duration>
      # Maximum retry attempts
      maxRetries: <int>
      # Retry connection forever until the attempt succeeds, or the retry condition becomes false
      forever: <bool>      
      # Add randomness to wait intervals
      jitter: <bool>
    # Prometheus write handler forwarder target
    targets:
      # URL of the target to send to
      url: <string>
      # Method defaults to POST if not set
      method: <string>
      # Headers to send with requests to the target
      headers: <map of strings>

# How to downsample metrics
downsample:
  # The configuration for the downsampler matcher
  matcher:
  # Downsample rules which overrides any rules set in the KV store
  rules:
    # Rules (multiple) that set retention and resolution for metrics given a filter to match metrics against
    mappingRules:
      # String separated label name to label value glob patterns to filter the mapping rule
      filter: <string>
      # Aggregations to apply to the set of metrics
      # Read https://m3db.io/docs/operational_guide/mapping_rollup/ for full list
      aggregations: <array_of_strings>
      # Retention/resolution storage policies to keep matched metrics
      storagePolicies:
        # How long to store metrics data
        retention: <duration>
        # Metrics sampling resolution
        resolution: <duration>
      # Drop any metrics that match the filter rather than keeping them with a storage policy
      drop: <bool>
      # Tags to add to the metric while applying the mapping rule
      tags: <array_of_strings>
      # Name for the mapping rule
      name: <string>
    # Rules (multiple) that sets aggregations for sets of metrics given a filter to match metrics against
    rollupRules:
      # String separated label name to label value glob patterns to filter the rollup rule
      filter: <string>
      # A set of rollup rule transforms
      transforms:
        rollup:
          # Name of the new metric emitted after the rollup is applied with its aggregations and group bys
          metricName: <string>
          # Set of labels to group by, only these remain on the new metric name produced by the rollup operation
          groupBy: <array_of_strings>
          # Aggregations to apply to the set of metrics
          # Read https://m3db.io/docs/operational_guide/mapping_rollup/ for full list
          aggregations: <array_of_strings>
        # Aggregation operation type
        aggregate:
          # Read https://m3db.io/docs/operational_guide/mapping_rollup/ for full list
          type: <string>
        # Transform operation type
        transform:
          # Read https://m3db.io/docs/operational_guide/mapping_rollup/ for full list
          type: <string>
      # Retention/resolution storage policies to keep matched metrics
      storagePolicies:
        # How long to store metrics data
        retention: <duration>
        # Metrics sampling resolution
        resolution: <duration>
      # Name for the rollup rule
      name: <string>
  # Pool of counter elements
  counterElemPool:
    # Size of the pool
    size: <int>
    watermark:
      # The low watermark to start refilling the pool
      # min=0.0, max=1.0
      low: <float>
      # The high watermark to stop refilling the pool
      # min=0.0, max=1.0
      high: <float>
  # Pool of timer elements
  timerElemPool:
    # Size of the pool
    size: <int>
    watermark:
      # The low watermark to start refilling the pool
      # min=0.0, max=1.0
      low: <float>
      # The high watermark to stop refilling the pool
      # min=0.0, max=1.0
      high: <float>
  # Pool of gauge elements
  gaugeElemPool:
    # Size of the pool
    size: <int>
    watermark:
      # The low watermark to start refilling the pool
      # min=0.0, max=1.0
      low: <float>
      # The high watermark to stop refilling the pool
      # min=0.0, max=1.0
      high: <float>
  # Specifies a custom buffer past limit for aggregation tiles
  bufferPastLimits:
    resolution: <duration>
    bufferPast: <duration>
  # How long an entry remains alive before it is expired due to inactivity
  entryTTL: <duration>
  # Augment the metric type used within the filter for rules
  augmentM3Tags: <bool>
  # Include rollup rules when deciding if the downsampler should ignore auto mapping rules based on the storage policies for a given rule
  includeRollupsOnDefaultRuleFiltering: <bool>

# Ingestion server configuration
ingest:
  ingester:
    workerPoolSize: <int>
    # Write operation pool size
    opPool:
      size: <int>
      watermark:
        # The low watermark to start refilling the pool
        # min=0.0, max=1.0
        low: <float>
        # The high watermark to stop refilling the pool
        # min=0.0, max=1.0
        high: <float>
    retry:
      initialBackoff: <duration>
      # Factor for exponential backoff
      backoffFactor: <float>
      # Maximum backoff time
      maxBackoff: <duration>
      # Maximum retry attempts
      maxRetries: <int>
      # Retry connection forever until the attempt succeeds, or the retry condition becomes false
      forever: <bool>      
      # Add randomness to wait intervals
      jitter: <bool>
    logSampleRate: <float>

# Configuration for the carbon server that offers graphite metrics support
carbon:
  ingester:
    workerPoolSize: <int>
    # Write operation pool size
    opPool:
      size: <int>
      watermark:
        # The low watermark to start refilling the pool
        # min=0.0, max=1.0
        low: <float>
        # The high watermark to stop refilling the pool
        # min=0.0, max=1.0
        high: <float>
    retry:
      initialBackoff: <duration>
      # Factor for exponential backoff
      backoffFactor: <float>
      # Maximum backoff time
      maxBackoff: <duration>
      # Maximum retry attempts
      maxRetries: <int>
      # Retry connection forever until the attempt succeeds, or the retry condition becomes false
      forever: <bool>      
      # Add randomness to wait intervals
      jitter: <bool>
    logSampleRate: <float>
  aggregateNamespacesAllData:
  # A constant time to shift start by
  shiftTimeStart: <duration>
  # A constant time to shift end by
  shiftTimeEnd: <duration>
  # A constant set of steps to shift start by
  shiftStepsStart: <int>
  # A constant set of steps to shift end by
  shiftStepsEnd: <int>
  # A constant set of steps to shift start by, if and only if, the end is an exact match to the resolution boundary of a query, and the start is an exact match to the resolution boundary
  shiftStepsStartWhenAtResolutionBoundary: <int>
  # A constant set of steps to shift end by, if and only if, the start is an exact match to the resolution boundary of a query, and the end is an exact match to the resolution boundary
  shiftStepsEndWhenAtResolutionBoundary: <int>
  # A constant set of steps to shift end by, if and only if, the start is an exact match to the resolution boundary of a query, and the end is NOT an exact match to the resolution boundary
  shiftStepsEndWhenStartAtResolutionBoundary: <int>
  # A constant set of steps to shift start by, if and only if, the end is an exact match to the resolution boundary of a query, and the start is NOT an exact match to the resolution boundary
  shiftStepsStartWhenEndAtResolutionBoundary: <int>
  # Render partial datapoints when the start time is between a datapoint's resolution step size
  renderPartialStart: <bool>
  # Render partial datapoints when the end time is between a datapoint's resolution step size
  renderPartialEnd: <bool>
  # Render series that have only NaNs for entire output instead of returning an empty array of datapoints
  renderSeriesAllNaNs: <bool>
  # Escape all characters using a backslash in a quoted string instead of only escaping quotes
  compileEscapeAllNotOnlyQuotes: <bool>

# Configuration for M3 Query component
query:
  # Query timeout
  timeout: <duration>
  # The default query engine, valid options: [prometheus, m3query]
  defaultEngine: <string>
  # Configuration for consolidating fetched queries
  consolidation:
    # Determines the options by which series should match, valid options: [ids, tags]
    matchType: <string>
  # Prometheus client configuration
  prometheus:
    # The limit on fetched samples per query
    maxSamplesPerQuery: <int>
  # Optional configuration to restrict all queries with certain tags
  restrictTags:
    match:
      # Tag to match
      name: <string>
      # How to match, valid options: [EQUAL, NOTEQUAL, REGEXP, NOTREGEXP, EXISTS, NOTEXISTS, ALL]
      type: <string>
      # Value of tag
      value: <string>
    # Tags to strip from response 
    strip: <array_of_strings>

# Specifies limitations on resource usage in the query instance. Limits are split between per-query and global limits
limits:
  # Configures limits on resource usage within a single query. Zero or negative values imply no limit
  perQuery:
    # Limits the number of time series returned for any given individual storage node per query, before returning result to query service
    maxFetchedSeries: <int>
    # Limits the number of index documents matched for any given individual storage node per query, before returning result to query service
    maxFetchedDocs: <int>
    # Generate an error if the query exceeds any limit
    requireExhaustive: <bool>

# Sets the lookback duration for queries
# Default = 5m
lookbackDuration: <duration>

# The result options for a query
resultOptions:
  # Keeps NaNs before returning query results.
  # Default = false
  keepNans: <bool>

# Controls whether the metrics type is stored or not
storeMetricsType: <bool>

# Multi-process configuration
multiProcess:
  # Enable multi-process execution
  enabled: <bool>
  # The number of sub-processes to run
  # use 0 to auto-detect based on number of CPUs
  count: <int>
  # The factor of processes to run per CPU,
  # use 0 to use the default of 0.5 per CPU, i.e. one process for every two CPUs
  # min=0.0, max=1.0
  perCPU: <float>
  # Explicitly sets the child GOMAXPROCs env var
  goMaxProcs: <int>

# Debug package configuration
debug:
  # Sets the runtime to report mutex contention events, read https://tip.golang.org/pkg/runtime/#SetMutexProfileFraction for more details about the value
  mutexProfileFraction: <int>
  # Sets the runtime to report blocking events, read https://golang.org/pkg/runtime/#SetBlockProfileRate for more details about the value
  blockProfileRate: <int>