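---
# Helm values for a Prometheus monitoring stack: Prometheus, Alertmanager,
# Grafana, exporters, the operator and alerting rules.
# NOTE(review): the ${...} placeholders are presumably filled in by an
# external templating step before this file reaches Helm -- confirm.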
# Extra alerting rules, grouped under the rule-set name "custom-app-rules".
additionalPrometheusRulesMap:
  custom-app-rules:
    groups:
      - name: aspnetcore
        # Rules in this group are evaluated every 5 minutes.
        interval: 5m
        rules:
          # 95th percentile request latency above 0.5 s for 5 minutes.
          - alert: HighRequestLatency
            # `le` has to survive the aggregation: histogram_quantile() reads
            # the bucket boundaries from it and returns nothing usable when
            # the label has been summed away.
            expr: >-
              histogram_quantile(0.95,
              sum by (le, job, instance)
              (rate(http_request_duration_seconds_bucket[5m]))) > 0.5
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: "High request latency on {{ $labels.instance }}"
              description: "95th percentile latency is above 500ms (current value: {{ $value }}s)"

          # Share of 5xx responses above 5% of all requests for 5 minutes.
          - alert: HighErrorRate
            # Ratio of 5xx requests to all requests, so the 0.05 threshold
            # really means 5% rather than 0.05 failed requests per second.
            expr: >-
              sum by (job, instance)
              (rate(http_requests_total{status=~"5.."}[5m]))
              /
              sum by (job, instance)
              (rate(http_requests_total[5m]))
              > 0.05
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: "High error rate on {{ $labels.instance }}"
              description: "Error rate is above 5% (current value: {{ $value | humanizePercentage }})"

# Prometheus server settings: short local retention, with samples shipped to
# and read back from a remote VictoriaMetrics endpoint.
prometheus:
  prometheusSpec:
    # Local TSDB is bounded by both age and size.
    retention: 24h
    retentionSize: 10GB

    resources:
      requests:
        memory: 200Mi
        cpu: 100m
      limits:
        memory: 500Mi
        cpu: 500m

    # Remote write to VictoriaMetrics
    remoteWrite:
      # Quoted so the substituted value is always read as a string.
      - url: "${remote_write_url}"
        queueConfig:
          maxSamplesPerSend: 10000
          maxShards: 5
          minShards: 1
          batchSendDeadline: 5s
        # Credentials are taken from keys of the named Secret.
        basicAuth:
          username:
            name: prometheus-remote-write-auth
            key: username
          password:
            name: prometheus-remote-write-auth
            key: password
        # Only series whose metric name matches the regex are forwarded.
        writeRelabelConfigs:
          - sourceLabels: ["__name__"]
            regex: "(up|kube_.*|container_.*|node_.*|http_.*|process_.*)"
            action: keep

    # Remote read from VictoriaMetrics for old data
    remoteRead:
      # Quoted so the substituted value is always read as a string.
      - url: "${remote_read_url}"
        # Reuses the same Secret as remote write.
        basicAuth:
          username:
            name: prometheus-remote-write-auth
            key: username
          password:
            name: prometheus-remote-write-auth
            key: password
        readRecent: false  # Only read data older than local retention

# Alertmanager: a single replica with a small resource footprint.
alertmanager:
  enabled: true
  alertmanagerSpec:
    replicas: 1
    resources:
      requests:
        memory: 50Mi
        cpu: 10m
      limits:
        memory: 150Mi
        cpu: 100m
    # Alertmanager keeps its data for one day.
    retention: 24h

# Grafana: resource bounds, a 1Gi persistent volume and admin credentials.
grafana:
  resources:
    requests:
      memory: 100Mi
      cpu: 50m
    limits:
      memory: 300Mi
      cpu: 200m
  persistence:
    enabled: true
    size: 1Gi
  adminUser: admin
  # Single-quoted so a password that is all digits, starts with a YAML
  # indicator, or contains ": " or " #" is still read as a plain string.
  # A password containing a single quote would still need escaping upstream.
  # NOTE(review): consider supplying the credentials from an existing Secret
  # instead of passing the password through values -- confirm chart support.
  adminPassword: '${grafana_admin_password}'

# Resource bounds intended for kube-state-metrics.
# NOTE(review): if this file feeds the kube-prometheus-stack chart, subchart
# settings such as resources are read from the `kube-state-metrics:` key;
# confirm that this block is actually applied.
kubeStateMetrics:
  resources:
    requests:
      memory: 50Mi
      cpu: 10m
    limits:
      memory: 150Mi
      cpu: 100m

# Resource bounds intended for the node exporter.
# NOTE(review): if this file feeds the kube-prometheus-stack chart, subchart
# settings such as resources are read from the `prometheus-node-exporter:`
# key; confirm that this block is actually applied.
nodeExporter:
  resources:
    requests:
      memory: 30Mi
      cpu: 10m
    limits:
      memory: 100Mi
      cpu: 100m

# Resource bounds for the Prometheus operator.
prometheusOperator:
  resources:
    requests:
      memory: 100Mi
      cpu: 50m
    limits:
      memory: 300Mi
      cpu: 200m

# Bundled default rule groups: every group listed here is on except etcd.
defaultRules:
  create: true
  rules:
    alertmanager: true
    etcd: false
    general: true
    k8s: true
    kubernetesApps: true
    kubernetesResources: true
    kubernetesStorage: true
    kubernetesSystem: true
    node: true
    prometheus: true