# Helm values overrides for a Prometheus / Alertmanager / Grafana monitoring
# stack. NOTE(review): the key layout looks like the kube-prometheus-stack
# chart - confirm against the chart version actually deployed.
#
# `${...}` placeholders are presumably filled in by an external templating
# step before this file is parsed as YAML. They are double-quoted so that a
# substituted value containing ` #`, `: `, a leading YAML indicator, or only
# digits stays a single string scalar. A value that itself contains `"` or
# `\` must still be escaped by the templating step.

# Application-specific alerting rules, added on top of the chart defaults.
additionalPrometheusRulesMap:
  custom-app-rules:
    groups:
      - name: aspnetcore
        interval: 5m
        rules:
          - alert: HighRequestLatency
            # The aggregation must keep the `le` bucket label:
            # histogram_quantile() cannot compute a quantile from buckets
            # whose upper bounds have been summed away.
            expr: |-
              histogram_quantile(
                0.95,
                sum by (le, job, instance) (
                  rate(http_request_duration_seconds_bucket[5m])
                )
              ) > 0.5
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: "High request latency on {{ $labels.instance }}"
              description: "95th percentile latency is above 500ms (current value: {{ $value }}s)"

          - alert: HighErrorRate
            # Ratio of 5xx responses to all responses per (job, instance),
            # so the 0.05 threshold really means "5%" as the description
            # states, rather than an absolute 0.05 requests/second.
            expr: |-
              sum by (job, instance) (
                rate(http_requests_total{status=~"5.."}[5m])
              )
              /
              sum by (job, instance) (
                rate(http_requests_total[5m])
              )
              > 0.05
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: "High error rate on {{ $labels.instance }}"
              description: "Error rate is above 5% (current value: {{ $value }})"

prometheus:
  prometheusSpec:
    # Local storage is bounded by both age and size.
    retention: 24h
    retentionSize: 10GB
    resources:
      requests:
        memory: 200Mi
        cpu: 100m
      limits:
        memory: 500Mi
        cpu: 500m

    # Remote write to VictoriaMetrics
    remoteWrite:
      - url: "${remote_write_url}"
        queueConfig:
          maxSamplesPerSend: 10000
          maxShards: 5
          minShards: 1
          batchSendDeadline: 5s
        # Credentials are read from keys of an existing Secret.
        basicAuth:
          username:
            name: prometheus-remote-write-auth
            key: username
          password:
            name: prometheus-remote-write-auth
            key: password
        # Forward only the metric families matched below; every other
        # series is dropped before it is sent.
        writeRelabelConfigs:
          - sourceLabels: ["__name__"]
            regex: "(up|kube_.*|container_.*|node_.*|http_.*|process_.*)"
            action: keep

    # Remote read from VictoriaMetrics for old data
    remoteRead:
      - url: "${remote_read_url}"
        # Same Secret as the remote-write credentials.
        basicAuth:
          username:
            name: prometheus-remote-write-auth
            key: username
          password:
            name: prometheus-remote-write-auth
            key: password
        readRecent: false  # Only read data older than local retention

alertmanager:
  enabled: true
  alertmanagerSpec:
    replicas: 1
    resources:
      requests:
        memory: 50Mi
        cpu: 10m
      limits:
        memory: 150Mi
        cpu: 100m
    retention: 24h

grafana:
  resources:
    requests:
      memory: 100Mi
      cpu: 50m
    limits:
      memory: 300Mi
      cpu: 200m
  persistence:
    enabled: true
    size: 1Gi
  adminUser: admin
  # Injected at render time; keep the real value out of version control.
  adminPassword: "${grafana_admin_password}"

kubeStateMetrics:
  resources:
    requests:
      memory: 50Mi
      cpu: 10m
    limits:
      memory: 150Mi
      cpu: 100m

nodeExporter:
  resources:
    requests:
      memory: 30Mi
      cpu: 10m
    limits:
      memory: 100Mi
      cpu: 100m

prometheusOperator:
  resources:
    requests:
      memory: 100Mi
      cpu: 50m
    limits:
      memory: 300Mi
      cpu: 200m

# Built-in rule groups; every group listed is enabled except etcd.
defaultRules:
  create: true
  rules:
    alertmanager: true
    etcd: false
    general: true
    k8s: true
    kubernetesApps: true
    kubernetesResources: true
    kubernetesStorage: true
    kubernetesSystem: true
    node: true
    prometheus: true