88 lines
3.0 KiB
Plaintext
88 lines
3.0 KiB
Plaintext
Monitoring
|
|
Prometheus
|
|
|
|
mkdir ~/monitoring && cd ~/monitoring
|
|
|
|
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
|
|
helm repo update
|
|
|
|
helm upgrade --install --namespace monitoring \
|
|
--create-namespace \
|
|
prometheus prometheus-community/prometheus
|
|
|
|
# cloud instance
|
|
IP4=$(/sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1)
|
|
# notebook
|
|
IP4=$(/sbin/ip -o -4 addr list br2 | awk '{print $4}' | cut -d/ -f1)
|
|
|
|
export POD_NAME=$(kubectl get pods --namespace monitoring -l "app=prometheus,component=server" -o jsonpath="{.items[0].metadata.name}")
|
|
kubectl --namespace monitoring port-forward --address $IP4 $POD_NAME 9090 &
|
|
|
|
|
|
|
|
Add Alert Rules to prometheus
|
|
|
|
cat >values.yaml <<EOF
|
|
serverFiles:
|
|
## Alerts configuration
|
|
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
|
|
alerting_rules.yml:
|
|
groups:
|
|
- name: Instances
|
|
rules:
|
|
- alert: InstanceDown
|
|
expr: up == 0
|
|
for: 5m
|
|
labels:
|
|
severity: page
|
|
annotations:
|
|
description: '{{ \$labels.instance }} of job {{ \$labels.job }} has been down for more than 5 minutes.'
|
|
summary: 'Instance {{ \$labels.instance }} down'
|
|
EOF
|
|
helm upgrade --install --namespace monitoring \
|
|
--create-namespace \
|
|
prometheus prometheus-community/prometheus \
|
|
--values values.yaml
|
|
|
|
Review the alerts in the Prometheus UI (and later in Grafana)
|
|
|
|
Grafana
|
|
|
|
https://github.com/grafana/helm-charts/tree/main/charts/grafana
|
|
|
|
helm repo add grafana https://grafana.github.io/helm-charts
|
|
helm repo update
|
|
helm upgrade --install --namespace monitoring --create-namespace grafana grafana/grafana
|
|
|
|
kubectl get secret --namespace monitoring grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
|
|
export POD_NAME=$(kubectl get pods --namespace monitoring -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=grafana" -o jsonpath="{.items[0].metadata.name}")
|
|
kubectl --namespace monitoring port-forward $POD_NAME --address=$IP4 3000 &
|
|
|
|
Create Prometheus Datasource http://prometheus-server:80
|
|
Import K3s nodeexporter dashboard
|
|
https://grafana.com/grafana/dashboards/15282-k8s-rke-cluster-monitoring/
|
|
|
|
Loki
|
|
|
|
#helm repo add grafana https://grafana.github.io/helm-charts
|
|
#helm repo update
|
|
helm upgrade --install loki --namespace=monitoring --create-namespace grafana/loki-stack
|
|
|
|
Add Loki Datasource http://loki:3100
|
|
Loki Dashboard
|
|
https://grafana.com/grafana/dashboards/12019-loki-dashboard-quick-search/
|
|
|
|
kube prometheus stack
|
|
|
|
helm upgrade --install --namespace monitoring \
|
|
--create-namespace \
|
|
prometheus prometheus-community/kube-prometheus-stack
|
|
IP4=$(/sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1)
|
|
kubectl -n monitoring port-forward prometheus-prometheus-kube-prometheus-prometheus-0 --address $IP4 9090:9090 &
|
|
|
|
kubectl -n monitoring port-forward prometheus-grafana-689f7dbb7-k69kx --address $IP4 3000:3000 &
|
|
|
|
kubectl -n monitoring get secret prometheus-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
|
|
Default Grafana admin password (kube-prometheus-stack): prom-operator
|
|
|