1. prometheus
```bash
wget https://github.com/prometheus/prometheus/releases/download/v2.40.4/prometheus-2.40.4.linux-amd64.tar.gz
tar xzvf prometheus-2.40.4.linux-amd64.tar.gz
mv prometheus-2.40.4.linux-amd64 /opt/prometheus
```
service:
/lib/systemd/system/prometheus.service
```ini
[Unit]
Description=Prometheus
Documentation=https://prometheus.io/
After=network.target

[Service]
Type=simple
ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml
Restart=on-failure

[Install]
WantedBy=multi-user.target
```
systemctl enable prometheus
systemctl start prometheus
控制页面: http://127.0.0.1:9090
vim /opt/prometheus/prometheus.yml
```yaml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["localhost:9090"]

  - job_name: node
    static_configs:
      - targets: ['10.2.1.3:9100','10.2.1.2:9100']
  # kube-state-metrics
  - job_name: k8s
    static_configs:
      - targets: ['10.2.1.3:8080']
  - job_name: 'pushgateway'
    honor_labels: true
    static_configs:
      - targets: [':9091']
        labels:
          instance: pushgateway
```
2. node_exporter:
```bash
wget https://github.com/prometheus/node_exporter/releases/download/v1.4.0/node_exporter-1.4.0.linux-amd64.tar.gz
tar xzfv node_exporter-1.4.0.linux-amd64.tar.gz -C /opt
mv /opt/node_exporter-1.4.0.linux-amd64 /opt/node_exporter
```
service:
/lib/systemd/system/node_exporter.service
```ini
[Unit]
Description=Prometheus exporter for machine metrics, written in Go with pluggable metric collectors.
Documentation=https://github.com/prometheus/node_exporter
After=network.target

[Service]
LimitNOFILE=65535
LimitNPROC=65535
LimitCORE=infinity
ExecStart=/opt/node_exporter/node_exporter
Restart=on-failure
StartLimitInterval=1
RestartSec=3

[Install]
WantedBy=multi-user.target
```
3. grafana:
```bash
wget https://dl.grafana.com/enterprise/release/grafana-enterprise_9.3.0_amd64.deb
sudo apt-get install -y adduser libfontconfig1
dpkg -i grafana-enterprise_9.3.0_amd64.deb
systemctl enable grafana-server.service
systemctl start grafana-server.service
grafana-cli plugins install grafana-piechart-panel
# 如果超时,通过科学下载后
unzip grafana-piechart-panel-1.6.4.any.zip
mv grafana-piechart-panel /var/lib/grafana/plugins/
systemctl restart grafana-server.service
```
页面:
http://127.0.0.1:3000/ admin admin
k8s:Grafana导入id:13105 14518
node_exporter:1860
4. kube-state-metrics 外部prometheus监控k8s集群。
下载所有的文件:
https://github.com/kubernetes/kube-state-metrics/tree/master/examples/standard
```
# ls
cluster-role-binding.yaml cluster-role.yaml deployment.yaml service-account.yaml service.yaml
```
修改deployment.yaml 将网络改为host模式:
hostNetwork: true
修改镜像地址:
image: bitnami/kube-state-metrics:2.1.0
kubectl apply -f ./
5. pushgateway:
wget https://github.com/prometheus/pushgateway/releases/download/v1.5.1/pushgateway-1.5.1.linux-amd64.tar.gz
vim /lib/systemd/system/pushgateway.service
```ini
[Unit]
Description=pushgateway
Documentation=https://prometheus.io/
After=network.target

[Service]
Type=simple
ExecStart=/opt/pushgateway/pushgateway
Restart=on-failure

[Install]
WantedBy=multi-user.target
```
tar xzvf pushgateway-1.5.1.linux-amd64.tar.gz
mv pushgateway-1.5.1.linux-amd64 /opt/pushgateway