Add prometheus and grafana

This commit is contained in:
Wojciech Kozlowski 2023-10-22 18:31:32 +02:00
parent ce892b83c5
commit d787e4d65a
17 changed files with 437 additions and 0 deletions

View File

@ -107,6 +107,8 @@ services:
music:
domain: "{{ vault_services.music.domain }}"
password_encryption_key: "{{ vault_services.music.password_encryption_key }}"
# The metrics and dash services carry no per-service settings here, so each is an empty mapping.
metrics: {}
dash: {}
# --------------------------------------------------------------------------------------------------
# services:user_setup

View File

@ -174,6 +174,18 @@ services_host_services:
collection_path: "{{ music_user_data_collection_directory }}"
archive_path: "{{ music_user_data_archive_directory }}"
restic: true
metrics:
  inet_address: "{{ vpn_bridge_inet_prefix }}.8"
  inet6_address: "{{ vpn_bridge_inet6_prefix }}::8"
  # Addresses of the scrape target; the metrics hosts template maps both to `metrics-target`.
  # The IPv4 entry previously reused the IPv6 variable, which left `target_inet` and
  # `target_inet6` identical.
  # NOTE(review): assumes `vpn_wireguard_inet_address` is defined next to
  # `vpn_wireguard_inet6_address` - confirm against the vpn variables.
  target_inet: "{{ vpn_wireguard_inet_address }}"
  target_inet6: "{{ vpn_wireguard_inet6_address }}"
  target_port: 19999
  tcp: [9090]
  restic: true
# Dashboard service, addressed at .9 / ::9 under the VPN bridge prefixes.
# NOTE(review): `restic: true` presumably opts the service into restic backups - confirm.
dash:
inet_address: "{{ vpn_bridge_inet_prefix }}.9"
inet6_address: "{{ vpn_bridge_inet6_prefix }}::9"
restic: true
# --------------------------------------------------------------------------------------------------
# services:backups

View File

@ -0,0 +1,45 @@
# Plain-HTTP listener for dash.thenineworlds.net: serves /.well-known from the local webroot
# (presumably for certificate challenges - confirm) and redirects everything else to HTTPS.
server {
    listen 80;
    listen [::]:80;
    server_name dash.thenineworlds.net;

    location ^~ /.well-known {
        allow all;
        root /var/www/html;
    }

    location / {
        return 301 https://$server_name$request_uri;
    }
}

# TLS listener. Connections arrive with a PROXY protocol header and requests are forwarded
# to pod-dash on port 3000.
server {
    listen 8443 ssl proxy_protocol;
    listen [::]:8443 ssl proxy_protocol;
    server_name dash.thenineworlds.net;

    ssl_certificate /etc/letsencrypt/live/dash.thenineworlds.net/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/dash.thenineworlds.net/privkey.pem;
    ssl_trusted_certificate /etc/letsencrypt/live/dash.thenineworlds.net/chain.pem;

    # Take the client address from the PROXY protocol header, but only when the connection
    # comes from the rproxy / lrproxy service addresses.
    real_ip_header proxy_protocol;
    set_real_ip_from {{ services_all_services.rproxy.inet_address }};
    set_real_ip_from {{ services_all_services.rproxy.inet6_address }};
    set_real_ip_from {{ services_all_services.lrproxy.inet_address }};
    set_real_ip_from {{ services_all_services.lrproxy.inet6_address }};

    error_page 500 502 503 504 /50x.html;
    location = /50x.html {
        root /usr/share/nginx/html;
    }

    location / {
        proxy_pass http://pod-dash:3000;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $remote_addr;
    }
}

View File

@ -0,0 +1,22 @@
---
# Argument specification for this role's `main` entry point.
# Every option listed below is a required string.
argument_specs:
  main:
    options:
      ansible_hostname:
        required: true
        type: "str"

      services_root_directory:
        required: true
        type: "str"

      services_home_directory:
        required: true
        type: "str"

      services_data_directory:
        required: true
        type: "str"

      services_containers_directory:
        required: true
        type: "str"

      services_service_name:
        required: true
        type: "str"

View File

@ -0,0 +1,62 @@
---
# Deploys the dash pod's hosts file and systemd user units, then restarts the pod only when
# one of those files changed and the pod was already active.

- name: "set the user variables"
  ansible.builtin.import_role:
    name: "services/include"
    vars_from: "user"

# NOTE(review): the registered result is not referenced anywhere else in this file - confirm
# whether a follow-up task is missing or this task can be dropped.
- name: "stat the grafana password file"
  ansible.builtin.stat:
    path: "{{ services_service_user_home }}/.config/service/admin.password"
  register: services_deploy_dash_grafana_password_file_stat

# NOTE(review): `become_user` is taken to apply to the whole block (all tasks below operate
# on the service user's files and systemd user instance) - confirm against the original layout.
- name: "deploy the dash pod as the service user"
  become_user: "{{ services_service_user_name }}"
  block:
    - name: "configure hosts file"
      ansible.builtin.template:
        src: "./hosts"
        dest: "{{ services_service_user_home }}/.config/service/hosts"
        # Modes are quoted so they are always read as octal strings, never as YAML integers.
        mode: "0644"
      register: services_deploy_dash_hosts_file

    - name: "configure systemd service"
      ansible.builtin.template:
        src: "./systemd/{{ item }}"
        dest: "{{ services_service_user_home }}/.config/systemd/user/{{ item }}"
        mode: "0600"
      loop:
        - "pod-dash.service"
        - "container-dash-grafana.service"
      register: services_deploy_dash_systemd_files

    - name: "systemd user daemon reload"
      ansible.builtin.systemd:
        daemon_reload: true
        scope: "user"
      when: services_deploy_dash_systemd_files.changed

    # Populates `getent_passwd`, whose field 1 (the uid) is used for XDG_RUNTIME_DIR below.
    - name: "get uid"
      ansible.builtin.getent:
        database: "passwd"
        key: "{{ services_service_user_name }}"

    # Read-only query, hence `changed_when: false`.
    - name: "get service status"
      ansible.builtin.command: >-
        systemctl --user show --property ActiveState --value
        pod-{{ services_service_name }}.service
      environment:
        XDG_RUNTIME_DIR: "/run/user/{{ getent_passwd[services_service_user_name].1 }}"
      changed_when: false
      register: services_deploy_dash_service_active_state

    # Both list items must hold: something changed AND the pod is currently active.
    - name: "restart the service"
      ansible.builtin.systemd:
        name: "pod-{{ services_service_name }}.service"
        state: "restarted"
        scope: "user"
      when:
        - services_deploy_dash_hosts_file.changed or services_deploy_dash_systemd_files.changed
        - services_deploy_dash_service_active_state.stdout == "active"

View File

@ -0,0 +1,9 @@
# Hosts file template for the dash pod; the grafana container unit mounts the rendered file
# read-only as /etc/hosts.
127.0.0.1 localhost
::1 localhost ip6-localhost ip6-loopback
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
127.0.1.1 dash
# pod-metrics resolves to the metrics service's addresses (IPv6 entry listed first).
{{ services_all_services.metrics.inet6_address }} pod-metrics
{{ services_all_services.metrics.inet_address }} pod-metrics

View File

@ -0,0 +1,35 @@
# systemd user unit template that runs the Grafana container inside the dash pod.
[Unit]
Description=Podman container-dash-grafana.service
Documentation=man:podman-generate-systemd(1)
Wants=network.target
After=network-online.target
# Bound to the pod unit: ordered after it and stopped whenever the pod unit stops.
BindsTo=pod-dash.service
After=pod-dash.service
# On failure, trigger the status-mail template unit instantiated with this unit's name.
OnFailure=status-mail@%n.service
[Service]
Environment=PODMAN_SYSTEMD_UNIT=%n
Restart=on-failure
TimeoutStopSec=70
# Clear stale pid / container-id files left in the runtime directory (%t) by a previous run.
ExecStartPre=/bin/rm -f %t/container-dash-grafana.pid %t/container-dash-grafana.ctr-id
# The container joins the pod recorded in %t/pod-dash.pod-id. Mounts: resolv.conf and the
# rendered hosts file read-only, and the data directory at /var/lib/grafana with :U, which
# makes podman chown the source to the container's user.
# NOTE(review): the relative ./.config path resolves against the unit's working directory,
# presumably the service user's home - confirm.
ExecStart=/usr/bin/podman run \
--conmon-pidfile %t/container-dash-grafana.pid \
--cidfile %t/container-dash-grafana.ctr-id \
--cgroups=no-conmon \
--pod-id-file %t/pod-dash.pod-id \
--replace \
--label "io.containers.autoupdate=image" \
--log-driver=journald \
-dt \
-v {{ system_etc_root_directory }}/resolv.conf:/etc/resolv.conf:ro \
-v ./.config/service/hosts:/etc/hosts:ro \
-v {{ services_data_directory }}/pod-dash/data/_data:/var/lib/grafana:U \
--name=pod-dash-grafana \
docker.io/grafana/grafana:{{ services_service_deploy_versions.grafana }}
# Stop with a 10 second grace period, then force-remove the container.
ExecStop=/usr/bin/podman stop --ignore --cidfile %t/container-dash-grafana.ctr-id -t 10
ExecStopPost=/usr/bin/podman rm --ignore -f --cidfile %t/container-dash-grafana.ctr-id
PIDFile=%t/container-dash-grafana.pid
Type=forking
[Install]
WantedBy=multi-user.target default.target

View File

@ -0,0 +1,33 @@
# systemd user unit template that creates and starts the `dash` pod.
[Unit]
Description=Podman pod-dash.service
Documentation=man:podman-generate-systemd(1)
Wants=network.target
After=network-online.target
# The pod pulls in its container unit and is ordered before it.
Requires=container-dash-grafana.service
Before=container-dash-grafana.service
OnFailure=status-mail@%n.service
[Service]
Environment=PODMAN_SYSTEMD_UNIT=%n
Restart=on-failure
TimeoutStopSec=70
ExecStartPre=/bin/rm -f %t/pod-dash.pid %t/pod-dash.pod-id
# The pod is created with --network=none; the veth-dash interface is brought up separately
# in the ExecStartPost steps below.
ExecStartPre=/usr/bin/podman pod create \
--infra-conmon-pidfile %t/pod-dash.pid \
--pod-id-file %t/pod-dash.pod-id \
--name=dash \
--network=none \
--replace
ExecStart=/usr/bin/podman pod start --pod-id-file %t/pod-dash.pod-id
# Write the infra container's PID to a pidfile under services_containers_directory. The
# raw/endraw markers keep the template engine from interpreting podman's Go-template braces.
# NOTE(review): the pidfile is presumably consumed by the veth-dash interface setup - confirm.
ExecStartPost=/usr/bin/sh -c 'podman inspect --format "{% raw %}{{ .State.Pid }}{% endraw %}" $(podman inspect --format "{% raw %}{{ .InfraContainerID }}{% endraw %}" dash) > {{ services_containers_directory }}/pod-dash/pidfile'
# Cycle the interface: take it down (ignoring errors) before bringing it up.
ExecStartPost=/usr/bin/sudo /usr/sbin/ifdown --ignore-errors veth-dash
ExecStartPost=/usr/bin/sudo /usr/sbin/ifup veth-dash
# Stop sequence: interface down, stop the pod (10 second grace), remove it, delete the pidfile.
ExecStop=/usr/bin/sudo /usr/sbin/ifdown --ignore-errors veth-dash
ExecStop=/usr/bin/podman pod stop --ignore --pod-id-file %t/pod-dash.pod-id -t 10
ExecStopPost=/usr/bin/podman pod rm --ignore -f --pod-id-file %t/pod-dash.pod-id
ExecStopPost=/bin/rm -f {{ services_containers_directory }}/pod-dash/pidfile
PIDFile=%t/pod-dash.pid
Type=forking
[Install]
WantedBy=multi-user.target default.target

View File

@ -0,0 +1,22 @@
---
# Argument specification for this role's `main` entry point.
# Every option listed below is a required string.
argument_specs:
  main:
    options:
      ansible_hostname:
        required: true
        type: "str"

      services_root_directory:
        required: true
        type: "str"

      services_home_directory:
        required: true
        type: "str"

      services_data_directory:
        required: true
        type: "str"

      services_containers_directory:
        required: true
        type: "str"

      services_service_name:
        required: true
        type: "str"

View File

@ -0,0 +1,65 @@
---
# Deploys the metrics pod's hosts file, Prometheus configuration and systemd user units, then
# restarts the pod only when one of those files changed and the pod was already active.

- name: "set the user variables"
  ansible.builtin.import_role:
    name: "services/include"
    vars_from: "user"

# NOTE(review): `become_user` is taken to apply to the whole block (all tasks below operate
# on the service user's files and systemd user instance) - confirm against the original layout.
- name: "deploy the metrics pod as the service user"
  become_user: "{{ services_service_user_name }}"
  block:
    - name: "{{ services_service_name }} : configure hosts file"
      ansible.builtin.template:
        src: "hosts"
        dest: "{{ services_service_user_home }}/.config/service/hosts"
        # Modes are quoted so they are always read as octal strings, never as YAML integers.
        mode: "0644"
      register: services_deploy_metrics_hosts_file

    - name: "{{ services_service_name }} : configure prometheus"
      ansible.builtin.template:
        src: "prometheus.yml"
        dest: "{{ services_service_user_home }}/.config/service/prometheus.yml"
        mode: "0644"
      register: services_deploy_metrics_prometheus_file

    - name: "configure systemd service"
      ansible.builtin.template:
        src: "./systemd/{{ item }}"
        dest: "{{ services_service_user_home }}/.config/systemd/user/{{ item }}"
        mode: "0600"
      loop:
        - "pod-metrics.service"
        - "container-metrics-prometheus.service"
      register: services_deploy_metrics_systemd_files

    - name: "systemd user daemon reload"
      ansible.builtin.systemd:
        daemon_reload: true
        scope: "user"
      when: services_deploy_metrics_systemd_files.changed

    # Populates `getent_passwd`, whose field 1 (the uid) is used for XDG_RUNTIME_DIR below.
    - name: "get uid"
      ansible.builtin.getent:
        database: "passwd"
        key: "{{ services_service_user_name }}"

    # Read-only query, hence `changed_when: false`.
    - name: "get service status"
      ansible.builtin.command: >-
        systemctl --user show --property ActiveState --value
        pod-{{ services_service_name }}.service
      environment:
        XDG_RUNTIME_DIR: "/run/user/{{ getent_passwd[services_service_user_name].1 }}"
      changed_when: false
      register: services_deploy_metrics_service_active_state

    # Both list items must hold: something changed AND the pod is currently active.
    - name: "restart the service"
      ansible.builtin.systemd:
        name: "pod-{{ services_service_name }}.service"
        state: "restarted"
        scope: "user"
      when:
        - >-
          services_deploy_metrics_hosts_file.changed or
          services_deploy_metrics_prometheus_file.changed or
          services_deploy_metrics_systemd_files.changed
        - services_deploy_metrics_service_active_state.stdout == "active"

View File

@ -0,0 +1,9 @@
# Hosts file template for the metrics pod; the prometheus container unit mounts the rendered
# file read-only as /etc/hosts.
127.0.0.1 localhost
::1 localhost ip6-localhost ip6-loopback
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
127.0.1.1 metrics
# metrics-target is the host name used by the netdata scrape job in prometheus.yml
# (IPv6 entry listed first).
{{ services_host_services.metrics.target_inet6 }} metrics-target
{{ services_host_services.metrics.target_inet }} metrics-target

View File

@ -0,0 +1,43 @@
# Prometheus configuration template; `ansible_hostname` is substituted when it is rendered.
global:
  # Scrape targets and evaluate rules every 15 seconds. The default for both is 1 minute.
  scrape_interval: "15s"
  evaluation_interval: "15s"
  # scrape_timeout is left at the global default (10s).

  # Labels attached to any time series or alerts when communicating with external systems
  # (federation, remote storage, Alertmanager).
  external_labels:
    monitor: "{{ ansible_hostname }}"

# Two scrape jobs: Prometheus itself and a Netdata endpoint.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any time series scraped by the job.
  - job_name: "prometheus"
    # metrics_path defaults to "/metrics" and scheme defaults to "http".
    # NOTE(review): "0.0.0.0:9090" is kept as-is; the stock Prometheus sample config uses
    # "localhost:9090" for the self-scrape - confirm which `instance` label is wanted.
    static_configs:
      - targets:
          - "0.0.0.0:9090"

  - job_name: "netdata"
    metrics_path: "/api/v1/allmetrics"
    honor_labels: true
    params:
      # format: prometheus | prometheus_all_hosts
      # Use `prometheus_all_hosts` to have Prometheus set `instance` to the hostname
      # instead of the IP.
      format:
        - "prometheus_all_hosts"
      # source: as-collected | raw | average | sum | volume (default: average)
      source:
        - "average"
      # Optional server name that lets Netdata uniquely identify this Prometheus;
      # the default is the client IP.
      # server: [ "prometheus1" ]
    static_configs:
      - targets:
          - "metrics-target:19999"

View File

@ -0,0 +1,36 @@
# systemd user unit template that runs the Prometheus container inside the metrics pod.
[Unit]
Description=Podman container-metrics-prometheus.service
Documentation=man:podman-generate-systemd(1)
Wants=network.target
After=network-online.target
# Bound to the pod unit: ordered after it and stopped whenever the pod unit stops.
BindsTo=pod-metrics.service
After=pod-metrics.service
# On failure, trigger the status-mail template unit instantiated with this unit's name.
OnFailure=status-mail@%n.service
[Service]
Environment=PODMAN_SYSTEMD_UNIT=%n
Restart=on-failure
TimeoutStopSec=70
# Clear stale pid / container-id files left in the runtime directory (%t) by a previous run.
ExecStartPre=/bin/rm -f %t/container-metrics-prometheus.pid %t/container-metrics-prometheus.ctr-id
# The container joins the pod recorded in %t/pod-metrics.pod-id. Mounts: resolv.conf and the
# rendered hosts file read-only; prometheus.yml and the data directory with :U, which makes
# podman chown the source to the container's user.
# NOTE(review): prometheus.yml is a deployed config file and could probably be mounted :ro
# like the hosts file instead of being chowned on every start - confirm before changing.
# NOTE(review): the relative ./.config paths resolve against the unit's working directory,
# presumably the service user's home - confirm.
ExecStart=/usr/bin/podman run \
--conmon-pidfile %t/container-metrics-prometheus.pid \
--cidfile %t/container-metrics-prometheus.ctr-id \
--cgroups=no-conmon \
--pod-id-file %t/pod-metrics.pod-id \
--replace \
--label "io.containers.autoupdate=image" \
--log-driver=journald \
-dt \
-v {{ system_etc_root_directory }}/resolv.conf:/etc/resolv.conf:ro \
-v ./.config/service/hosts:/etc/hosts:ro \
-v ./.config/service/prometheus.yml:/etc/prometheus/prometheus.yml:U \
-v {{ services_data_directory }}/pod-metrics/data/_data:/prometheus:U \
--name=pod-metrics-prometheus \
docker.io/prom/prometheus:{{ services_service_deploy_versions.prometheus }}
# Stop with a 10 second grace period, then force-remove the container.
ExecStop=/usr/bin/podman stop --ignore --cidfile %t/container-metrics-prometheus.ctr-id -t 10
ExecStopPost=/usr/bin/podman rm --ignore -f --cidfile %t/container-metrics-prometheus.ctr-id
PIDFile=%t/container-metrics-prometheus.pid
Type=forking
[Install]
WantedBy=multi-user.target default.target

View File

@ -0,0 +1,33 @@
# systemd user unit template that creates and starts the `metrics` pod.
[Unit]
Description=Podman pod-metrics.service
Documentation=man:podman-generate-systemd(1)
Wants=network.target
After=network-online.target
# The pod pulls in its container unit and is ordered before it.
Requires=container-metrics-prometheus.service
Before=container-metrics-prometheus.service
OnFailure=status-mail@%n.service
[Service]
Environment=PODMAN_SYSTEMD_UNIT=%n
Restart=on-failure
TimeoutStopSec=70
ExecStartPre=/bin/rm -f %t/pod-metrics.pid %t/pod-metrics.pod-id
# The pod is created with --network=none; the veth-metrics interface is brought up
# separately in the ExecStartPost steps below.
ExecStartPre=/usr/bin/podman pod create \
--infra-conmon-pidfile %t/pod-metrics.pid \
--pod-id-file %t/pod-metrics.pod-id \
--name=metrics \
--network=none \
--replace
ExecStart=/usr/bin/podman pod start --pod-id-file %t/pod-metrics.pod-id
# Write the infra container's PID to a pidfile under services_containers_directory. The
# raw/endraw markers keep the template engine from interpreting podman's Go-template braces.
# NOTE(review): the pidfile is presumably consumed by the veth-metrics interface setup - confirm.
ExecStartPost=/usr/bin/sh -c 'podman inspect --format "{% raw %}{{ .State.Pid }}{% endraw %}" $(podman inspect --format "{% raw %}{{ .InfraContainerID }}{% endraw %}" metrics) > {{ services_containers_directory }}/pod-metrics/pidfile'
# Cycle the interface: take it down (ignoring errors) before bringing it up.
ExecStartPost=/usr/bin/sudo /usr/sbin/ifdown --ignore-errors veth-metrics
ExecStartPost=/usr/bin/sudo /usr/sbin/ifup veth-metrics
# Stop sequence: interface down, stop the pod (10 second grace), remove it, delete the pidfile.
ExecStop=/usr/bin/sudo /usr/sbin/ifdown --ignore-errors veth-metrics
ExecStop=/usr/bin/podman pod stop --ignore --pod-id-file %t/pod-metrics.pod-id -t 10
ExecStopPost=/usr/bin/podman pod rm --ignore -f --pod-id-file %t/pod-metrics.pod-id
ExecStopPost=/bin/rm -f {{ services_containers_directory }}/pod-metrics/pidfile
PIDFile=%t/pod-metrics.pid
Type=forking
[Install]
WantedBy=multi-user.target default.target

View File

@ -89,6 +89,7 @@
services_deploy_rproxy_nginx_subdomain_config_files:
- "files/services/deploy/lrproxy/nginx-conf.d/archive.music.thenineworlds.net.conf"
- "files/services/deploy/lrproxy/nginx-conf.d/cloud.wojciechkozlowski.eu.conf"
- "files/services/deploy/lrproxy/nginx-conf.d/dash.thenineworlds.net.conf"
- "files/services/deploy/lrproxy/nginx-conf.d/database.thenineworlds.net.conf"
- "files/services/deploy/lrproxy/nginx-conf.d/git.thenineworlds.net.conf"
- "files/services/deploy/lrproxy/nginx-conf.d/music.thenineworlds.net.conf"

View File

@ -19,3 +19,7 @@ services_deploy_versions:
joplin: "2.12-beta"
music:
navidrome: "latest"
# Image tags for the metrics (prometheus) and dash (grafana) containers. Both track "latest";
# the container units label their containers with io.containers.autoupdate=image.
metrics:
prometheus: "latest"
dash:
grafana: "latest"

View File

@ -24,3 +24,7 @@ services_volumes:
music:
collection:
archive:
# One volume named `data` for each of metrics and dash. The values are left empty (parsed as
# null), in the same style as the music entries above.
metrics:
data:
dash:
data: