From d787e4d65a6dc16ad46be36cc74095aa13fd8c7e Mon Sep 17 00:00:00 2001
From: Wojciech Kozlowski
Date: Sun, 22 Oct 2023 18:31:32 +0200
Subject: [PATCH] Add prometheus and grafana

Add two new service roles:

  * metrics: a Prometheus pod that scrapes itself and the Netdata
    endpoint reachable as "metrics-target" (/api/v1/allmetrics).
  * dash: a Grafana pod, proxied by nginx as dash.thenineworlds.net
    to pod-dash:3000.

Each role installs a hosts file and the pod/container systemd user
units, and restarts the pod when any of them changed while it is active.
---
Review notes (this section is ignored by `git am`):

  * Line breaks and indentation were reconstructed from a
    whitespace-collapsed copy of this patch. Context-line indentation in
    playbooks/services.yml and inventory/host_vars/yggdrasil/vars.yml is a
    best guess; use `git apply --ignore-whitespace` if a hunk is rejected.
  * metrics.target_inet pointed at vpn_wireguard_inet6_address, which made
    both "metrics-target" hosts entries IPv6. It now uses
    vpn_wireguard_inet_address (assumed to be defined next to the inet6
    variable - confirm before applying).
  * File modes in the new tasks are quoted strings, the Netdata scrape
    target uses metrics.target_port instead of a hard-coded 19999, and the
    prometheus.yml comments match the configured 15s intervals.
  * All edits are line-count neutral, so the hunk headers and diffstat are
    unchanged; the blob ids on the "index" lines predate these edits.

 inventory/group_vars/asgard/vars.yml          |  2 +
 inventory/host_vars/yggdrasil/vars.yml        | 12 ++++
 .../nginx-conf.d/dash.thenineworlds.net.conf  | 45 +++++++++++++
 .../deploy/dash/meta/argument_specs.yml       | 22 +++++++
 .../roles/services/deploy/dash/tasks/main.yml | 62 ++++++++++++++++++
 .../services/deploy/dash/templates/hosts      |  9 +++
 .../systemd/container-dash-grafana.service    | 35 ++++++++++
 .../dash/templates/systemd/pod-dash.service   | 33 ++++++++++
 .../deploy/metrics/meta/argument_specs.yml    | 22 +++++++
 .../services/deploy/metrics/tasks/main.yml    | 65 +++++++++++++++++++
 .../services/deploy/metrics/templates/hosts   |  9 +++
 .../deploy/metrics/templates/prometheus.yml   | 43 ++++++++++++
 .../container-metrics-prometheus.service      | 36 ++++++++++
 .../templates/systemd/pod-metrics.service     | 33 ++++++++++
 playbooks/services.yml                        |  1 +
 playbooks/vars/services/deploy/versions.yml   |  4 ++
 playbooks/vars/services/volumes.yml           |  4 ++
 17 files changed, 437 insertions(+)
 create mode 100644 playbooks/files/services/deploy/lrproxy/nginx-conf.d/dash.thenineworlds.net.conf
 create mode 100644 playbooks/roles/services/deploy/dash/meta/argument_specs.yml
 create mode 100644 playbooks/roles/services/deploy/dash/tasks/main.yml
 create mode 100644 playbooks/roles/services/deploy/dash/templates/hosts
 create mode 100644 playbooks/roles/services/deploy/dash/templates/systemd/container-dash-grafana.service
 create mode 100644 playbooks/roles/services/deploy/dash/templates/systemd/pod-dash.service
 create mode 100644 playbooks/roles/services/deploy/metrics/meta/argument_specs.yml
 create mode 100644 playbooks/roles/services/deploy/metrics/tasks/main.yml
 create mode 100644 playbooks/roles/services/deploy/metrics/templates/hosts
 create mode 100644 playbooks/roles/services/deploy/metrics/templates/prometheus.yml
 create mode 100644 playbooks/roles/services/deploy/metrics/templates/systemd/container-metrics-prometheus.service
 create mode 100644 playbooks/roles/services/deploy/metrics/templates/systemd/pod-metrics.service

diff --git a/inventory/group_vars/asgard/vars.yml b/inventory/group_vars/asgard/vars.yml
index 8ee6099..5d2e5a0 100644
--- a/inventory/group_vars/asgard/vars.yml
+++ b/inventory/group_vars/asgard/vars.yml
@@ -107,6 +107,8 @@ services:
   music:
     domain: "{{ vault_services.music.domain }}"
     password_encryption_key: "{{ vault_services.music.password_encryption_key }}"
+  metrics: {}
+  dash: {}
 
 # --------------------------------------------------------------------------------------------------
 # services:user_setup
diff --git a/inventory/host_vars/yggdrasil/vars.yml b/inventory/host_vars/yggdrasil/vars.yml
index 4da1d2d..ee285de 100644
--- a/inventory/host_vars/yggdrasil/vars.yml
+++ b/inventory/host_vars/yggdrasil/vars.yml
@@ -174,6 +174,18 @@ services_host_services:
     collection_path: "{{ music_user_data_collection_directory }}"
     archive_path: "{{ music_user_data_archive_directory }}"
     restic: true
+  metrics:
+    inet_address: "{{ vpn_bridge_inet_prefix }}.8"
+    inet6_address: "{{ vpn_bridge_inet6_prefix }}::8"
+    target_inet: "{{ vpn_wireguard_inet_address }}"
+    target_inet6: "{{ vpn_wireguard_inet6_address }}"
+    target_port: 19999
+    tcp: [9090]
+    restic: true
+  dash:
+    inet_address: "{{ vpn_bridge_inet_prefix }}.9"
+    inet6_address: "{{ vpn_bridge_inet6_prefix }}::9"
+    restic: true
 
 # --------------------------------------------------------------------------------------------------
 # services:backups
diff --git a/playbooks/files/services/deploy/lrproxy/nginx-conf.d/dash.thenineworlds.net.conf b/playbooks/files/services/deploy/lrproxy/nginx-conf.d/dash.thenineworlds.net.conf
new file mode 100644
index 0000000..0c311b9
--- /dev/null
+++ b/playbooks/files/services/deploy/lrproxy/nginx-conf.d/dash.thenineworlds.net.conf
@@ -0,0 +1,45 @@
+server {
+    listen [::]:80;
+    listen 80;
+    server_name dash.thenineworlds.net;
+
+    location ^~ /.well-known {
+        allow all;
+        root /var/www/html;
+    }
+
+    location / {
+        return 301 https://$server_name$request_uri;
+    }
+}
+
+server {
+    listen [::]:8443 ssl proxy_protocol;
+    listen 8443 ssl proxy_protocol;
+    server_name dash.thenineworlds.net;
+
+    ssl_certificate /etc/letsencrypt/live/dash.thenineworlds.net/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/dash.thenineworlds.net/privkey.pem;
+    ssl_trusted_certificate /etc/letsencrypt/live/dash.thenineworlds.net/chain.pem;
+
+    set_real_ip_from {{ services_all_services.rproxy.inet_address }};
+    set_real_ip_from {{ services_all_services.rproxy.inet6_address }};
+
+    set_real_ip_from {{ services_all_services.lrproxy.inet_address }};
+    set_real_ip_from {{ services_all_services.lrproxy.inet6_address }};
+
+    real_ip_header proxy_protocol;
+
+    location / {
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $remote_addr;
+        proxy_set_header Host $host;
+        proxy_pass http://pod-dash:3000;
+    }
+
+    error_page 500 502 503 504 /50x.html;
+    location = /50x.html {
+        root /usr/share/nginx/html;
+    }
+
+}
diff --git a/playbooks/roles/services/deploy/dash/meta/argument_specs.yml b/playbooks/roles/services/deploy/dash/meta/argument_specs.yml
new file mode 100644
index 0000000..0f8598b
--- /dev/null
+++ b/playbooks/roles/services/deploy/dash/meta/argument_specs.yml
@@ -0,0 +1,22 @@
+---
+argument_specs:
+  main:
+    options:
+      ansible_hostname:
+        type: "str"
+        required: true
+      services_root_directory:
+        type: "str"
+        required: true
+      services_home_directory:
+        type: "str"
+        required: true
+      services_data_directory:
+        type: "str"
+        required: true
+      services_containers_directory:
+        type: "str"
+        required: true
+      services_service_name:
+        type: "str"
+        required: true
diff --git a/playbooks/roles/services/deploy/dash/tasks/main.yml b/playbooks/roles/services/deploy/dash/tasks/main.yml
new file mode 100644
index 0000000..4f79bc3
--- /dev/null
+++ b/playbooks/roles/services/deploy/dash/tasks/main.yml
@@ -0,0 +1,62 @@
+---
+- name: "set the user variables"
+  ansible.builtin.import_role:
+    name: "services/include"
+    vars_from: "user"
+
+- name: "stat the grafana password file"
+  ansible.builtin.stat:
+    path: "{{ services_service_user_home }}/.config/service/admin.password"
+  register: services_deploy_dash_grafana_password_file_stat
+
+- block:
+
+    - name: "configure hosts file"
+      ansible.builtin.template:
+        src: "./hosts"
+        dest: "{{ services_service_user_home }}/.config/service/hosts"
+        mode: "0644"
+      register: services_deploy_dash_hosts_file
+
+    - name: "configure systemd service"
+      ansible.builtin.template:
+        src: "./systemd/{{ item }}"
+        dest: "{{ services_service_user_home }}/.config/systemd/user/{{ item }}"
+        mode: "0600"
+      loop:
+        - "pod-dash.service"
+        - "container-dash-grafana.service"
+      register: services_deploy_dash_systemd_files
+
+    - name: "systemd user daemon reload"
+      ansible.builtin.systemd:
+        daemon_reload: true
+        scope: "user"
+      when:
+        services_deploy_dash_systemd_files.changed
+
+    - name: "get uid"
+      ansible.builtin.getent:
+        database: "passwd"
+        key: "{{ services_service_user_name }}"
+
+    - name: "get service status"
+      ansible.builtin.command: >-
+        systemctl --user show --property ActiveState --value
+        pod-{{ services_service_name }}.service
+      environment:
+        XDG_RUNTIME_DIR: "/run/user/{{ getent_passwd[services_service_user_name].1 }}"
+      changed_when: false
+      register: services_deploy_dash_service_active_state
+
+    - name: "restart the service"
+      ansible.builtin.systemd:
+        name: "pod-{{ services_service_name }}.service"
+        state: "restarted"
+        scope: "user"
+      when:
+        (services_deploy_dash_hosts_file.changed or
+         services_deploy_dash_systemd_files.changed) and
+        services_deploy_dash_service_active_state.stdout == "active"
+
+  become_user: "{{ services_service_user_name }}"
diff --git a/playbooks/roles/services/deploy/dash/templates/hosts b/playbooks/roles/services/deploy/dash/templates/hosts
new file mode 100644
index 0000000..4650053
--- /dev/null
+++ b/playbooks/roles/services/deploy/dash/templates/hosts
@@ -0,0 +1,9 @@
+127.0.0.1 localhost
+::1 localhost ip6-localhost ip6-loopback
+ff02::1 ip6-allnodes
+ff02::2 ip6-allrouters
+
+127.0.1.1 dash
+
+{{ services_all_services.metrics.inet6_address }} pod-metrics
+{{ services_all_services.metrics.inet_address }} pod-metrics
diff --git a/playbooks/roles/services/deploy/dash/templates/systemd/container-dash-grafana.service b/playbooks/roles/services/deploy/dash/templates/systemd/container-dash-grafana.service
new file mode 100644
index 0000000..818e4bb
--- /dev/null
+++ b/playbooks/roles/services/deploy/dash/templates/systemd/container-dash-grafana.service
@@ -0,0 +1,35 @@
+[Unit]
+Description=Podman container-dash-grafana.service
+Documentation=man:podman-generate-systemd(1)
+Wants=network.target
+After=network-online.target
+BindsTo=pod-dash.service
+After=pod-dash.service
+OnFailure=status-mail@%n.service
+
+[Service]
+Environment=PODMAN_SYSTEMD_UNIT=%n
+Restart=on-failure
+TimeoutStopSec=70
+ExecStartPre=/bin/rm -f %t/container-dash-grafana.pid %t/container-dash-grafana.ctr-id
+ExecStart=/usr/bin/podman run \
+    --conmon-pidfile %t/container-dash-grafana.pid \
+    --cidfile %t/container-dash-grafana.ctr-id \
+    --cgroups=no-conmon \
+    --pod-id-file %t/pod-dash.pod-id \
+    --replace \
+    --label "io.containers.autoupdate=image" \
+    --log-driver=journald \
+    -dt \
+    -v {{ system_etc_root_directory }}/resolv.conf:/etc/resolv.conf:ro \
+    -v ./.config/service/hosts:/etc/hosts:ro \
+    -v {{ services_data_directory }}/pod-dash/data/_data:/var/lib/grafana:U \
+    --name=pod-dash-grafana \
+    docker.io/grafana/grafana:{{ services_service_deploy_versions.grafana }}
+ExecStop=/usr/bin/podman stop --ignore --cidfile %t/container-dash-grafana.ctr-id -t 10
+ExecStopPost=/usr/bin/podman rm --ignore -f --cidfile %t/container-dash-grafana.ctr-id
+PIDFile=%t/container-dash-grafana.pid
+Type=forking
+
+[Install]
+WantedBy=multi-user.target default.target
diff --git a/playbooks/roles/services/deploy/dash/templates/systemd/pod-dash.service b/playbooks/roles/services/deploy/dash/templates/systemd/pod-dash.service
new file mode 100644
index 0000000..c0cdfd1
--- /dev/null
+++ b/playbooks/roles/services/deploy/dash/templates/systemd/pod-dash.service
@@ -0,0 +1,33 @@
+[Unit]
+Description=Podman pod-dash.service
+Documentation=man:podman-generate-systemd(1)
+Wants=network.target
+After=network-online.target
+Requires=container-dash-grafana.service
+Before=container-dash-grafana.service
+OnFailure=status-mail@%n.service
+
+[Service]
+Environment=PODMAN_SYSTEMD_UNIT=%n
+Restart=on-failure
+TimeoutStopSec=70
+ExecStartPre=/bin/rm -f %t/pod-dash.pid %t/pod-dash.pod-id
+ExecStartPre=/usr/bin/podman pod create \
+    --infra-conmon-pidfile %t/pod-dash.pid \
+    --pod-id-file %t/pod-dash.pod-id \
+    --name=dash \
+    --network=none \
+    --replace
+ExecStart=/usr/bin/podman pod start --pod-id-file %t/pod-dash.pod-id
+ExecStartPost=/usr/bin/sh -c 'podman inspect --format "{% raw %}{{ .State.Pid }}{% endraw %}" $(podman inspect --format "{% raw %}{{ .InfraContainerID }}{% endraw %}" dash) > {{ services_containers_directory }}/pod-dash/pidfile'
+ExecStartPost=/usr/bin/sudo /usr/sbin/ifdown --ignore-errors veth-dash
+ExecStartPost=/usr/bin/sudo /usr/sbin/ifup veth-dash
+ExecStop=/usr/bin/sudo /usr/sbin/ifdown --ignore-errors veth-dash
+ExecStop=/usr/bin/podman pod stop --ignore --pod-id-file %t/pod-dash.pod-id -t 10
+ExecStopPost=/usr/bin/podman pod rm --ignore -f --pod-id-file %t/pod-dash.pod-id
+ExecStopPost=/bin/rm -f {{ services_containers_directory }}/pod-dash/pidfile
+PIDFile=%t/pod-dash.pid
+Type=forking
+
+[Install]
+WantedBy=multi-user.target default.target
diff --git a/playbooks/roles/services/deploy/metrics/meta/argument_specs.yml b/playbooks/roles/services/deploy/metrics/meta/argument_specs.yml
new file mode 100644
index 0000000..0f8598b
--- /dev/null
+++ b/playbooks/roles/services/deploy/metrics/meta/argument_specs.yml
@@ -0,0 +1,22 @@
+---
+argument_specs:
+  main:
+    options:
+      ansible_hostname:
+        type: "str"
+        required: true
+      services_root_directory:
+        type: "str"
+        required: true
+      services_home_directory:
+        type: "str"
+        required: true
+      services_data_directory:
+        type: "str"
+        required: true
+      services_containers_directory:
+        type: "str"
+        required: true
+      services_service_name:
+        type: "str"
+        required: true
diff --git a/playbooks/roles/services/deploy/metrics/tasks/main.yml b/playbooks/roles/services/deploy/metrics/tasks/main.yml
new file mode 100644
index 0000000..f2165bd
--- /dev/null
+++ b/playbooks/roles/services/deploy/metrics/tasks/main.yml
@@ -0,0 +1,65 @@
+---
+- name: "set the user variables"
+  ansible.builtin.import_role:
+    name: "services/include"
+    vars_from: "user"
+
+- block:
+
+    - name: "{{ services_service_name }} : configure hosts file"
+      ansible.builtin.template:
+        src: "hosts"
+        dest: "{{ services_service_user_home }}/.config/service/hosts"
+        mode: "0644"
+      register: services_deploy_metrics_hosts_file
+
+    - name: "{{ services_service_name }} : configure prometheus"
+      ansible.builtin.template:
+        src: "prometheus.yml"
+        dest: "{{ services_service_user_home }}/.config/service/prometheus.yml"
+        mode: "0644"
+      register: services_deploy_metrics_prometheus_file
+
+    - name: "configure systemd service"
+      ansible.builtin.template:
+        src: "./systemd/{{ item }}"
+        dest: "{{ services_service_user_home }}/.config/systemd/user/{{ item }}"
+        mode: "0600"
+      loop:
+        - "pod-metrics.service"
+        - "container-metrics-prometheus.service"
+      register: services_deploy_metrics_systemd_files
+
+    - name: "systemd user daemon reload"
+      ansible.builtin.systemd:
+        daemon_reload: true
+        scope: "user"
+      when:
+        services_deploy_metrics_systemd_files.changed
+
+    - name: "get uid"
+      ansible.builtin.getent:
+        database: "passwd"
+        key: "{{ services_service_user_name }}"
+
+    - name: "get service status"
+      ansible.builtin.command: >-
+        systemctl --user show --property ActiveState --value
+        pod-{{ services_service_name }}.service
+      environment:
+        XDG_RUNTIME_DIR: "/run/user/{{ getent_passwd[services_service_user_name].1 }}"
+      changed_when: false
+      register: services_deploy_metrics_service_active_state
+
+    - name: "restart the service"
+      ansible.builtin.systemd:
+        name: "pod-{{ services_service_name }}.service"
+        state: "restarted"
+        scope: "user"
+      when:
+        (services_deploy_metrics_hosts_file.changed or
+         services_deploy_metrics_prometheus_file.changed or
+         services_deploy_metrics_systemd_files.changed) and
+        services_deploy_metrics_service_active_state.stdout == "active"
+
+  become_user: "{{ services_service_user_name }}"
diff --git a/playbooks/roles/services/deploy/metrics/templates/hosts b/playbooks/roles/services/deploy/metrics/templates/hosts
new file mode 100644
index 0000000..84b8106
--- /dev/null
+++ b/playbooks/roles/services/deploy/metrics/templates/hosts
@@ -0,0 +1,9 @@
+127.0.0.1 localhost
+::1 localhost ip6-localhost ip6-loopback
+ff02::1 ip6-allnodes
+ff02::2 ip6-allrouters
+
+127.0.1.1 metrics
+
+{{ services_host_services.metrics.target_inet6 }} metrics-target
+{{ services_host_services.metrics.target_inet }} metrics-target
diff --git a/playbooks/roles/services/deploy/metrics/templates/prometheus.yml b/playbooks/roles/services/deploy/metrics/templates/prometheus.yml
new file mode 100644
index 0000000..8480152
--- /dev/null
+++ b/playbooks/roles/services/deploy/metrics/templates/prometheus.yml
@@ -0,0 +1,43 @@
+# Global Prometheus settings; individual scrape jobs may override them.
+global:
+  scrape_interval: "15s"  # Scrape targets every 15 seconds. Default is every 1 minute.
+  evaluation_interval: "15s"  # Evaluate rules every 15 seconds. The default is every 1 minute.
+  # scrape_timeout is set to the global default (10s).
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: "{{ ansible_hostname }}"
+
+# Scrape configurations. Two jobs are defined below: Prometheus itself and the
+# Netdata endpoint that the hosts file exposes as "metrics-target".
+scrape_configs:
+  # The job name is added as the `job` label to any timeseries scraped from this config.
+  - job_name: "prometheus"
+
+    # metrics_path defaults to "/metrics"
+    # scheme defaults to "http".
+
+    static_configs:
+      - targets: [ "0.0.0.0:9090" ]
+
+  - job_name: "netdata"
+
+    metrics_path: "/api/v1/allmetrics"
+    params:
+      # format: prometheus | prometheus_all_hosts
+      # You can use `prometheus_all_hosts` if you want Prometheus to set the `instance` to your
+      # hostname instead of IP
+      format: [ "prometheus_all_hosts" ]
+      #
+      # sources: as-collected | raw | average | sum | volume
+      # default is: average
+      source: [ "average" ]
+      #
+      # server name for this prometheus - the default is the client IP
+      # for Netdata to uniquely identify it
+      #server: [ "prometheus1" ]
+    honor_labels: true
+
+    static_configs:
+      - targets: [ "metrics-target:{{ services_host_services.metrics.target_port }}" ]
diff --git a/playbooks/roles/services/deploy/metrics/templates/systemd/container-metrics-prometheus.service b/playbooks/roles/services/deploy/metrics/templates/systemd/container-metrics-prometheus.service
new file mode 100644
index 0000000..d8438da
--- /dev/null
+++ b/playbooks/roles/services/deploy/metrics/templates/systemd/container-metrics-prometheus.service
@@ -0,0 +1,36 @@
+[Unit]
+Description=Podman container-metrics-prometheus.service
+Documentation=man:podman-generate-systemd(1)
+Wants=network.target
+After=network-online.target
+BindsTo=pod-metrics.service
+After=pod-metrics.service
+OnFailure=status-mail@%n.service
+
+[Service]
+Environment=PODMAN_SYSTEMD_UNIT=%n
+Restart=on-failure
+TimeoutStopSec=70
+ExecStartPre=/bin/rm -f %t/container-metrics-prometheus.pid %t/container-metrics-prometheus.ctr-id
+ExecStart=/usr/bin/podman run \
+    --conmon-pidfile %t/container-metrics-prometheus.pid \
+    --cidfile %t/container-metrics-prometheus.ctr-id \
+    --cgroups=no-conmon \
+    --pod-id-file %t/pod-metrics.pod-id \
+    --replace \
+    --label "io.containers.autoupdate=image" \
+    --log-driver=journald \
+    -dt \
+    -v {{ system_etc_root_directory }}/resolv.conf:/etc/resolv.conf:ro \
+    -v ./.config/service/hosts:/etc/hosts:ro \
+    -v ./.config/service/prometheus.yml:/etc/prometheus/prometheus.yml:U \
+    -v {{ services_data_directory }}/pod-metrics/data/_data:/prometheus:U \
+    --name=pod-metrics-prometheus \
+    docker.io/prom/prometheus:{{ services_service_deploy_versions.prometheus }}
+ExecStop=/usr/bin/podman stop --ignore --cidfile %t/container-metrics-prometheus.ctr-id -t 10
+ExecStopPost=/usr/bin/podman rm --ignore -f --cidfile %t/container-metrics-prometheus.ctr-id
+PIDFile=%t/container-metrics-prometheus.pid
+Type=forking
+
+[Install]
+WantedBy=multi-user.target default.target
diff --git a/playbooks/roles/services/deploy/metrics/templates/systemd/pod-metrics.service b/playbooks/roles/services/deploy/metrics/templates/systemd/pod-metrics.service
new file mode 100644
index 0000000..e01409c
--- /dev/null
+++ b/playbooks/roles/services/deploy/metrics/templates/systemd/pod-metrics.service
@@ -0,0 +1,33 @@
+[Unit]
+Description=Podman pod-metrics.service
+Documentation=man:podman-generate-systemd(1)
+Wants=network.target
+After=network-online.target
+Requires=container-metrics-prometheus.service
+Before=container-metrics-prometheus.service
+OnFailure=status-mail@%n.service
+
+[Service]
+Environment=PODMAN_SYSTEMD_UNIT=%n
+Restart=on-failure
+TimeoutStopSec=70
+ExecStartPre=/bin/rm -f %t/pod-metrics.pid %t/pod-metrics.pod-id
+ExecStartPre=/usr/bin/podman pod create \
+    --infra-conmon-pidfile %t/pod-metrics.pid \
+    --pod-id-file %t/pod-metrics.pod-id \
+    --name=metrics \
+    --network=none \
+    --replace
+ExecStart=/usr/bin/podman pod start --pod-id-file %t/pod-metrics.pod-id
+ExecStartPost=/usr/bin/sh -c 'podman inspect --format "{% raw %}{{ .State.Pid }}{% endraw %}" $(podman inspect --format "{% raw %}{{ .InfraContainerID }}{% endraw %}" metrics) > {{ services_containers_directory }}/pod-metrics/pidfile'
+ExecStartPost=/usr/bin/sudo /usr/sbin/ifdown --ignore-errors veth-metrics
+ExecStartPost=/usr/bin/sudo /usr/sbin/ifup veth-metrics
+ExecStop=/usr/bin/sudo /usr/sbin/ifdown --ignore-errors veth-metrics
+ExecStop=/usr/bin/podman pod stop --ignore --pod-id-file %t/pod-metrics.pod-id -t 10
+ExecStopPost=/usr/bin/podman pod rm --ignore -f --pod-id-file %t/pod-metrics.pod-id
+ExecStopPost=/bin/rm -f {{ services_containers_directory }}/pod-metrics/pidfile
+PIDFile=%t/pod-metrics.pid
+Type=forking
+
+[Install]
+WantedBy=multi-user.target default.target
diff --git a/playbooks/services.yml b/playbooks/services.yml
index 23e66a3..dadc672 100644
--- a/playbooks/services.yml
+++ b/playbooks/services.yml
@@ -89,6 +89,7 @@
         services_deploy_rproxy_nginx_subdomain_config_files:
           - "files/services/deploy/lrproxy/nginx-conf.d/archive.music.thenineworlds.net.conf"
           - "files/services/deploy/lrproxy/nginx-conf.d/cloud.wojciechkozlowski.eu.conf"
+          - "files/services/deploy/lrproxy/nginx-conf.d/dash.thenineworlds.net.conf"
           - "files/services/deploy/lrproxy/nginx-conf.d/database.thenineworlds.net.conf"
           - "files/services/deploy/lrproxy/nginx-conf.d/git.thenineworlds.net.conf"
           - "files/services/deploy/lrproxy/nginx-conf.d/music.thenineworlds.net.conf"
diff --git a/playbooks/vars/services/deploy/versions.yml b/playbooks/vars/services/deploy/versions.yml
index 23c1708..0ed597e 100644
--- a/playbooks/vars/services/deploy/versions.yml
+++ b/playbooks/vars/services/deploy/versions.yml
@@ -19,3 +19,7 @@ services_deploy_versions:
     joplin: "2.12-beta"
   music:
     navidrome: "latest"
+  metrics:
+    prometheus: "latest"
+  dash:
+    grafana: "latest"
diff --git a/playbooks/vars/services/volumes.yml b/playbooks/vars/services/volumes.yml
index 80ac43b..8499cf1 100644
--- a/playbooks/vars/services/volumes.yml
+++ b/playbooks/vars/services/volumes.yml
@@ -24,3 +24,7 @@ services_volumes:
   music:
     collection:
     archive:
+  metrics:
+    data:
+  dash:
+    data: