From e0ac5d14f370c4ece897b20944488e4c77d26189 Mon Sep 17 00:00:00 2001
From: Wojciech Kozlowski
Date: Fri, 14 Oct 2022 21:32:51 +0200
Subject: [PATCH] Add snapshots and backups to yggdrasil

---
Review revision notes (everything between "---" and the first "diff --git"
is ignored when the patch is applied):

* restic-service-data.j2: a dataset without any "_daily" snapshot no longer
  crashes the run with IndexError; it is skipped with a warning on stderr.
* restic-service-data.j2: "zfs list" is run with check=True so a failing
  command aborts instead of having its error text parsed as snapshot names.
* restic-service-data.j2: restic and zfs are invoked with argument lists
  instead of shell=True command strings.
* restic-service-data.j2: only directories below --data-root are treated as
  services.
* Ansible tasks: file modes are quoted strings, booleans are true/false, and
  the "insalled" typo in a task name is fixed.
* systemd units: Documentation= points at restic(1).
* sanoid.conf.j2: the comment above the active dataset sections now
  describes those sections.
* The "index" lines of the modified new files still carry the blob ids of
  the previous revision of this patch.

 group_vars/.template                          |   6 +
 playbooks/03-backups.yml                      |   7 +
 .../etc/logcheck/ignore.d.server/_hostname.j2 |   6 +
 .../yggdrasil/etc/restic.password.j2          |   1 +
 .../yggdrasil/etc/sanoid/sanoid.conf.j2       | 136 ++++++++++++++++++
 .../filesystem/yggdrasil/etc/scaleway.keys.j2 |   2 +
 .../system/restic-service-data.service.j2     |  10 ++
 .../systemd/system/restic-service-data.timer  |  10 ++
 .../system/syncoid-service-data.service.j2    |  11 ++
 .../usr/local/sbin/restic-service-data.j2     |  78 +++++++++++
 playbooks/tasks/backups/00-zfs-snapshots.yml  |  38 +++++
 playbooks/tasks/backups/01-restic-setup.yml   |  73 ++++++++++
 .../tasks/services/setup/01-zfs-datasets.yml  |  20 +++
 13 files changed, 398 insertions(+)
 create mode 100644 playbooks/03-backups.yml
 create mode 100644 playbooks/filesystem/yggdrasil/etc/restic.password.j2
 create mode 100644 playbooks/filesystem/yggdrasil/etc/sanoid/sanoid.conf.j2
 create mode 100644 playbooks/filesystem/yggdrasil/etc/scaleway.keys.j2
 create mode 100644 playbooks/filesystem/yggdrasil/etc/systemd/system/restic-service-data.service.j2
 create mode 100644 playbooks/filesystem/yggdrasil/etc/systemd/system/restic-service-data.timer
 create mode 100644 playbooks/filesystem/yggdrasil/etc/systemd/system/syncoid-service-data.service.j2
 create mode 100644 playbooks/filesystem/yggdrasil/usr/local/sbin/restic-service-data.j2
 create mode 100644 playbooks/tasks/backups/00-zfs-snapshots.yml
 create mode 100644 playbooks/tasks/backups/01-restic-setup.yml

diff --git a/group_vars/.template b/group_vars/.template
index 1fe24fd..350e0de 100644
--- a/group_vars/.template
+++ b/group_vars/.template
@@ -25,3 +25,9 @@ services: {
     address: X.X.X.X,
   },
 }
+
+# Backup parameters
+scw_bucket_endpoint:
+scw_access_key:
+scw_secret_key:
+restic_password:
diff --git a/playbooks/03-backups.yml b/playbooks/03-backups.yml
new file mode 100644
index 0000000..2a8c7fd
--- /dev/null
+++ b/playbooks/03-backups.yml
@@ -0,0 +1,7 @@
+---
+- name: Configure yggdrasil backups
+  hosts: yggdrasil
+
+  tasks:
+    - import_tasks: tasks/backups/00-zfs-snapshots.yml
+    - import_tasks: tasks/backups/01-restic-setup.yml
diff --git a/playbooks/filesystem/common/etc/logcheck/ignore.d.server/_hostname.j2 b/playbooks/filesystem/common/etc/logcheck/ignore.d.server/_hostname.j2
index f33c855..988c74d 100644
--- a/playbooks/filesystem/common/etc/logcheck/ignore.d.server/_hostname.j2
+++ b/playbooks/filesystem/common/etc/logcheck/ignore.d.server/_hostname.j2
@@ -4,3 +4,9 @@
 ^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ systemd\[[0-9]+\]: (apt-daily-upgrade\.service): Consumed [0-9]+\.[0-9]+s CPU time\.$
 ^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ systemd\[[0-9]+\]: rsyslog\.service: Sent signal SIGHUP to main process [[:digit:]]+ (rsyslogd) on client request\.$
 ^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ systemd\[[0-9]+\]: var-lib-containers-storage-overlay\.mount: Succeeded\.$
+^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ sanoid\[[0-9]+\]: INFO: .*$
+^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ sanoid\[[0-9]+\]: taking snapshot .*$
+^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ syncoid\[[0-9]+\]: INFO: .*$
+^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ syncoid\[[0-9]+\]: NEWEST SNAPSHOT: .*$
+^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ syncoid\[[0-9]+\]: Sending incremental .*$
+^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ systemd\[[0-9]+\]: Finished (Snapshot ZFS filesystems|Prune ZFS snapshots|Replicate service data snapshots)\.$
diff --git a/playbooks/filesystem/yggdrasil/etc/restic.password.j2 b/playbooks/filesystem/yggdrasil/etc/restic.password.j2
new file mode 100644
index 0000000..cc6ad5f
--- /dev/null
+++ b/playbooks/filesystem/yggdrasil/etc/restic.password.j2
@@ -0,0 +1 @@
+{{ restic_password }}
diff --git a/playbooks/filesystem/yggdrasil/etc/sanoid/sanoid.conf.j2 b/playbooks/filesystem/yggdrasil/etc/sanoid/sanoid.conf.j2
new file mode 100644
index 0000000..8fefd36
--- /dev/null
+++ b/playbooks/filesystem/yggdrasil/etc/sanoid/sanoid.conf.j2
@@ -0,0 +1,136 @@
+################################
+# This is a sanoid.conf file.  #
+# It should go in /etc/sanoid. #
+################################
+
+## name your backup modules with the path to their ZFS dataset - no leading slash.
+#[zpoolname/datasetname]
+#       # pick one or more templates - they're defined (and editable) below. Comma separated, processed in order.
+#       # in this example, template_demo's daily value overrides template_production's daily value.
+#       use_template = production,demo
+#
+#       # if you want to, you can override settings in the template directly inside module definitions like this.
+#       # in this example, we override the template to only keep 12 hourly and 1 monthly snapshot for this dataset.
+#       hourly = 12
+#       monthly = 1
+#
+## you can also handle datasets recursively.
+#[zpoolname/parent]
+#       use_template = production
+#       recursive = yes
+#       # if you want sanoid to manage the child datasets but leave this one alone, set process_children_only.
+#       process_children_only = yes
+#
+## you can selectively override settings for child datasets which already fall under a recursive definition.
+#[zpoolname/parent/child]
+#       # child datasets already initialized won't be wiped out, so if you use a new template, it will
+#       # only override the values already set by the parent template, not replace it completely.
+#       use_template = demo
+
+
+# datasets managed on this host: children are handled recursively, the parent dataset itself is left alone.
+[bpool/BOOT]
+        use_template = production
+        recursive = yes
+        process_children_only = yes
+
+[rpool/ROOT]
+        use_template = production
+        recursive = yes
+        process_children_only = yes
+
+[rpool/home]
+        use_template = production
+        recursive = yes
+        process_children_only = yes
+
+[rpool/var/lib/{{ ansible_hostname }}/data]
+        use_template = production
+        recursive = yes
+        process_children_only = yes
+
+[hpool/backup/{{ ansible_hostname }}/data]
+        use_template = backup
+        recursive = yes
+        process_children_only = yes
+
+#############################
+# templates below this line #
+#############################
+
+# name your templates template_templatename. you can create your own, and use them in your module definitions above.
+
+[template_demo]
+        daily = 60
+
+[template_production]
+        frequently = 0
+        hourly = 36
+        daily = 30
+        monthly = 3
+        yearly = 0
+        autosnap = yes
+        autoprune = yes
+
+[template_backup]
+        autoprune = yes
+        frequently = 0
+        hourly = 30
+        daily = 90
+        monthly = 12
+        yearly = 0
+
+        ### don't take new snapshots - snapshots on backup
+        ### datasets are replicated in from source, not
+        ### generated locally
+        autosnap = no
+
+        ### monitor hourlies and dailies, but don't warn or
+        ### crit until they're over 48h old, since replication
+        ### is typically daily only
+        hourly_warn = 2880
+        hourly_crit = 3600
+        daily_warn = 48
+        daily_crit = 60
+
+[template_hotspare]
+        autoprune = yes
+        frequently = 0
+        hourly = 30
+        daily = 90
+        monthly = 3
+        yearly = 0
+
+        ### don't take new snapshots - snapshots on backup
+        ### datasets are replicated in from source, not
+        ### generated locally
+        autosnap = no
+
+        ### monitor hourlies and dailies, but don't warn or
+        ### crit until they're over 4h old, since replication
+        ### is typically hourly only
+        hourly_warn = 4h
+        hourly_crit = 6h
+        daily_warn = 2d
+        daily_crit = 4d
+
+[template_scripts]
+        ### information about the snapshot will be supplied as environment variables,
+        ### see the README.md file for details about what is passed when.
+        ### run script before snapshot
+        pre_snapshot_script = /path/to/script.sh
+        ### run script after snapshot
+        post_snapshot_script = /path/to/script.sh
+        ### run script after pruning snapshot
+        pruning_script = /path/to/script.sh
+        ### don't take an inconsistent snapshot (skip if pre script fails)
+        #no_inconsistent_snapshot = yes
+        ### run post_snapshot_script when pre_snapshot_script is failing
+        #force_post_snapshot_script = yes
+        ### limit allowed execution time of scripts before continuing (<= 0: infinite)
+        script_timeout = 5
+
+[template_ignore]
+        autoprune = no
+        autosnap = no
+        monitor = no
diff --git a/playbooks/filesystem/yggdrasil/etc/scaleway.keys.j2 b/playbooks/filesystem/yggdrasil/etc/scaleway.keys.j2
new file mode 100644
index 0000000..181ee3a
--- /dev/null
+++ b/playbooks/filesystem/yggdrasil/etc/scaleway.keys.j2
@@ -0,0 +1,2 @@
+AWS_ACCESS_KEY_ID={{ scw_access_key }}
+AWS_SECRET_ACCESS_KEY={{ scw_secret_key }}
diff --git a/playbooks/filesystem/yggdrasil/etc/systemd/system/restic-service-data.service.j2 b/playbooks/filesystem/yggdrasil/etc/systemd/system/restic-service-data.service.j2
new file mode 100644
index 0000000..7c4da53
--- /dev/null
+++ b/playbooks/filesystem/yggdrasil/etc/systemd/system/restic-service-data.service.j2
@@ -0,0 +1,10 @@
+[Unit]
+Description=Backup service data snapshots using restic
+Documentation=man:restic(1)
+
+[Service]
+Type=oneshot
+Environment=TZ=UTC
+Environment=RESTIC_CACHE_DIR=/var/cache/restic
+EnvironmentFile=/etc/scaleway.keys
+ExecStart=/usr/local/sbin/restic-service-data --password-file /etc/restic.password --data-root /var/lib/{{ ansible_hostname }}/data --bucket-endpoint {{ scw_bucket_endpoint }}
diff --git a/playbooks/filesystem/yggdrasil/etc/systemd/system/restic-service-data.timer b/playbooks/filesystem/yggdrasil/etc/systemd/system/restic-service-data.timer
new file mode 100644
index 0000000..56cf7d1
--- /dev/null
+++ b/playbooks/filesystem/yggdrasil/etc/systemd/system/restic-service-data.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Daily restic backup
+Documentation=man:restic(1)
+
+[Timer]
+OnCalendar=*-*-* 04:05:00
+Persistent=true
+
+[Install]
+WantedBy=timers.target
diff --git a/playbooks/filesystem/yggdrasil/etc/systemd/system/syncoid-service-data.service.j2 b/playbooks/filesystem/yggdrasil/etc/systemd/system/syncoid-service-data.service.j2
new file mode 100644
index 0000000..4dae54b
--- /dev/null
+++ b/playbooks/filesystem/yggdrasil/etc/systemd/system/syncoid-service-data.service.j2
@@ -0,0 +1,11 @@
+[Unit]
+Description=Replicate service data snapshots
+Documentation=man:syncoid(8)
+After=sanoid.service
+
+[Service]
+Type=oneshot
+ExecStart=/usr/sbin/syncoid --recursive --skip-parent --no-sync-snap rpool/var/lib/{{ ansible_hostname }}/data hpool/backup/{{ ansible_hostname }}/data
+
+[Install]
+WantedBy=sanoid.service
diff --git a/playbooks/filesystem/yggdrasil/usr/local/sbin/restic-service-data.j2 b/playbooks/filesystem/yggdrasil/usr/local/sbin/restic-service-data.j2
new file mode 100644
index 0000000..ea8af09
--- /dev/null
+++ b/playbooks/filesystem/yggdrasil/usr/local/sbin/restic-service-data.j2
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""Back up the latest daily ZFS snapshot of every service dataset with restic."""
+
+import argparse
+import os
+import subprocess
+import sys
+
+
+def get_service_data_paths(data_root):
+    """Map each service directory name under data_root to its full path."""
+    paths = {}
+    for entry in sorted(os.listdir(data_root)):
+        path = os.path.join(data_root, entry)
+        # Stray regular files in the data root are not service datasets.
+        if os.path.isdir(path):
+            paths[entry] = path
+    return paths
+
+
+def get_last_daily_snapshot_name(dataset_path):
+    """Return the newest snapshot ending in _daily below dataset_path, or None."""
+    dataset = f"rpool{dataset_path}"
+    # check=True: a failing `zfs list` must abort the run instead of having
+    # its error text parsed as if it were a list of snapshot names.
+    result = subprocess.run(
+        ["zfs", "list", "-t", "snapshot", "-H", "-r", dataset,
+         "-o", "name", "-s", "creation"],
+        check=True, stdout=subprocess.PIPE, universal_newlines=True,
+    )
+    snapshots = result.stdout.splitlines()
+    daily_snapshots = [s for s in snapshots if s.endswith("_daily")]
+    return daily_snapshots[-1] if daily_snapshots else None
+
+
+def get_snapshot_mount_path(snapshot):
+    """Translate rpool/<path>@<snap> into /<path>/.zfs/snapshot/<snap>."""
+    return snapshot.replace("rpool/", "/", 1).replace("@", "/.zfs/snapshot/", 1)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Backup service data using restic")
+    parser.add_argument("--data-root", type=str, required=True,
+                        help="Service data root")
+    parser.add_argument("--bucket-endpoint", type=str, required=True,
+                        help="S3 bucket endpoint")
+    parser.add_argument("--password-file", type=str, required=True,
+                        help="Password file for restic repo")
+    args = parser.parse_args()
+
+    snapshots_for_backup = {}
+    for service, service_data_root in get_service_data_paths(args.data_root).items():
+        last_daily_snapshot = get_last_daily_snapshot_name(service_data_root)
+        if last_daily_snapshot is None:
+            # A dataset without a daily snapshot yet must not abort the
+            # backup of every other service.
+            print(f"No daily snapshot for {service}, skipping", file=sys.stderr)
+            continue
+        snapshots_for_backup[service] = get_snapshot_mount_path(last_daily_snapshot)
+
+    for service, snapshot_path in snapshots_for_backup.items():
+        print(f"Backing up {service} @ {snapshot_path}")
+
+        # Argument lists (no shell), so paths and endpoints are never
+        # re-interpreted by /bin/sh.
+        restic_cmd_base = [
+            "restic",
+            "--password-file", args.password_file,
+            "--repo", f"s3:https://{args.bucket_endpoint}/{{ ansible_hostname }}-{service}",
+        ]
+
+        # `restic snapshots` fails when the repository does not exist yet.
+        if subprocess.run(restic_cmd_base + ["snapshots"]).returncode != 0:
+            subprocess.run(restic_cmd_base + ["init"], check=True)
+        subprocess.run(
+            restic_cmd_base + ["backup", "-o", "s3.storage-class=ONEZONE_IA", snapshot_path],
+            check=True,
+        )
diff --git a/playbooks/tasks/backups/00-zfs-snapshots.yml b/playbooks/tasks/backups/00-zfs-snapshots.yml
new file mode 100644
index 0000000..b83bec2
--- /dev/null
+++ b/playbooks/tasks/backups/00-zfs-snapshots.yml
@@ -0,0 +1,38 @@
+- name: Install sanoid
+  apt:
+    name: sanoid
+
+- name: Create sanoid directory
+  file:
+    path: /etc/sanoid
+    state: directory
+    mode: "0755"
+
+- name: Configure sanoid
+  template:
+    src: ./filesystem/{{ ansible_hostname }}/etc/sanoid/sanoid.conf.j2
+    dest: /etc/sanoid/sanoid.conf
+
+- name: Copy service for {{ ansible_hostname }} data replication
+  template:
+    src: ./filesystem/{{ ansible_hostname }}/etc/systemd/system/syncoid-service-data.service.j2
+    dest: /etc/systemd/system/syncoid-service-data.service
+    mode: "0644"
+  register: systemd_syncoid_service_data_service_file
+
+- name: SystemD daemon reload
+  systemd:
+    daemon_reload: true
+  when:
+    systemd_syncoid_service_data_service_file is changed
+
+- name: Enable the replication service
+  systemd:
+    name: syncoid-service-data.service
+    enabled: true
+
+- name: Enable the sanoid timer
+  systemd:
+    name: sanoid.timer
+    enabled: true
+    state: started
diff --git a/playbooks/tasks/backups/01-restic-setup.yml b/playbooks/tasks/backups/01-restic-setup.yml
new file mode 100644
index 0000000..95b8a7a
--- /dev/null
+++ b/playbooks/tasks/backups/01-restic-setup.yml
@@ -0,0 +1,73 @@
+- name: Check if restic is installed
+  stat:
+    path: /usr/local/bin/restic
+  register: restic_path
+
+- block:
+    - name: Download restic binary
+      get_url:
+        url: https://github.com/restic/restic/releases/download/v0.14.0/restic_0.14.0_linux_amd64.bz2
+        dest: /usr/local/bin/restic.bz2
+        mode: "0644"
+
+    - name: Unpack restic binary
+      command: bunzip2 /usr/local/bin/restic.bz2
+
+  when:
+    not restic_path.stat.exists
+
+- name: Ensure restic is executable
+  file:
+    path: /usr/local/bin/restic
+    mode: "0755"
+
+- name: Create scaleway key file
+  template:
+    src: ./filesystem/{{ ansible_hostname }}/etc/scaleway.keys.j2
+    dest: /etc/scaleway.keys
+    mode: "0600"
+
+- name: Create restic password file
+  template:
+    src: ./filesystem/{{ ansible_hostname }}/etc/restic.password.j2
+    dest: /etc/restic.password
+    mode: "0600"
+
+- name: Create a cache directory for restic
+  file:
+    path: /var/cache/restic
+    state: directory
+    mode: "0755"
+
+- name: Install the restic backup script
+  template:
+    src: ./filesystem/{{ ansible_hostname }}/usr/local/sbin/restic-service-data.j2
+    dest: /usr/local/sbin/restic-service-data
+    mode: "0755"
+
+- name: Install the restic backup service file
+  template:
+    src: ./filesystem/{{ ansible_hostname }}/etc/systemd/system/restic-service-data.service.j2
+    dest: /etc/systemd/system/restic-service-data.service
+    mode: "0644"
+  register: systemd_restic_service_data_service_file
+
+- name: Install the restic backup timer file
+  copy:
+    src: ./filesystem/{{ ansible_hostname }}/etc/systemd/system/restic-service-data.timer
+    dest: /etc/systemd/system/restic-service-data.timer
+    mode: "0644"
+  register: systemd_restic_service_data_timer_file
+
+- name: SystemD daemon reload
+  systemd:
+    daemon_reload: true
+  when:
+    systemd_restic_service_data_service_file is changed or
+    systemd_restic_service_data_timer_file is changed
+
+- name: Enable restic backup
+  systemd:
+    name: restic-service-data.timer
+    enabled: true
+    state: started
diff --git a/playbooks/tasks/services/setup/01-zfs-datasets.yml b/playbooks/tasks/services/setup/01-zfs-datasets.yml
index 9d5e408..feec279 100644
--- a/playbooks/tasks/services/setup/01-zfs-datasets.yml
+++ b/playbooks/tasks/services/setup/01-zfs-datasets.yml
@@ -47,3 +47,23 @@
     state: present
     extra_zfs_properties:
       canmount: "off"
+
+- name: Create backup dataset
+  zfs:
+    name: hpool/backup
+    state: present
+    extra_zfs_properties:
+      canmount: "off"
+      "com.sun:auto-snapshot": "false"
+
+- name: Create service backup dataset
+  zfs:
+    name: hpool/backup/{{ ansible_hostname }}
+    state: present
+
+- name: Create service data backup dataset
+  zfs:
+    name: hpool/backup/{{ ansible_hostname }}/data
+    state: present
+    extra_zfs_properties:
+      canmount: "off"