Add snapshots and backups to yggdrasil

Wojciech Kozlowski 2022-10-14 21:32:51 +02:00
parent 5ff15a8ff5
commit e0ac5d14f3
13 changed files with 368 additions and 0 deletions

@@ -25,3 +25,9 @@ services: {
    address: X.X.X.X,
  },
}
# Backup parameters
scw_bucket_endpoint:
scw_access_key:
scw_secret_key:
restic_password:
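
The secret values are left blank in the tracked config. A minimal sketch of how they could be kept out of plain text, assuming Ansible Vault is used (the file name is an example only):

# Encrypt the secrets before committing them (hypothetical file name)
ansible-vault encrypt host_vars/yggdrasil/secrets.yml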

playbooks/03-backups.yml

@@ -0,0 +1,7 @@
---
- name: Configure yggdrasil backups
  hosts: yggdrasil
  tasks:
    - import_tasks: tasks/backups/00-zfs-snapshots.yml
    - import_tasks: tasks/backups/01-restic-setup.yml
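
For reference, the new playbook can presumably be run on its own; the inventory name below is an assumption:

# Apply only the backup configuration to yggdrasil (inventory file name is an example)
ansible-playbook -i hosts playbooks/03-backups.yml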

@@ -4,3 +4,9 @@
^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ systemd\[[0-9]+\]: (apt-daily-upgrade\.service): Consumed [0-9]+\.[0-9]+s CPU time\.$
^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ systemd\[[0-9]+\]: rsyslog\.service: Sent signal SIGHUP to main process [[:digit:]]+ (rsyslogd) on client request\.$
^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ systemd\[[0-9]+\]: var-lib-containers-storage-overlay\.mount: Succeeded\.$
^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ sanoid\[[0-9]+\]: INFO: .*$
^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ sanoid\[[0-9]+\]: taking snapshot .*$
^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ syncoid\[[0-9]+\]: INFO: .*$
^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ syncoid\[[0-9]+\]: NEWEST SNAPSHOT: .*$
^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ syncoid\[[0-9]+\]: Sending incremental .*$
^[[:alpha:]]{3} [ :[:digit:]]{11} [._[:alnum:]\-]+ systemd\[[0-9]+\]: Finished (Snapshot ZFS filesystems|Prune ZFS snapshots|Replicate service data snapshots)\.$

@@ -0,0 +1 @@
{{ restic_password }}

@@ -0,0 +1,136 @@
################################
# This is a sanoid.conf file. #
# It should go in /etc/sanoid. #
################################
## name your backup modules with the path to their ZFS dataset - no leading slash.
#[zpoolname/datasetname]
# # pick one or more templates - they're defined (and editable) below. Comma separated, processed in order.
# # in this example, template_demo's daily value overrides template_production's daily value.
# use_template = production,demo
#
# # if you want to, you can override settings in the template directly inside module definitions like this.
# # in this example, we override the template to only keep 12 hourly and 1 monthly snapshot for this dataset.
# hourly = 12
# monthly = 1
#
## you can also handle datasets recursively.
#[zpoolname/parent]
# use_template = production
# recursive = yes
# # if you want sanoid to manage the child datasets but leave this one alone, set process_children_only.
# process_children_only = yes
#
## you can selectively override settings for child datasets which already fall under a recursive definition.
#[zpoolname/parent/child]
# # child datasets already initialized won't be wiped out, so if you use a new template, it will
# # only override the values already set by the parent template, not replace it completely.
# use_template = demo
# you can also handle datasets recursively in an atomic way without the possibility to override settings for child datasets.

[bpool/BOOT]
use_template = production
recursive = yes
process_children_only = yes

[rpool/ROOT]
use_template = production
recursive = yes
process_children_only = yes

[rpool/home]
use_template = production
recursive = yes
process_children_only = yes

[rpool/var/lib/{{ ansible_hostname }}/data]
use_template = production
recursive = yes
process_children_only = yes

[hpool/backup/{{ ansible_hostname }}/data]
use_template = backup
recursive = yes
process_children_only = yes

#############################
# templates below this line #
#############################
# name your templates template_templatename. you can create your own, and use them in your module definitions above.

[template_demo]
daily = 60

[template_production]
frequently = 0
hourly = 36
daily = 30
monthly = 3
yearly = 0
autosnap = yes
autoprune = yes

[template_backup]
autoprune = yes
frequently = 0
hourly = 30
daily = 90
monthly = 12
yearly = 0

### don't take new snapshots - snapshots on backup
### datasets are replicated in from source, not
### generated locally
autosnap = no

### monitor hourlies and dailies, but don't warn or
### crit until they're over 48h old, since replication
### is typically daily only
hourly_warn = 2880
hourly_crit = 3600
daily_warn = 48
daily_crit = 60

[template_hotspare]
autoprune = yes
frequently = 0
hourly = 30
daily = 90
monthly = 3
yearly = 0

### don't take new snapshots - snapshots on backup
### datasets are replicated in from source, not
### generated locally
autosnap = no

### monitor hourlies and dailies, but don't warn or
### crit until they're over 4h old, since replication
### is typically hourly only
hourly_warn = 4h
hourly_crit = 6h
daily_warn = 2d
daily_crit = 4d

[template_scripts]
### information about the snapshot will be supplied as environment variables,
### see the README.md file for details about what is passed when.
### run script before snapshot
pre_snapshot_script = /path/to/script.sh
### run script after snapshot
post_snapshot_script = /path/to/script.sh
### run script after pruning snapshot
pruning_script = /path/to/script.sh
### don't take an inconsistent snapshot (skip if pre script fails)
#no_inconsistent_snapshot = yes
### run post_snapshot_script when pre_snapshot_script is failing
#force_post_snapshot_script = yes
### limit allowed execution time of scripts before continuing (<= 0: infinite)
script_timeout = 5

[template_ignore]
autoprune = no
autosnap = no
monitor = no
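
Once sanoid has taken its first snapshots, the retention defined above can be spot-checked on the host. A quick sketch, assuming {{ ansible_hostname }} renders to yggdrasil:

# List the snapshots sanoid manages for the service data
zfs list -t snapshot -r rpool/var/lib/yggdrasil/data
# Ask sanoid whether anything has fallen outside the warn/crit thresholds
sanoid --monitor-snapshots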

@@ -0,0 +1,2 @@
AWS_ACCESS_KEY_ID={{ scw_access_key }}
AWS_SECRET_ACCESS_KEY={{ scw_secret_key }}

@@ -0,0 +1,10 @@
[Unit]
Description=Backup service data snapshots using restic
Documentation=man:restic(8)

[Service]
Type=oneshot
Environment=TZ=UTC
Environment=RESTIC_CACHE_DIR=/var/cache/restic
EnvironmentFile=/etc/scaleway.keys
ExecStart=/usr/local/sbin/restic-service-data --password-file /etc/restic.password --data-root /var/lib/{{ ansible_hostname }}/data --bucket-endpoint {{ scw_bucket_endpoint }}
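
The service is normally driven by the timer below, but for an ad-hoc backup it can be started directly and followed in the journal:

# Trigger a one-off backup run and watch its output
systemctl start restic-service-data.service
journalctl -u restic-service-data.service -f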

@@ -0,0 +1,10 @@
[Unit]
Description=Daily restic backup
Documentation=man:restic(8)

[Timer]
OnCalendar=*-*-* 04:05:00
Persistent=true

[Install]
WantedBy=timers.target
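
After the setup tasks enable the timer, the schedule can be checked with systemd's own tooling:

# Show when the daily 04:05 backup will fire next
systemctl list-timers restic-service-data.timer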

@@ -0,0 +1,11 @@
[Unit]
Description=Replicate service data snapshots
Documentation=man:syncoid(8)
After=sanoid.service

[Service]
Type=oneshot
ExecStart=/usr/sbin/syncoid --recursive --skip-parent --no-sync-snap rpool/var/lib/{{ ansible_hostname }}/data hpool/backup/{{ ansible_hostname }}/data

[Install]
WantedBy=sanoid.service
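
Because the unit is WantedBy=sanoid.service, replication is pulled in whenever sanoid.service runs. Whether source and backup agree can be spot-checked by comparing both sides; a sketch, assuming {{ ansible_hostname }} renders to yggdrasil:

# Compare the newest snapshots on the source dataset and its replica
zfs list -t snapshot -r -o name rpool/var/lib/yggdrasil/data | tail -n 5
zfs list -t snapshot -r -o name hpool/backup/yggdrasil/data | tail -n 5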

@@ -0,0 +1,48 @@
#!/usr/bin/env python3

import argparse
import os
import subprocess


def get_service_data_paths(data_root):
    dirs = os.listdir(data_root)
    return { d: os.path.join(data_root, d) for d in dirs }


def get_last_daily_snapshot_name(dataset_path):
    dataset = ''.join(["rpool", dataset_path])
    snapshots = subprocess.getoutput(
        f"zfs list -t snapshot -H -r {dataset} -o name -s creation"
    )
    daily_snapshots = filter(lambda s: s.endswith("_daily"), snapshots.split('\n'))
    return list(daily_snapshots)[-1]


def get_snapshot_mount_path(snapshot):
    return snapshot.replace("rpool/", "/", 1).replace("@", "/.zfs/snapshot/", 1)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Backup service data using restic")
    parser.add_argument("--data-root", type=str, required=True,
                        help="Service data root")
    parser.add_argument("--bucket-endpoint", type=str, required=True,
                        help="S3 bucket endpoint")
    parser.add_argument("--password-file", type=str, required=True,
                        help="Password file for restic repo")
    args = parser.parse_args()

    snapshots_for_backup = {}
    for service, service_data_root in get_service_data_paths(args.data_root).items():
        last_daily_snapshot = get_last_daily_snapshot_name(service_data_root)
        snapshots_for_backup[service] = get_snapshot_mount_path(last_daily_snapshot)

    for service, snapshot_path in snapshots_for_backup.items():
        print(f"Backing up {service} @ {snapshot_path}")
        restic_cmd_base = "restic " \
            f"--password-file {args.password_file} " \
            f"--repo s3:https://{args.bucket_endpoint}/{{ ansible_hostname }}-{service}"
        subprocess.run(f"{restic_cmd_base} snapshots || {restic_cmd_base} init",
                       shell=True, check=True)
        subprocess.run(f"{restic_cmd_base} backup -o s3.storage-class=ONEZONE_IA {snapshot_path}",
                       shell=True, check=True)
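
The script is deployed as a Jinja2 template, so {{ ansible_hostname }} inside the repository name is rendered to the real host name before the file lands in /usr/local/sbin. For a manual run outside systemd, the invocation mirrors the ExecStart line above; the bucket endpoint below is a placeholder and the host name is assumed to render to yggdrasil:

# Load the S3 credentials the systemd unit would normally inject, then run the script by hand
set -a; . /etc/scaleway.keys; set +a
restic-service-data \
    --password-file /etc/restic.password \
    --data-root /var/lib/yggdrasil/data \
    --bucket-endpoint s3.example.scw.cloud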

@@ -0,0 +1,38 @@
- name: Install sanoid
  apt:
    name: sanoid

- name: Create sanoid directory
  file:
    path: /etc/sanoid
    state: directory
    mode: 0755

- name: Configure sanoid
  template:
    src: ./filesystem/{{ ansible_hostname }}/etc/sanoid/sanoid.conf.j2
    dest: /etc/sanoid/sanoid.conf

- name: Copy service for {{ ansible_hostname }} data replication
  template:
    src: ./filesystem/{{ ansible_hostname }}/etc/systemd/system/syncoid-service-data.service.j2
    dest: /etc/systemd/system/syncoid-service-data.service
    mode: 0644
  register: systemd_syncoid_service_data_service_file

- name: SystemD daemon reload
  systemd:
    daemon_reload: true
  when:
    systemd_syncoid_service_data_service_file is changed

- name: Enable the replication service
  systemd:
    name: syncoid-service-data.service
    enabled: yes

- name: Enable the sanoid timer
  systemd:
    name: sanoid.timer
    enabled: yes
    state: started
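
A quick check that the play left things in the intended state, using the unit names installed above:

# sanoid.timer drives the snapshots; the syncoid unit is pulled in by sanoid.service
systemctl status sanoid.timer syncoid-service-data.service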

@@ -0,0 +1,73 @@
- name: Check if restic is installed
  stat:
    path: /usr/local/bin/restic
  register: restic_path

- block:
    - name: Download restic binary
      get_url:
        url: https://github.com/restic/restic/releases/download/v0.14.0/restic_0.14.0_linux_amd64.bz2
        dest: /usr/local/bin/restic.bz2
        mode: 0644

    - name: Unpack restic binary
      command: bunzip2 /usr/local/bin/restic.bz2
  when:
    not restic_path.stat.exists

- name: Ensure restic is executable
  file:
    path: /usr/local/bin/restic
    mode: 0755

- name: Create scaleway key file
  template:
    src: ./filesystem/{{ ansible_hostname }}/etc/scaleway.keys.j2
    dest: /etc/scaleway.keys
    mode: 0600

- name: Create restic password file
  template:
    src: ./filesystem/{{ ansible_hostname }}/etc/restic.password.j2
    dest: /etc/restic.password
    mode: 0600

- name: Create a cache directory for restic
  file:
    path: /var/cache/restic
    state: directory
    mode: 0755

- name: Install the restic backup script
  template:
    src: ./filesystem/{{ ansible_hostname }}/usr/local/sbin/restic-service-data.j2
    dest: /usr/local/sbin/restic-service-data
    mode: 0755

- name: Install the restic backup service file
  template:
    src: ./filesystem/{{ ansible_hostname }}/etc/systemd/system/restic-service-data.service.j2
    dest: /etc/systemd/system/restic-service-data.service
    mode: 0644
  register: systemd_restic_service_data_service_file

- name: Install the restic backup timer file
  copy:
    src: ./filesystem/{{ ansible_hostname }}/etc/systemd/system/restic-service-data.timer
    dest: /etc/systemd/system/restic-service-data.timer
    mode: 0644
  register: systemd_restic_service_data_timer_file

- name: SystemD daemon reload
  systemd:
    daemon_reload: true
  when:
    systemd_restic_service_data_service_file is changed or
    systemd_restic_service_data_timer_file is changed

- name: Enable restic backup
  systemd:
    name: restic-service-data.timer
    enabled: yes
    state: started
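
Once the timer has fired at least once, the per-service repositories can also be inspected directly with restic; the endpoint and service name below are placeholders:

# List the snapshots restic has stored for one service repository
set -a; . /etc/scaleway.keys; set +a
restic --password-file /etc/restic.password \
    --repo s3:https://s3.example.scw.cloud/yggdrasil-myservice snapshots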

@@ -47,3 +47,23 @@
    state: present
    extra_zfs_properties:
      canmount: "off"

- name: Create backup dataset
  zfs:
    name: hpool/backup
    state: present
    extra_zfs_properties:
      canmount: "off"
      "com.sun:auto-snapshot": "false"

- name: Create service backup dataset
  zfs:
    name: hpool/backup/{{ ansible_hostname }}
    state: present

- name: Create service data backup dataset
  zfs:
    name: hpool/backup/{{ ansible_hostname }}/data
    state: present
    extra_zfs_properties:
      canmount: "off"
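
After these tasks run, the target hierarchy for syncoid should exist; a quick way to confirm:

# The new backup datasets should show up under hpool/backup
zfs list -r -o name,canmount,mounted hpool/backup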