Walk datasets rather than directory for backups

This commit is contained in:
Wojciech Kozlowski 2022-10-25 22:24:43 +02:00
parent 4c2ae32143
commit 322cb15995
2 changed files with 29 additions and 21 deletions

View File

@@ -8,4 +8,4 @@ Environment=TZ=UTC
Environment=RESTIC_CACHE_DIR=/var/cache/restic
Environment=RESTIC_PASSWORD_FILE=/etc/restic.password
EnvironmentFile=/etc/scaleway.keys
ExecStart=/usr/local/sbin/restic-service-data --data-root /var/lib/{{ ansible_hostname }}/data --bucket-endpoint {{ scw_bucket_endpoint }}
ExecStart=/usr/local/sbin/restic-service-data --root-dataset rpool/var/lib/{{ ansible_hostname }}/data --bucket-endpoint {{ scw_bucket_endpoint }}

View File

@@ -5,42 +5,50 @@ import os
import subprocess
def get_service_dataset_paths(data_root):
    """Return {entry name: full path} for every entry directly under *data_root*."""
    paths = {}
    for entry in os.listdir(data_root):
        paths[entry] = os.path.join(data_root, entry)
    return paths
def get_service_datasets(root_dataset):
    """Return per-service info for every descendant dataset of *root_dataset*.

    Runs ``zfs list -H -r`` and maps each child dataset's basename to a
    dict with its full ``dataset`` name and ``mountpoint``. The root
    dataset itself is excluded.

    Raises:
        RuntimeError: if a line of zfs output cannot be parsed — e.g. the
            root dataset does not exist (getoutput merges stderr into the
            captured text, so zfs error messages would otherwise cause an
            opaque unpacking crash).
    """
    zfs_list = subprocess.getoutput(
        f"zfs list -H -r {root_dataset} -o name,mountpoint"
    )
    services = {}
    for line in zfs_list.splitlines():
        # -H ("scripting mode") output is tab-separated; splitting on
        # arbitrary whitespace would break mountpoints containing spaces.
        fields = line.split('\t')
        if len(fields) != 2:
            raise RuntimeError(f"unexpected zfs list output: {line!r}")
        dataset, mountpoint = fields
        if dataset != root_dataset:
            services[os.path.basename(dataset)] = {
                "dataset": dataset,
                "mountpoint": mountpoint,
            }
    return services
def get_last_daily_snapshot_name(dataset_path):
dataset = ''.join(["rpool", dataset_path])
def get_last_daily_snapshot(dataset):
    """Return the name of the newest "_daily" snapshot of *dataset*.

    ``zfs list -s creation`` sorts oldest-first, so the last matching
    line is the most recent daily snapshot.

    Raises:
        RuntimeError: if no daily snapshot exists, or the newest one does
            not belong to *dataset* (malformed or unexpected zfs output).
    """
    snapshots = subprocess.getoutput(
        f"zfs list -t snapshot -H -r {dataset} -o name -s creation"
    )
    daily_snapshots = [s for s in snapshots.split('\n') if s.endswith("_daily")]
    # Explicit raise instead of indexing/assert: [-1] on an empty list would
    # be an opaque IndexError, and asserts are stripped under "python -O".
    if not daily_snapshots:
        raise RuntimeError(f"no _daily snapshot found for {dataset}")
    last_daily_snapshot = daily_snapshots[-1]
    # Snapshot names are "<dataset>@<name>"; reject anything else so a bad
    # name is never handed to "zfs clone" later.
    if '@' not in last_daily_snapshot:
        raise RuntimeError(f"not a snapshot name: {last_daily_snapshot!r}")
    if last_daily_snapshot.split('@')[0] != dataset:
        raise RuntimeError(
            f"snapshot {last_daily_snapshot!r} does not belong to {dataset}"
        )
    return last_daily_snapshot
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Backup service data using restic")
parser.add_argument("--data-root", type=str, required=True,
help="Service data root")
parser.add_argument("--root-dataset", type=str, required=True,
help="The root data root whose children are to be backed up")
parser.add_argument("--bucket-endpoint", type=str, required=True,
help="S3 bucket endpoint")
help="S3 bucket endpoint for the backups")
args = parser.parse_args()
snapshots_for_backup = {
service: {
"dataset_path": service_dataset_path,
"snapshot": get_last_daily_snapshot_name(service_dataset_path),
} for service, service_dataset_path in get_service_dataset_paths(args.data_root).items()
}
service_datasets = get_service_datasets(args.root_dataset)
for service, properties in service_datasets.items():
properties["snapshot"] = get_last_daily_snapshot(properties["dataset"])
for service, info in snapshots_for_backup.items():
backup_path = os.path.normpath(
os.path.join("/", "mnt", os.path.relpath(info["dataset_path"], "/"))
)
snapshot = info["snapshot"]
for service, properties in service_datasets.items():
mountpoint = properties["mountpoint"]
snapshot = properties["snapshot"]
backup_path = os.path.normpath(os.path.join("/", "mnt", os.path.relpath(mountpoint, "/")))
restic_cmd_base = "restic " \
f"--repo s3:https://{args.bucket_endpoint}/the-nine-worlds---{service} " \
"--option s3.storage-class=ONEZONE_IA"
print(f"Backing up {service} : {snapshot}")
print(f"Backing up {service} : {snapshot}", flush=True)
subprocess.run(f"zfs clone -o mountpoint={backup_path} {snapshot} rpool/restic",
shell=True, check=True)