#!/usr/bin/env python3
"""Back up ZFS-hosted service data to S3 buckets using restic.

Reads per-service YAML config files from a directory, discovers the mounted
volume datasets under each configured root dataset, clones the most recent
"_daily" snapshot of each volume to a temporary dataset, and runs restic
backup / forget / check against a per-volume S3 repository.
"""

import argparse
import os
import subprocess

import yaml

# Keys every YAML config file must define.
REQUIRED_CONFIG_KEYS = (
    "dataset",
    "aws_bucket_keys_file",
    "aws_bucket_endpoint",
    "aws_bucket_prefix",
    "restic_password_file",
    "restic_keep_daily",
    "restic_keep_monthly",
)


def load_and_validate_config_dir(config_dir):
    """Load and validate every config file directly inside *config_dir*.

    Returns a list of parsed config dicts.
    Raises ValueError if *config_dir* is not a directory (or any entry in it
    is not a regular file — see load_and_validate_config_file).
    """
    if not os.path.isdir(config_dir):
        raise ValueError(f"{config_dir} is not a directory")
    return [
        load_and_validate_config_file(os.path.join(config_dir, file))
        for file in os.listdir(config_dir)
    ]


def load_and_validate_config_file(config_file_path):
    """Parse one YAML config file and validate its contents.

    Returns the parsed config dict.
    Raises ValueError if the path (or a file it references) is not a regular
    file, KeyError if a required key is missing.
    """
    if not os.path.isfile(config_file_path):
        raise ValueError(f"{config_file_path} is not a file")
    with open(config_file_path, encoding="utf-8") as config_file:
        config = yaml.safe_load(config_file)
    for key in REQUIRED_CONFIG_KEYS:
        if key not in config:
            raise KeyError(f"{key} must be present in {config_file_path}")
    # The restic password file and AWS key file must already exist on disk.
    for file in [config["restic_password_file"], config["aws_bucket_keys_file"]]:
        if not os.path.isfile(file):
            raise ValueError(f"{file} is not a file")
    return config


def _zfs_list_lines(args):
    """Run ``zfs list -H`` with extra *args* and return stdout as a line list.

    Uses an argument list (shell=False) instead of a formatted shell string,
    so dataset names cannot be shell-interpreted, and check=True so a failing
    zfs command raises CalledProcessError instead of having its error text
    silently parsed as output (both problems with subprocess.getoutput).
    """
    result = subprocess.run(
        ["zfs", "list", "-H"] + list(args),
        capture_output=True,
        text=True,
        check=True,
    )
    return result.stdout.splitlines()


def get_volume_datasets(root_dataset):
    """Map each mounted dataset under *root_dataset* to its properties.

    Returns ``{relative_name: {"dataset": ..., "mountpoint": ...}}`` for every
    descendant dataset whose mountpoint is an active mount. The root dataset
    itself appears under the key ".".
    """
    lines = _zfs_list_lines(["-r", root_dataset, "-o", "name,mountpoint"])
    # -H output is tab-separated; split on the tab so mountpoints that
    # contain spaces are preserved (bare str.split() would break them).
    entries = (line.split("\t") for line in lines)
    return {
        os.path.relpath(dataset, root_dataset): {
            "dataset": dataset,
            "mountpoint": mountpoint,
        }
        for dataset, mountpoint in entries
        if os.path.ismount(mountpoint)
    }


def get_last_daily_snapshot(dataset):
    """Return the name of the most recent "_daily" snapshot of *dataset*.

    Raises RuntimeError if no daily snapshot exists or the newest one does
    not belong to *dataset* (explicit raises instead of ``assert``, which is
    stripped under ``python -O``).
    """
    snapshots = _zfs_list_lines(
        ["-t", "snapshot", "-r", dataset, "-o", "name", "-s", "creation"]
    )
    daily_snapshots = [s for s in snapshots if s.endswith("_daily")]
    if not daily_snapshots:
        raise RuntimeError(f"no _daily snapshot found for {dataset}")
    # Output is sorted by creation time, so the last entry is the newest.
    last_daily_snapshot = daily_snapshots[-1]
    if "@" not in last_daily_snapshot or last_daily_snapshot.split("@")[0] != dataset:
        raise RuntimeError(f"unexpected snapshot name: {last_daily_snapshot}")
    return last_daily_snapshot


def main():
    """Parse arguments, load all configs, and back up every volume."""
    parser = argparse.ArgumentParser(description="Backup service data using restic")
    parser.add_argument(
        "--config-dir",
        type=str,
        default="/etc/restic-batch.d",
        help="Path to directory with YAML config files",
    )
    args = parser.parse_args()

    config_list = load_and_validate_config_dir(args.config_dir)

    # Resolve the newest daily snapshot of every volume up front, so a
    # missing snapshot aborts the whole run before any backup starts.
    for config in config_list:
        volume_datasets = get_volume_datasets(config["dataset"])
        for properties in volume_datasets.values():
            properties["snapshot"] = get_last_daily_snapshot(properties["dataset"])
        config["volume_datasets"] = volume_datasets

    for config in config_list:
        for volume, properties in config["volume_datasets"].items():
            snapshot = properties["snapshot"]
            mountpoint = properties["mountpoint"]
            # The clone is mounted under /mnt, mirroring the original path.
            backup_path = os.path.normpath(
                os.path.join("/", "mnt", os.path.relpath(mountpoint, "/"))
            )
            bucket_name = config["aws_bucket_prefix"]
            if volume != ".":
                # "." is the root dataset itself; child volumes get a
                # "---"-separated suffix (bucket names cannot contain "/").
                bucket_name = f"{bucket_name}---{volume.replace('/', '---')}"
            bucket_repo = f"s3:https://{config['aws_bucket_endpoint']}/{bucket_name}"
            restic_cmd_base = [
                "/usr/local/bin/restic",
                "--password-file", config["restic_password_file"],
                "--repo", bucket_repo,
                "--option", "s3.storage-class=ONEZONE_IA",
            ]
            with open(config["aws_bucket_keys_file"], encoding="utf-8") as keys_file:
                aws_keys = yaml.safe_load(keys_file)
            # restic reads the AWS credentials from the environment.
            environ = {**os.environ, **aws_keys}

            print(f"Backing up {bucket_name} : {snapshot}", flush=True)
            subprocess.run(
                [
                    "/usr/sbin/zfs", "clone",
                    "-o", f"mountpoint={backup_path}",
                    snapshot, "rpool/restic",
                ],
                check=True,
            )
            try:
                # EAFP: probe the repository; initialize it on first use.
                try:
                    subprocess.run(
                        restic_cmd_base + ["snapshots"], env=environ, check=True
                    )
                except subprocess.CalledProcessError:
                    subprocess.run(
                        restic_cmd_base + ["init"], env=environ, check=True
                    )
                subprocess.run(
                    restic_cmd_base + ["backup", "."],
                    cwd=backup_path,
                    env=environ,
                    check=True,
                )
                subprocess.run(
                    restic_cmd_base + [
                        "forget", "--prune",
                        "--keep-daily", str(config["restic_keep_daily"]),
                        "--keep-monthly", str(config["restic_keep_monthly"]),
                    ],
                    env=environ,
                    check=True,
                )
                subprocess.run(restic_cmd_base + ["check"], env=environ, check=True)
            finally:
                # Always remove the temporary clone, even if restic failed.
                subprocess.run(
                    ["/usr/sbin/zfs", "destroy", "rpool/restic"], check=True
                )


if __name__ == "__main__":
    main()