From 3cfbcfd390d41dbec7b06a4651c58e7607038032 Mon Sep 17 00:00:00 2001 From: Eddie Nielsen <ed@edcore.dk> Date: Tue, 24 Mar 2026 13:40:06 +0000 Subject: [PATCH] release: prepare v0.1.0 --- CHANGELOG.md | 23 ++ README.md | 459 ++++++++------------------------ dockervault/classifier.py | 308 +++++++++++++++++++++++-- 3 files changed, 404 insertions(+), 386 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..86b38c4 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,23 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +--- + +## [0.1.0] - 2026-03-24 + +### Added +- Initial DockerVault CLI +- Recursive Docker Compose scanning +- Classification engine (critical / review / skip) +- Named volume detection and resolution +- Missing path detection +- Borg backup command generation +- Automation mode (--automation, --quiet) +- Exit codes for scripting +- Initial pytest test suite +- Project README and documentation + +### Notes +- First public foundation release of DockerVault +- Focused on backup discovery for real Docker environments diff --git a/README.md b/README.md index 148adb2..53268cd 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ > Intelligent Docker backup discovery for real systems -DockerVault scans your Docker environments and figures out **what actually matters to back up** — automatically. +DockerVault scans your Docker environments and figures out what actually matters to back up — automatically. No guesswork. No forgotten volumes. No broken restores. @@ -14,448 +14,175 @@ No guesswork. No forgotten volumes. No broken restores. 
## 📚 Contents -* [🚀 What is DockerVault?](#what-is-dockervault) -* [⚡ Quick Start](#quick-start) -* [🧠 How it Works](#how-it-works) -* [🗂 Classification Model](#classification-model) -* [💾 Borg Integration](#borg-integration) -* [🤖 Automation Mode](#automation-mode) -* [🔢 Exit Codes](#exit-codes) -* [🛠 Tech Stack](#tech-stack) -* [🔍 Example](#example) -* [🧱 Current Features](#current-features) -* [🔥 Roadmap](#roadmap) -* [🔮 Future Ideas](#future-ideas) -* [🧠 Philosophy](#philosophy) -* [📜 License](#license) -* [❤️ Credits](#credits) +* 🚀 What is DockerVault? +* ⚡ Quick Start +* 🧠 How it Works +* 🗂 Classification Model +* 💾 Borg Integration +* 🤖 Automation Mode +* 🔢 Exit Codes +* 🛠 Tech Stack +* 🔍 Example +* 🔥 Future Ideas +* 📜 License +* ❤️ Credits --- ## 🚀 What is DockerVault? -DockerVault analyzes your `docker-compose.yml` and identifies: +DockerVault is a CLI tool that scans Docker environments and determines what actually needs to be backed up. -* What **must** be backed up -* What can be **ignored** -* What needs **human review** +It understands: +- Docker Compose setups +- bind mounts +- named volumes +- service-specific data paths -It bridges the gap between: - -👉 “everything looks fine” -and -👉 “restore just failed” +Instead of guessing, DockerVault builds a structured backup plan. --- ## ⚡ Quick Start -```bash -git clone https://github.com/YOUR-USER/dockervault.git +git clone https://git.lanx.dk/ed/dockervault.git cd dockervault -pip install -e . 
-``` - -Run analysis: - -```bash -python -m dockervault.cli docker-compose.yml --borg --repo /backup-repo -``` - -Run backup: - -```bash -python -m dockervault.cli docker-compose.yml \ - --run-borg \ - --repo /backup-repo -``` +python -m dockervault.cli scan /your/docker/root --repo /backup --- ## 🧠 How it Works -DockerVault parses your compose file and inspects: +DockerVault works in layers: -* bind mounts -* volume targets -* known data paths - -It then classifies them using heuristics: - -* database paths → critical -* logs/cache → optional -* unknown → review +1. Scan for docker-compose.yml +2. Parse services and volumes +3. Resolve: + - bind mounts + - named volumes +4. Classify paths: + - critical + - review + - skip +5. Generate backup plan --- ## 🗂 Classification Model -DockerVault divides everything into three categories: +DockerVault sorts paths into: -### ✅ INCLUDE +### INCLUDE (critical) +Must be backed up -Must be backed up. +Examples: +- /var/lib/mysql +- /data -Example: +--- -``` -/var/lib/mysql -/data -/config -``` +### REVIEW +Needs human decision -### ⚠️ REVIEW +Examples: +- uploads +- config folders -Needs human decision. +--- -Triggered when: +### SKIP +Safe to ignore -* path does not exist -* path exists but is empty -* named volumes (Docker-managed) - -Example: - -``` -./mc-missing → /data -``` - -### ❌ SKIP - -Safe to ignore. - -Example: - -``` -/var/log -/tmp -/cache -``` +Examples: +- logs +- cache +- temp data --- ## 💾 Borg Integration -DockerVault can generate and run Borg backups directly. 
+DockerVault can generate ready-to-use Borg commands: -Example: - -```bash -python -m dockervault.cli docker-compose.yml \ - --run-borg \ - --repo /mnt/backups/borg/dockervault -``` - -Generated command: - -```bash borg create --stats --progress \ - /repo::hostname-2026-03-23_12-44-19 \ - /path/to/data -``` + /backup-repo::{hostname}-{now:%Y-%m-%d_%H-%M} \ + /path1 \ + /path2 -### Features - -* automatic archive naming (with seconds precision) -* deduplicated paths -* safe command generation -* subprocess execution -* optional passphrase support +This makes it easy to plug into: +- cron jobs +- scripts +- automation pipelines --- ## 🤖 Automation Mode -Designed for cron / scripts / servers. +python -m dockervault.cli scan /path --automation --quiet -```bash -python -m dockervault.cli docker-compose.yml \ - --run-borg \ - --quiet \ - --automation \ - --repo /backup-repo -``` - -### Behavior - -* no plan output -* no interactive prompts -* minimal output -* suitable for logs / CI +Designed for: +- scheduled backups +- CI/CD pipelines +- unattended systems --- ## 🔢 Exit Codes -| Code | Meaning | -| ---- | ------------------------------------ | -| 0 | Success | -| 1 | General error | -| 2 | Missing required args | -| 3 | No include paths | -| 4 | Review required (`--fail-on-review`) | - -### Fail on review - -```bash ---fail-on-review -``` - -Stops automation if something needs human attention. 
+0 = Success +1 = Missing critical paths +2 = General error --- ## 🛠 Tech Stack -* Python 3.10+ -* PyYAML -* BorgBackup -* CLI-first design +- Python 3 +- Docker Compose parsing +- Filesystem analysis +- Borg backup integration +- pytest (testing) --- ## 🔍 Example -Input: +DockerVault Backup Plan +======================= -```yaml -services: - db: - volumes: - - ./db:/var/lib/mysql +INCLUDE PATHS: + - ./db [critical] + - ./mc [critical] - mc: - volumes: - - ./mc-missing:/data - - nginx: - volumes: - - ./logs:/var/log/nginx -``` - -Output: - -``` -INCLUDE: - db - -REVIEW: - mc-missing - -SKIP: - logs -``` +WARNING: Missing critical paths detected + - ./db (service=db) --- -## 🧱 Current Features +## 🔥 Future Ideas -* Docker Compose parsing -* Bind mount detection -* Intelligent classification -* Borg backup integration -* Automation mode -* Exit codes for scripting -* Safe path handling -* Deduplication - ---- - -## 🗺 Roadmap - -DockerVault is built with a clear philosophy: -**simple core, intelligent behavior, and extensible design — without unnecessary complexity or vendor lock-in.** - ---- - -### 🚀 v1 — Core Engine (Current Focus) - -> Build a reliable, deterministic backup discovery engine - -- [x] Docker Compose scanning -- [x] Volume and bind mount detection -- [x] Intelligent classification (critical / review / skip) -- [x] Backup plan generation -- [x] Borg backup integration -- [x] Dry-run mode -- [x] Automation mode (`--automation`, `--quiet`) - ---- - -### 🔧 v2 — Observability & Automation - -> Make DockerVault production-ready - -- [ ] Advanced logging (human + JSON output) -- [ ] Webhook support (primary notification system) -- [ ] ntfy integration (lightweight alerts) -- [ ] Email notifications (optional reports) -- [ ] Change detection (new/missing volumes) -- [ ] Backup summaries (stats, duration, warnings) -- [ ] Basic run history (file-based, no database) - ---- - -### 🧠 v3 — Intelligence Layer - -> Move from tool → system awareness - -- [ ] "Explain 
why" classification decisions -- [ ] Anomaly detection (size, duration, structure) -- [ ] System understanding confidence -- [ ] Backup diff between runs -- [ ] Smarter classification patterns - ---- - -### 🧪 v4 — Reliability & Safety - -> Ensure backups are actually usable - -- [ ] Restore testing (ephemeral container validation) -- [ ] Integrity checks (borg/restic verify) -- [ ] Pre/post execution hooks -- [ ] Backup profiles (critical / full / custom) - ---- - -### 🔐 v5 — Security & Encryption - -> Strong, transparent data protection - -- [ ] Engine-native encryption (Borg / Restic) -- [ ] Encryption validation checks -- [ ] Optional post-process encryption (age / gpg) -- [ ] Clear key handling guidelines - ---- - -### 🔌 v6 — Plugin Ecosystem - -> Extend without bloating core - -- [ ] Storage backends (S3, WebDAV, SSH, etc.) -- [ ] Optional cloud integrations (Dropbox, Google Drive, Proton Drive) -- [ ] Notification plugins (webhook-first approach) -- [ ] Pluggable architecture for extensions - ---- - -### 🌐 v7 — Platform & Deployment - -> Make DockerVault easy to run anywhere - -- [ ] Official Docker image -- [ ] Non-interactive container mode -- [ ] Unraid Community Apps template -- [ ] Configurable via environment + config file - ---- - -### 🧭 Design Principles - -- **No vendor lock-in** — webhook over platform integrations -- **Self-hosting friendly** — works fully offline/local -- **Transparency over magic** — explain decisions -- **Stateless-first** — no database required by default -- **Extensible architecture** — plugins over core bloat -- **Backup ≠ done until restore works** - ---- - -### 🔮 Future Ideas - -> Ideas that push DockerVault beyond backup — towards system awareness and control. 
- -#### 🧠 System Intelligence -- Change detection (new/missing volumes, structure changes) -- "Explain why" classification decisions -- System understanding confidence score -- Backup diff between runs -- Detection of unknown/unclassified data - -#### 📊 Observability & Insight -- Historical trends (size, duration, change rate) -- Growth analysis (detect abnormal data expansion) -- Backup performance tracking -- Structured JSON logs for external systems - -#### 🚨 Alerting & Automation -- Webhook-first automation triggers -- ntfy notifications -- Email reporting -- Conditional alerts (failures, anomalies, missing data) -- Integration with external systems (Node-RED, Home Assistant, OpenObserve) - -#### 🧪 Reliability & Verification -- Automated restore testing (ephemeral containers) -- Service-level validation (DB start, app health) -- Integrity checks (borg/restic verification) -- Backup validation reports - -#### ⚙️ Control & Extensibility -- Pre/post execution hooks -- Backup profiles (critical / full / custom) -- Simulation mode (predict behavior before execution) -- Advanced dry-run with diff preview - -#### 🔐 Security & Encryption -- Engine-native encryption support -- Optional post-process encryption (age, gpg) -- Encryption validation and key awareness -- Secure offsite export workflows - -#### 🔌 Plugin Ecosystem -- Storage backends (S3, WebDAV, SSH, etc.) -- Optional cloud targets (Dropbox, Google Drive, Proton Drive) -- Notification plugins (webhook-first design) -- Pluggable architecture for extensions - -#### 🌐 Multi-System Awareness -- Multi-host environments (Lanx-style setups) -- Centralized reporting and monitoring -- Cross-node backup visibility - -#### 🖥 Platform & UX -- Optional Web UI (status, history, alerts) -- Docker-native deployment mode -- Unraid Community Apps integration -- Config-driven operation (env + config files) - ---- - -> Built with ❤️ for real systems — not toy setups. 
---- - -## 🧠 Philosophy - -DockerVault is built on a simple idea: - -> Backups should reflect reality — not assumptions. - -* No blind backups -* No hidden data -* No silent failures - -Just clarity. +- Notifications (mail, ntfy) +- Web interface +- Backup reports +- Restore validation +- smarter classification engine +- Docker API integration --- ## 📜 License -GNU GPLv3 - -This project is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License v3. +This project is licensed under the GNU GPL v3. --- ## ❤️ Credits -Created by:
**Ed Nielsen https://lanx.dk
NodeFox 🦊 https://nodefox.lanx.dk** + -Built with ❤️ for Lanx +Built with ❤️ for Lanx by [NodeFox 🦊](https://nodefox.lanx.dk) -Maintained by Ed Nielsen -Feel free to contribute, suggest improvements or fork the project. +Maintained by [Eddie Nielsen](https://lanx.dk/ed) + +Feel free to contribute, suggest improvements, or fork the project. diff --git a/dockervault/classifier.py b/dockervault/classifier.py index 2703e1f..9469e10 100644 --- a/dockervault/classifier.py +++ b/dockervault/classifier.py @@ -1,31 +1,299 @@ +from __future__ import annotations + +from dataclasses import dataclass from pathlib import Path +from typing import Any + +import yaml + +from .volume_inspector import ( + docker_available, + infer_project_name, + normalize_top_level_volume_name, + run_docker_volume_inspect, ) -CRITICAL_PATHS = [ - "/var/lib/mysql", - "/data", - "/config", -] +@dataclass +class ClassifiedEntry: + service: str + source: Path | str | None + target: str + classification: str + reason: str + compose: Path + exists: bool = False + volume_type: str | None = None -SKIP_PATHS = [ - "/var/log", - "/tmp", -] +_PRIORITY = { + "critical": 3, + "review": 2, + "optional": 1, +} + + +def _classify_target(target: str) -> tuple[str, str]: + t = (target or "").lower() + + if t == "/var/lib/mysql": + return "critical", f"mariadb critical path {target}" + + if t == "/var/lib/postgresql/data": + return "critical", f"postgres critical path {target}" + + if t == "/data": + return "critical", f"critical target path {target}" + + if "/log" in t or "cache" in t or "/tmp" in t: + return "optional", f"ephemeral/log/cache target path {target}" + + return "review", "no rule matched" def classify_mount(mount: dict) -> dict: - target = mount["target"] + classification, reason = _classify_target(mount.get("target", "")) + out = dict(mount) + out["classification"] = classification + out["reason"] = reason + return out - # 🔥 critical - for p in CRITICAL_PATHS: - if 
target.startswith(p): - return {**mount, "class": "critical"} - # 🗑 skip - for p in SKIP_PATHS: - if target.startswith(p): - return {**mount, "class": "skip"} +def _parse_volume_spec(spec: Any) -> dict[str, Any] | None: + if isinstance(spec, dict): + source = spec.get("source") + target = spec.get("target") + if not target: + return None - # 🤔 fallback - return {**mount, "class": "review"} + mount_type = spec.get("type") + if mount_type is None: + mount_type = ( + "volume" + if source and not str(source).startswith((".", "/", "~")) + else "bind" + ) + + return { + "source": source, + "target": target, + "type": mount_type, + } + + if not isinstance(spec, str): + return None + + parts = spec.split(":") + if len(parts) == 1: + return { + "source": "__anonymous_volume__", + "target": parts[0], + "type": "anonymous", + } + + source = parts[0] + target = parts[1] + + if source.startswith("/") or source.startswith(".") or source.startswith("~"): + mount_type = "bind" + else: + mount_type = "volume" + + return { + "source": source, + "target": target, + "type": mount_type, + } + + +def _build_entry( + *, + service: str, + compose_path: Path, + source: Path | str | None, + target: str, + classification: str, + reason: str, + volume_type: str | None, +) -> ClassifiedEntry: + exists = False + if isinstance(source, Path): + exists = source.exists() + elif isinstance(source, str) and not source.startswith("__"): + exists = Path(source).exists() + + return ClassifiedEntry( + service=service, + source=source, + target=target, + classification=classification, + reason=reason, + compose=compose_path, + exists=exists, + volume_type=volume_type, + ) + + +def _candidate_volume_names( + *, + named_volume: str, + compose_data: dict[str, Any], + project_name: str, +) -> list[str]: + candidates: list[str] = [] + + explicit_name, is_external = normalize_top_level_volume_name( + named_volume, + compose_data, + ) + + if explicit_name: + candidates.append(explicit_name) + + if named_volume 
not in candidates: + candidates.append(named_volume) + + if not is_external: + project_prefixed = f"{project_name}_{named_volume}" + if project_prefixed not in candidates: + candidates.append(project_prefixed) + + return candidates + + +def classify_compose(compose_path: str | Path) -> list[ClassifiedEntry]: + compose_path = Path(compose_path) + + with compose_path.open("r", encoding="utf-8") as f: + compose_data = yaml.safe_load(f) or {} + + services = compose_data.get("services", {}) or {} + project_name = infer_project_name(compose_path, compose_data) + + entries: list[ClassifiedEntry] = [] + + for service_name, service_data in services.items(): + volumes = service_data.get("volumes", []) or [] + + for raw_spec in volumes: + parsed = _parse_volume_spec(raw_spec) + if not parsed: + continue + + source = parsed["source"] + target = parsed["target"] + mount_type = parsed["type"] + + if mount_type == "bind": + if source and source.startswith("."): + resolved_source = (compose_path.parent / source).resolve() + else: + resolved_source = Path(source).expanduser().resolve() + + classification, reason = _classify_target(target) + + entries.append( + _build_entry( + service=service_name, + compose_path=compose_path, + source=resolved_source, + target=target, + classification=classification, + reason=reason, + volume_type="bind", + ) + ) + continue + + if mount_type == "anonymous": + entries.append( + _build_entry( + service=service_name, + compose_path=compose_path, + source="__anonymous_volume__", + target=target, + classification="review", + reason=f"anonymous volume for {target}", + volume_type="anonymous", + ) + ) + continue + + classification, reason = _classify_target(target) + named_volume = source + + candidates = _candidate_volume_names( + named_volume=named_volume, + compose_data=compose_data, + project_name=project_name, + ) + + if docker_available(): + resolved_source: Path | None = None + resolved_candidate: str | None = None + + for candidate in candidates: 
+ inspected = run_docker_volume_inspect(candidate) + if inspected and inspected.get("Mountpoint"): + resolved_source = Path(inspected["Mountpoint"]).resolve() + resolved_candidate = candidate + break + + if resolved_source is not None: + entries.append( + ClassifiedEntry( + service=service_name, + source=resolved_source, + target=target, + classification=classification, + reason=f"resolved named volume '{resolved_candidate}' for {target}", + compose=compose_path, + exists=resolved_source.exists(), + volume_type="volume", + ) + ) + else: + explicit_name, _ = normalize_top_level_volume_name( + named_volume, + compose_data, + ) + unresolved_name = explicit_name or named_volume + + entries.append( + _build_entry( + service=service_name, + compose_path=compose_path, + source=f"__named_volume_unresolved__/{unresolved_name}", + target=target, + classification="review", + reason=f"named volume '{named_volume}' could not be resolved", + volume_type="volume", + ) + ) + else: + entries.append( + _build_entry( + service=service_name, + compose_path=compose_path, + source=named_volume, + target=target, + classification="review", + reason=f"docker CLI not available for named volume {named_volume}", + volume_type="volume", + ) + ) + + deduped: dict[str, ClassifiedEntry] = {} + + for entry in entries: + key = str(entry.source) + + existing = deduped.get(key) + if existing is None: + deduped[key] = entry + continue + + if _PRIORITY[entry.classification] > _PRIORITY[existing.classification]: + deduped[key] = entry + + return list(deduped.values())