diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cecc186 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.venv/ +*.egg-info/ +__pycache__/ +*.pyc diff --git a/LICENSE b/LICENSE index c23d472..4aec557 100644 --- a/LICENSE +++ b/LICENSE @@ -1,15 +1,21 @@ -GNU GENERAL PUBLIC LICENSE -Version 2, June 1991 +MIT License -Copyright (C) 2026 Eddie Nielsen +Copyright (c) 2026 Ed & NodeFox -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -[...] - -(⚠️ VIGTIGT: den fulde GPL v2 tekst er meget lang — du skal bruge hele den officielle) - -👉 Brug den officielle her: -https://www.gnu.org/licenses/old-licenses/gpl-2.0.txt +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 457ae8d..ab0cda8 100644 --- a/README.md +++ b/README.md @@ -1,229 +1,57 @@ -

- DockerVault logo +# DockerVault ---- +Early CLI foundation for DockerVault. -## 📑 Contents +## Current scope -* [🚀 What is DockerVault?](#-what-is-dockervault) -* [✨ Goals](#-goals) -* [⚙️ Technology](#-technology) -* [🏗 Architecture](#-architecture) -* [📦 What DockerVault backs up](#-what-dockervault-is-expected-to-back-up) -* [🔁 Restore philosophy](#-restore-philosophy) -* [🧩 Planned Features](#-planned-features) -* [🛣 Roadmap](#-roadmap) -* [📁 Project Structure](#-project-structure) -* [🤝 Philosophy](#-philosophy) -* [📜 License](#-license) +- Python CLI project skeleton +- `scan` command +- Recursive discovery of Docker Compose projects +- YAML parsing with `PyYAML` +- Detection of: + - services + - images + - restart policies + - bind mounts + - named volumes + - `env_file` +- JSON or human-readable output ---- +## Quick start -## 🚀 What is DockerVault? - -DockerVault is a CLI-first backup system for Docker environments. - -It is designed to make backups simple, transparent, and reliable without unnecessary complexity. The goal is to provide a practical way to discover containers, identify important data, and manage backups in a structured and predictable way. - -DockerVault is aimed at homelabs, self-hosted infrastructure, and small-scale server environments where control, clarity, and recoverability matter. - ---- - -## ✨ Goals - -DockerVault is being built to: - -* Discover Docker containers automatically -* Identify volumes, bind mounts, and relevant configuration data -* Keep track of backup history and metadata -* Use a proven backup backend instead of reinventing backup logic -* Make restore operations easier and safer -* Stay simple enough to understand and debug - ---- - -## ⚙️ Technology - -DockerVault is planned as a modular, CLI-first tool. 
- -### Current design direction - -* **Core language:** Python -* **Backup engine:** BorgBackup -* **Metadata storage:** SQLite -* **Interface:** CLI first -* **Platform focus:** Linux Docker hosts - -### Why this stack? - -* **Python** makes it fast to build, maintain, and extend -* **BorgBackup** is mature, reliable, and well-suited for deduplicated backups -* **SQLite** keeps metadata simple, local, and easy to inspect -* **CLI first** keeps the project transparent and easy to debug - -The project philosophy is clear: use proven tools where it makes sense, and avoid building complexity just for the sake of it. - ---- - -## 🏗 Architecture - -DockerVault follows a simple flow: - -1. Scan Docker environment -2. Detect containers, volumes, bind mounts, and configs -3. Store metadata in SQLite -4. Build backup jobs from discovered data -5. Execute backups through Borg -6. Restore data when needed - -More detailed architecture notes will live in `docs/architecture.md`. - ---- - -## 📦 What DockerVault is expected to back up - -DockerVault is intended to focus on the parts of Docker environments that actually matter: - -* Docker volumes -* Bind mounts -* Selected configuration files -* Backup metadata -* Restore-related information - -It is not intended to blindly copy everything without structure. The purpose is to know what is being backed up and why. - ---- - -## 🔁 Restore philosophy - -Backups are only useful if restore is realistic. - -DockerVault is being designed with restore in mind from the beginning: - -* Clear mapping between containers and stored data -* Metadata that explains relationships between services and data -* Predictable restore flow -* Minimal guesswork during recovery - -The goal is not just to store backups, but to enable actual recovery. 
- ---- - -## 🧩 Planned Features - -### Core features - -* Docker container discovery -* Volume detection -* Bind mount detection -* Backup job creation -* Borg-based backup execution -* SQLite-based metadata tracking -* Restore workflow - -### Future possibilities - -* Scheduled backups -* Retention policies -* Pre/post backup hooks -* E-mail notifications -* `ntfy` notifications -* Web interface -* Multi-node support -* Remote repository support -* Backup health/status reporting -* Configuration profiles -* Selective backup policies per container - ---- - -## 🛣 Roadmap - -### Phase 1 – Foundation - -* Repository structure -* Documentation -* CLI skeleton -* Initial project design - -### Phase 2 – Discovery - -* Scan Docker environment -* Detect containers -* Detect volumes and bind mounts - -### Phase 3 – Backup Engine - -* Integrate BorgBackup -* Build backup job flow -* Store metadata in SQLite - -### Phase 4 – Restore - -* Basic restore workflow -* Restore metadata mapping -* Safer recovery process - -### Phase 5 – Usability - -* Better CLI commands -* Config handling -* Scheduling support -* Notifications - -### Phase 6 – Expansion - -* Web interface -* Multi-node support -* Advanced backup policies - ---- - -## 📁 Project Structure - -```text -dockervault/ -├── cmd/ -├── core/ -├── scanner/ -├── backup/ -├── restore/ -├── config/ -├── database/ -├── docs/ -├── scripts/ -├── images/ -├── README.md -└── LICENSE +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install -e . ``` ---- +Run a scan: -## 🤝 Philosophy +```bash +dockervault scan /path/to/docker/projects +``` -DockerVault is built on a few simple principles: +JSON output: -* Keep it simple -* Be transparent -* Avoid unnecessary complexity -* Prefer proven tools over hype -* Build something practical and maintainable +```bash +dockervault scan /path/to/docker/projects --json +``` -No magic. No hidden behavior. No unnecessary abstraction. 
"""Command-line interface for DockerVault.

Exposes the ``dockervault`` entry point with a single ``scan`` subcommand
that discovers Docker Compose projects under a base path and prints the
result as human-readable text or JSON.
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

from dockervault.scanner import scan_projects


def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``dockervault`` CLI."""
    parser = argparse.ArgumentParser(
        prog="dockervault",
        description="DockerVault CLI",
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    scan_parser = subparsers.add_parser(
        "scan",
        help="Scan a folder for Docker Compose projects",
    )
    scan_parser.add_argument(
        "path",
        nargs="?",
        default=".",
        help="Base path to scan (default: current directory)",
    )
    scan_parser.add_argument(
        "--json",
        action="store_true",
        help="Output scan results as JSON",
    )

    return parser


def render_text(projects: list) -> str:
    """Render scanned projects as an indented, human-readable report.

    Each element of *projects* is expected to behave like
    ``dockervault.models.ComposeProject``.
    """
    if not projects:
        return "No Docker Compose projects found."

    lines: list[str] = []
    lines.append(f"Found {len(projects)} project(s):")

    for project in projects:
        lines.append("")
        lines.append(f"- {project.name}")
        lines.append(f"  Path: {project.root_path}")
        lines.append(f"  Compose files: {', '.join(project.compose_files) or '-'}")
        lines.append(f"  Services: {', '.join(project.service_names) or '-'}")
        lines.append(f"  Named volumes: {', '.join(project.named_volumes) or '-'}")

        if project.backup_paths:
            lines.append("  Backup candidates:")
            for backup_path in project.backup_paths:
                lines.append(f"    - {backup_path}")
        else:
            lines.append("  Backup candidates: -")

        for service in project.services:
            lines.append(f"  Service: {service.name}")
            lines.append(f"    Image: {service.image or '-'}")
            lines.append(f"    Restart: {service.restart or '-'}")
            lines.append(f"    Env files: {', '.join(service.env_files) or '-'}")

            if service.mounts:
                lines.append("    Mounts:")
                for mount in service.mounts:
                    ro = " (ro)" if mount.read_only else ""
                    lines.append(
                        f"      - {mount.kind}: {mount.source or '[anonymous]'} -> {mount.target}{ro}"
                    )
            else:
                lines.append("    Mounts: -")

    return "\n".join(lines)


def main() -> int:
    """CLI entry point.

    Returns a process exit code: 0 on success, 2 on scan errors.
    """
    parser = build_parser()
    args = parser.parse_args()

    if args.command == "scan":
        try:
            projects = scan_projects(Path(args.path))
        except (FileNotFoundError, NotADirectoryError) as exc:
            print(f"Error: {exc}", file=sys.stderr)
            return 2
        # NOTE: a json.JSONDecodeError handler previously sat here, but
        # scan_projects parses YAML (yaml.safe_load) and can never raise it;
        # the branch was dead code.  Parse failures land in the handler below.
        except Exception as exc:
            print(f"Error: {exc}", file=sys.stderr)
            return 2

        if args.json:
            print(json.dumps([project.to_dict() for project in projects], indent=2))
        else:
            print(render_text(projects))

        return 0

    # Unreachable while the subparser is required=True; kept as a safety net.
    parser.print_help()
    return 1


if __name__ == "__main__":
    raise SystemExit(main())
a/dockervault/models.py b/dockervault/models.py new file mode 100644 index 0000000..f8ed090 --- /dev/null +++ b/dockervault/models.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +from dataclasses import asdict, dataclass, field +from pathlib import Path + + +@dataclass(slots=True) +class MountMapping: + source: str + target: str + kind: str + read_only: bool = False + + def to_dict(self) -> dict: + return asdict(self) + + +@dataclass(slots=True) +class ServiceDefinition: + name: str + image: str | None = None + restart: str | None = None + env_files: list[str] = field(default_factory=list) + mounts: list[MountMapping] = field(default_factory=list) + + def to_dict(self) -> dict: + return asdict(self) + + +@dataclass(slots=True) +class ComposeProject: + name: str + root_path: str + compose_files: list[str] = field(default_factory=list) + services: list[ServiceDefinition] = field(default_factory=list) + named_volumes: list[str] = field(default_factory=list) + backup_paths: list[str] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + "name": self.name, + "root_path": self.root_path, + "compose_files": self.compose_files, + "services": [service.to_dict() for service in self.services], + "named_volumes": self.named_volumes, + "backup_paths": self.backup_paths, + } + + @property + def service_names(self) -> list[str]: + return [service.name for service in self.services] + + +DEFAULT_COMPOSE_FILENAMES = { + "docker-compose.yml", + "docker-compose.yaml", + "compose.yml", + "compose.yaml", +} + + +def normalize_path(path: Path) -> str: + return str(path.resolve()) diff --git a/dockervault/scanner.py b/dockervault/scanner.py new file mode 100644 index 0000000..d4fbf2c --- /dev/null +++ b/dockervault/scanner.py @@ -0,0 +1,222 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import yaml + +from dockervault.models import ( + ComposeProject, + DEFAULT_COMPOSE_FILENAMES, + MountMapping, + 
"""Discovery of Docker Compose projects and their backup-relevant paths."""

from __future__ import annotations

from pathlib import Path
from typing import Any

import yaml

from dockervault.models import (
    ComposeProject,
    DEFAULT_COMPOSE_FILENAMES,
    MountMapping,
    ServiceDefinition,
    normalize_path,
)


def find_compose_files(base_path: Path) -> list[Path]:
    """Find likely Docker Compose files under *base_path*, recursively.

    Returns a sorted list of matching file paths.
    """
    # rglob once per known filename instead of rglob("*") + name filter:
    # avoids visiting and stat-ing every file in large trees.
    matches: set[Path] = set()
    for filename in DEFAULT_COMPOSE_FILENAMES:
        matches.update(path for path in base_path.rglob(filename) if path.is_file())
    return sorted(matches)


def load_yaml_file(compose_path: Path) -> dict[str, Any]:
    """Parse a YAML file, returning {} for empty or non-mapping documents."""
    try:
        content = compose_path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Tolerate stray bytes rather than failing the whole scan.
        content = compose_path.read_text(encoding="utf-8", errors="ignore")

    data = yaml.safe_load(content) or {}
    if not isinstance(data, dict):
        return {}
    return data


def parse_env_files(value: Any) -> list[str]:
    """Normalize a compose ``env_file`` entry to a sorted list of paths.

    Accepts the plain string, list-of-strings, and list-of-mappings forms;
    anything else yields an empty list.
    """
    if isinstance(value, str):
        return [value]

    if isinstance(value, list):
        items: list[str] = []
        for item in value:
            if isinstance(item, str):
                items.append(item)
            elif isinstance(item, dict):
                path = item.get("path")
                if isinstance(path, str):
                    items.append(path)
        return sorted(set(items))

    return []


def normalize_volume_dict(volume: dict[str, Any]) -> MountMapping | None:
    """Convert a long-form (mapping) volume entry to a MountMapping.

    Returns None when the entry has no usable target.
    """
    source = volume.get("source") or volume.get("src") or ""
    target = volume.get("target") or volume.get("dst") or volume.get("destination") or ""
    if not isinstance(target, str) or not target:
        return None

    # Without an explicit type, treat path-like sources as bind mounts.
    kind = volume.get("type") or ("bind" if source and str(source).startswith(("/", ".", "~")) else "volume")
    read_only = bool(volume.get("read_only") or volume.get("readonly"))

    return MountMapping(
        source=str(source),
        target=target,
        kind=str(kind),
        read_only=read_only,
    )


def normalize_volume_string(value: str) -> MountMapping | None:
    """Convert a short-form ``source:target[:options]`` volume string."""
    parts = value.split(":")
    if len(parts) == 1:
        # A bare target is an anonymous volume.
        return MountMapping(source="", target=parts[0], kind="anonymous", read_only=False)

    # str.split always yields at least one part, so this branch covers the rest.
    source, target, *options = parts
    read_only = "ro" in options
    kind = "bind" if source.startswith(("/", ".", "~")) else "volume"
    return MountMapping(source=source, target=target, kind=kind, read_only=read_only)


def parse_mounts(value: Any) -> list[MountMapping]:
    """Parse a compose ``volumes`` list (string or mapping entries)."""
    mounts: list[MountMapping] = []

    if not isinstance(value, list):
        return mounts

    for item in value:
        mapping: MountMapping | None = None
        if isinstance(item, str):
            mapping = normalize_volume_string(item)
        elif isinstance(item, dict):
            mapping = normalize_volume_dict(item)

        if mapping:
            mounts.append(mapping)

    return mounts


def parse_service_definition(name: str, data: Any) -> ServiceDefinition:
    """Build a ServiceDefinition from one entry of the ``services`` mapping."""
    if not isinstance(data, dict):
        return ServiceDefinition(name=name)

    mounts = parse_mounts(data.get("volumes", []))
    env_files = parse_env_files(data.get("env_file"))

    return ServiceDefinition(
        name=name,
        image=data.get("image") if isinstance(data.get("image"), str) else None,
        restart=data.get("restart") if isinstance(data.get("restart"), str) else None,
        env_files=env_files,
        mounts=mounts,
    )


def merge_service(existing: ServiceDefinition, incoming: ServiceDefinition) -> ServiceDefinition:
    """Merge two definitions of the same service (later files win).

    Mounts are deduplicated by (source, target, kind, read_only); env files
    are unioned; scalar fields fall back to the existing value.
    """
    mounts_by_key: dict[tuple[str, str, str, bool], MountMapping] = {
        (mount.source, mount.target, mount.kind, mount.read_only): mount
        for mount in existing.mounts
    }
    for mount in incoming.mounts:
        mounts_by_key[(mount.source, mount.target, mount.kind, mount.read_only)] = mount

    env_files = sorted(set(existing.env_files) | set(incoming.env_files))

    return ServiceDefinition(
        name=existing.name,
        image=incoming.image or existing.image,
        restart=incoming.restart or existing.restart,
        env_files=env_files,
        mounts=sorted(mounts_by_key.values(), key=lambda item: (item.target, item.source, item.kind)),
    )


def _resolve_candidate(raw_path: str, folder: Path) -> str:
    """Resolve a compose-referenced host path against the project folder."""
    candidate = Path(raw_path).expanduser()
    if not candidate.is_absolute():
        candidate = (folder / candidate).resolve()
    return str(candidate)


def extract_project_from_compose(folder: Path, compose_files: list[Path]) -> ComposeProject:
    """Build a ComposeProject by merging every compose file found in *folder*."""
    services_by_name: dict[str, ServiceDefinition] = {}
    named_volumes: set[str] = set()
    backup_paths: set[str] = set()

    for compose_file in sorted(compose_files):
        data = load_yaml_file(compose_file)

        raw_volumes = data.get("volumes")
        if isinstance(raw_volumes, dict):
            named_volumes.update(name for name in raw_volumes if isinstance(name, str))

        raw_services = data.get("services") or {}
        if not isinstance(raw_services, dict):
            continue

        for service_name, service_data in raw_services.items():
            if not isinstance(service_name, str):
                continue

            incoming = parse_service_definition(service_name, service_data)
            if service_name in services_by_name:
                services_by_name[service_name] = merge_service(services_by_name[service_name], incoming)
            else:
                services_by_name[service_name] = incoming

    # Backup candidates: bind-mount sources and env files, resolved relative
    # to the project folder when not absolute.
    for service in services_by_name.values():
        for mount in service.mounts:
            if mount.kind == "bind" and mount.source:
                backup_paths.add(_resolve_candidate(mount.source, folder))
        for env_file in service.env_files:
            backup_paths.add(_resolve_candidate(env_file, folder))

    return ComposeProject(
        name=folder.name,
        root_path=normalize_path(folder),
        compose_files=[file.name for file in sorted(compose_files)],
        services=sorted(services_by_name.values(), key=lambda item: item.name),
        named_volumes=sorted(named_volumes),
        backup_paths=sorted(backup_paths),
    )


def group_projects_by_folder(compose_files: list[Path]) -> list[ComposeProject]:
    """Group compose files by parent folder and build one project per folder."""
    grouped: dict[Path, list[Path]] = {}
    for compose_file in compose_files:
        grouped.setdefault(compose_file.parent, []).append(compose_file)

    return [
        extract_project_from_compose(folder, files)
        for folder, files in sorted(grouped.items())
    ]


def scan_projects(base_path: Path) -> list[ComposeProject]:
    """Scan *base_path* recursively for Docker Compose projects.

    Raises:
        FileNotFoundError: if *base_path* does not exist.
        NotADirectoryError: if *base_path* is not a directory.
    """
    if not base_path.exists():
        raise FileNotFoundError(f"Path does not exist: {base_path}")

    if not base_path.is_dir():
        raise NotADirectoryError(f"Path is not a directory: {base_path}")

    compose_files = find_compose_files(base_path)
    return group_projects_by_folder(compose_files)