diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cecc186 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.venv/ +*.egg-info/ +__pycache__/ +*.pyc diff --git a/LICENSE b/LICENSE index c23d472..4aec557 100644 --- a/LICENSE +++ b/LICENSE @@ -1,15 +1,21 @@ -GNU GENERAL PUBLIC LICENSE -Version 2, June 1991 +MIT License -Copyright (C) 2026 Eddie Nielsen +Copyright (c) 2026 Ed & NodeFox -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -[...] - -(⚠️ VIGTIGT: den fulde GPL v2 tekst er meget lang — du skal bruge hele den officielle) - -👉 Brug den officielle her: -https://www.gnu.org/licenses/old-licenses/gpl-2.0.txt +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 457ae8d..ab0cda8 100644 --- a/README.md +++ b/README.md @@ -1,229 +1,57 @@ -
-
+# DockerVault
----
+Early CLI foundation for DockerVault.
-## 📑 Contents
+## Current scope
-* [🚀 What is DockerVault?](#-what-is-dockervault)
-* [✨ Goals](#-goals)
-* [⚙️ Technology](#-technology)
-* [🏗 Architecture](#-architecture)
-* [📦 What DockerVault backs up](#-what-dockervault-is-expected-to-back-up)
-* [🔁 Restore philosophy](#-restore-philosophy)
-* [🧩 Planned Features](#-planned-features)
-* [🛣 Roadmap](#-roadmap)
-* [📁 Project Structure](#-project-structure)
-* [🤝 Philosophy](#-philosophy)
-* [📜 License](#-license)
+- Python CLI project skeleton
+- `scan` command
+- Recursive discovery of Docker Compose projects
+- YAML parsing with `PyYAML`
+- Detection of:
+ - services
+ - images
+ - restart policies
+ - bind mounts
+ - named volumes
+ - `env_file`
+- JSON or human-readable output
----
+## Quick start
-## 🚀 What is DockerVault?
-
-DockerVault is a CLI-first backup system for Docker environments.
-
-It is designed to make backups simple, transparent, and reliable without unnecessary complexity. The goal is to provide a practical way to discover containers, identify important data, and manage backups in a structured and predictable way.
-
-DockerVault is aimed at homelabs, self-hosted infrastructure, and small-scale server environments where control, clarity, and recoverability matter.
-
----
-
-## ✨ Goals
-
-DockerVault is being built to:
-
-* Discover Docker containers automatically
-* Identify volumes, bind mounts, and relevant configuration data
-* Keep track of backup history and metadata
-* Use a proven backup backend instead of reinventing backup logic
-* Make restore operations easier and safer
-* Stay simple enough to understand and debug
-
----
-
-## ⚙️ Technology
-
-DockerVault is planned as a modular, CLI-first tool.
-
-### Current design direction
-
-* **Core language:** Python
-* **Backup engine:** BorgBackup
-* **Metadata storage:** SQLite
-* **Interface:** CLI first
-* **Platform focus:** Linux Docker hosts
-
-### Why this stack?
-
-* **Python** makes it fast to build, maintain, and extend
-* **BorgBackup** is mature, reliable, and well-suited for deduplicated backups
-* **SQLite** keeps metadata simple, local, and easy to inspect
-* **CLI first** keeps the project transparent and easy to debug
-
-The project philosophy is clear: use proven tools where it makes sense, and avoid building complexity just for the sake of it.
-
----
-
-## 🏗 Architecture
-
-DockerVault follows a simple flow:
-
-1. Scan Docker environment
-2. Detect containers, volumes, bind mounts, and configs
-3. Store metadata in SQLite
-4. Build backup jobs from discovered data
-5. Execute backups through Borg
-6. Restore data when needed
-
-More detailed architecture notes will live in `docs/architecture.md`.
-
----
-
-## 📦 What DockerVault is expected to back up
-
-DockerVault is intended to focus on the parts of Docker environments that actually matter:
-
-* Docker volumes
-* Bind mounts
-* Selected configuration files
-* Backup metadata
-* Restore-related information
-
-It is not intended to blindly copy everything without structure. The purpose is to know what is being backed up and why.
-
----
-
-## 🔁 Restore philosophy
-
-Backups are only useful if restore is realistic.
-
-DockerVault is being designed with restore in mind from the beginning:
-
-* Clear mapping between containers and stored data
-* Metadata that explains relationships between services and data
-* Predictable restore flow
-* Minimal guesswork during recovery
-
-The goal is not just to store backups, but to enable actual recovery.
-
----
-
-## 🧩 Planned Features
-
-### Core features
-
-* Docker container discovery
-* Volume detection
-* Bind mount detection
-* Backup job creation
-* Borg-based backup execution
-* SQLite-based metadata tracking
-* Restore workflow
-
-### Future possibilities
-
-* Scheduled backups
-* Retention policies
-* Pre/post backup hooks
-* E-mail notifications
-* `ntfy` notifications
-* Web interface
-* Multi-node support
-* Remote repository support
-* Backup health/status reporting
-* Configuration profiles
-* Selective backup policies per container
-
----
-
-## 🛣 Roadmap
-
-### Phase 1 – Foundation
-
-* Repository structure
-* Documentation
-* CLI skeleton
-* Initial project design
-
-### Phase 2 – Discovery
-
-* Scan Docker environment
-* Detect containers
-* Detect volumes and bind mounts
-
-### Phase 3 – Backup Engine
-
-* Integrate BorgBackup
-* Build backup job flow
-* Store metadata in SQLite
-
-### Phase 4 – Restore
-
-* Basic restore workflow
-* Restore metadata mapping
-* Safer recovery process
-
-### Phase 5 – Usability
-
-* Better CLI commands
-* Config handling
-* Scheduling support
-* Notifications
-
-### Phase 6 – Expansion
-
-* Web interface
-* Multi-node support
-* Advanced backup policies
-
----
-
-## 📁 Project Structure
-
-```text
-dockervault/
-├── cmd/
-├── core/
-├── scanner/
-├── backup/
-├── restore/
-├── config/
-├── database/
-├── docs/
-├── scripts/
-├── images/
-├── README.md
-└── LICENSE
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -e .
```
----
+Run a scan:
-## 🤝 Philosophy
+```bash
+dockervault scan /path/to/docker/projects
+```
-DockerVault is built on a few simple principles:
+JSON output:
-* Keep it simple
-* Be transparent
-* Avoid unnecessary complexity
-* Prefer proven tools over hype
-* Build something practical and maintainable
+```bash
+dockervault scan /path/to/docker/projects --json
+```
-No magic. No hidden behavior. No unnecessary abstraction.
+## What v0.2 adds
----
+DockerVault no longer just finds compose files.
+It now builds a first inventory layer that can be used for backup logic later:
-Author
-Built with ❤️ for Lanx by NodeFox 🦊
+- project name and root path
+- compose files found in the project
+- service metadata
+- backup candidate paths from bind mounts and env files
+- named volumes defined in compose
-Maintained by Eddie Nielsen Feel free to contribute, suggest improvements or fork the project.
-
----
-
-## 📜 License
-
-This project is licensed under the **GNU General Public License v2.0**.
-
-See the `LICENSE` file for details.
+## Example direction
+This is meant as the next brick in a bigger flow:
+1. Discover Docker app folders
+2. Learn what services and data paths exist
+3. Later attach backup rules, Borg targets, retention, notifications, and restore metadata
diff --git a/dockervault/__init__.py b/dockervault/__init__.py
new file mode 100644
index 0000000..b770313
--- /dev/null
+++ b/dockervault/__init__.py
@@ -0,0 +1,4 @@
+"""DockerVault package."""
+
+__all__ = ["__version__"]
+__version__ = "0.1.0"
diff --git a/dockervault/cli.py b/dockervault/cli.py
new file mode 100644
index 0000000..2469eba
--- /dev/null
+++ b/dockervault/cli.py
@@ -0,0 +1,107 @@
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+from dockervault.scanner import scan_projects
+
+
def build_parser() -> argparse.ArgumentParser:
    """Create the top-level argument parser for the dockervault CLI."""
    parser = argparse.ArgumentParser(prog="dockervault", description="DockerVault CLI")
    commands = parser.add_subparsers(dest="command", required=True)

    scan = commands.add_parser(
        "scan",
        help="Scan a folder for Docker Compose projects",
    )
    scan.add_argument(
        "path",
        nargs="?",
        default=".",
        help="Base path to scan (default: current directory)",
    )
    scan.add_argument(
        "--json",
        action="store_true",
        help="Output scan results as JSON",
    )

    return parser
+
+
def render_text(projects: list) -> str:
    """Format scan results as a human-readable, indented report."""
    if not projects:
        return "No Docker Compose projects found."

    out: list[str] = [f"Found {len(projects)} project(s):"]

    for project in projects:
        out.append("")
        out.append(f"- {project.name}")
        out.append(f"  Path: {project.root_path}")
        out.append(f"  Compose files: {', '.join(project.compose_files) or '-'}")
        out.append(f"  Services: {', '.join(project.service_names) or '-'}")
        out.append(f"  Named volumes: {', '.join(project.named_volumes) or '-'}")

        if project.backup_paths:
            out.append("  Backup candidates:")
            out.extend(f"    - {candidate}" for candidate in project.backup_paths)
        else:
            out.append("  Backup candidates: -")

        for service in project.services:
            out.append(f"  Service: {service.name}")
            out.append(f"    Image: {service.image or '-'}")
            out.append(f"    Restart: {service.restart or '-'}")
            out.append(f"    Env files: {', '.join(service.env_files) or '-'}")

            if not service.mounts:
                out.append("    Mounts: -")
                continue

            out.append("    Mounts:")
            for mount in service.mounts:
                suffix = " (ro)" if mount.read_only else ""
                out.append(
                    f"      - {mount.kind}: {mount.source or '[anonymous]'} -> {mount.target}{suffix}"
                )

    return "\n".join(out)
+
+
def main() -> int:
    """CLI entry point.

    Returns a process exit code: 0 on success, 2 on scan errors,
    1 if dispatch falls through.
    """
    parser = build_parser()
    args = parser.parse_args()

    if args.command == "scan":
        try:
            projects = scan_projects(Path(args.path))
        except (FileNotFoundError, NotADirectoryError) as exc:
            # Invalid base path supplied on the command line.
            print(f"Error: {exc}", file=sys.stderr)
            return 2
        except Exception as exc:
            # Catch-all boundary: scanning parses YAML from arbitrary user
            # directories, so any parse/IO failure ends the run with a
            # diagnostic instead of a traceback. (The previous explicit
            # json.JSONDecodeError clause was dead code: the scan parses
            # YAML, which never raises JSONDecodeError.)
            print(f"Error: {exc}", file=sys.stderr)
            return 2

        if args.json:
            print(json.dumps([project.to_dict() for project in projects], indent=2))
        else:
            print(render_text(projects))

        return 0

    # Unreachable while the subparser is required=True; kept as a safe
    # fallback if a future command forgets to handle dispatch.
    parser.print_help()
    return 1
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/dockervault/models.py b/dockervault/models.py
new file mode 100644
index 0000000..f8ed090
--- /dev/null
+++ b/dockervault/models.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+
+
+@dataclass(slots=True)
+class MountMapping:
+ source: str
+ target: str
+ kind: str
+ read_only: bool = False
+
+ def to_dict(self) -> dict:
+ return asdict(self)
+
+
+@dataclass(slots=True)
+class ServiceDefinition:
+ name: str
+ image: str | None = None
+ restart: str | None = None
+ env_files: list[str] = field(default_factory=list)
+ mounts: list[MountMapping] = field(default_factory=list)
+
+ def to_dict(self) -> dict:
+ return asdict(self)
+
+
+@dataclass(slots=True)
+class ComposeProject:
+ name: str
+ root_path: str
+ compose_files: list[str] = field(default_factory=list)
+ services: list[ServiceDefinition] = field(default_factory=list)
+ named_volumes: list[str] = field(default_factory=list)
+ backup_paths: list[str] = field(default_factory=list)
+
+ def to_dict(self) -> dict:
+ return {
+ "name": self.name,
+ "root_path": self.root_path,
+ "compose_files": self.compose_files,
+ "services": [service.to_dict() for service in self.services],
+ "named_volumes": self.named_volumes,
+ "backup_paths": self.backup_paths,
+ }
+
+ @property
+ def service_names(self) -> list[str]:
+ return [service.name for service in self.services]
+
+
# File names that identify a Docker Compose project folder
# (both the legacy docker-compose.* and the newer compose.* spellings).
DEFAULT_COMPOSE_FILENAMES = {
    "compose.yaml",
    "compose.yml",
    "docker-compose.yaml",
    "docker-compose.yml",
}
+
+
def normalize_path(path: Path) -> str:
    """Return *path* as an absolute, resolved (symlink-free) string."""
    resolved = path.resolve()
    return str(resolved)
diff --git a/dockervault/scanner.py b/dockervault/scanner.py
new file mode 100644
index 0000000..d4fbf2c
--- /dev/null
+++ b/dockervault/scanner.py
@@ -0,0 +1,222 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from dockervault.models import (
+ ComposeProject,
+ DEFAULT_COMPOSE_FILENAMES,
+ MountMapping,
+ ServiceDefinition,
+ normalize_path,
+)
+
+
def find_compose_files(base_path: Path) -> list[Path]:
    """Recursively collect files under *base_path* whose names look like compose files.

    Returns the matches sorted for deterministic downstream grouping.
    """
    found = [
        candidate
        for candidate in base_path.rglob("*")
        if candidate.is_file() and candidate.name in DEFAULT_COMPOSE_FILENAMES
    ]
    return sorted(found)
+
+
def load_yaml_file(compose_path: Path) -> dict[str, Any]:
    """Parse *compose_path* as YAML and return a top-level mapping.

    Returns {} when the document is empty or its top level is not a
    mapping. Raises ValueError (carrying the file path) on invalid YAML,
    so callers can report which file failed instead of a bare parser error.
    """
    try:
        content = compose_path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Best effort for files containing stray non-UTF-8 bytes.
        content = compose_path.read_text(encoding="utf-8", errors="ignore")

    try:
        data = yaml.safe_load(content) or {}
    except yaml.YAMLError as exc:
        # Previously the raw YAMLError propagated without any indication
        # of which file was broken; add that context here.
        raise ValueError(f"Invalid YAML in {compose_path}: {exc}") from exc

    return data if isinstance(data, dict) else {}
+
+
def parse_env_files(value: Any) -> list[str]:
    """Normalize a compose ``env_file`` value to a list of path strings.

    Accepts the scalar string form, the list-of-strings form, and the
    long mapping form (``{"path": ..., "required": ...}``). Unlike the
    previous implementation, declaration order is preserved (with
    duplicates dropped): Compose applies env files top-down with later
    files overriding earlier ones, so sorting would misrepresent the
    effective configuration.
    """
    if isinstance(value, str):
        return [value]

    if isinstance(value, list):
        items: list[str] = []
        for item in value:
            if isinstance(item, str):
                items.append(item)
            elif isinstance(item, dict):
                path = item.get("path")
                if isinstance(path, str):
                    items.append(path)
        # dict.fromkeys: dedupe while keeping first-seen order.
        return list(dict.fromkeys(items))

    return []
+
+
def normalize_volume_dict(volume: dict[str, Any]) -> MountMapping | None:
    """Convert a long-syntax volume mapping into a MountMapping.

    Returns None when no usable target path is present.
    """
    target = volume.get("target") or volume.get("dst") or volume.get("destination") or ""
    if not (isinstance(target, str) and target):
        return None

    source = volume.get("source") or volume.get("src") or ""
    kind = volume.get("type")
    if not kind:
        # No explicit type: path-like sources are binds, names are volumes.
        kind = "bind" if source and str(source).startswith(("/", ".", "~")) else "volume"

    return MountMapping(
        source=str(source),
        target=target,
        kind=str(kind),
        read_only=bool(volume.get("read_only") or volume.get("readonly")),
    )
+
+
def normalize_volume_string(value: str) -> MountMapping | None:
    """Convert a short-syntax volume string to a MountMapping.

    Handles ``"container_path"`` (anonymous volume) and
    ``"source:target[:options]"``. Sources starting with "/", "." or "~"
    are bind mounts; anything else is a named volume. The Optional
    return type is kept for API symmetry with normalize_volume_dict,
    although every input now yields a mapping.

    NOTE(review): splitting on ":" assumes POSIX-style paths, matching
    the project's Linux host focus.
    """
    parts = value.split(":")
    if len(parts) == 1:
        # Bare container path: anonymous volume with no source.
        return MountMapping(source="", target=parts[0], kind="anonymous", read_only=False)

    # str.split always yields at least one part, so len(parts) >= 2 here;
    # the previous trailing "return None" was unreachable and is removed.
    source, target, *options = parts
    read_only = "ro" in options
    kind = "bind" if source.startswith(("/", ".", "~")) else "volume"
    return MountMapping(source=source, target=target, kind=kind, read_only=read_only)
+
+
def parse_mounts(value: Any) -> list[MountMapping]:
    """Parse a compose ``volumes`` list (short or long syntax) into MountMappings.

    Non-list input and unparseable entries are silently skipped.
    """
    if not isinstance(value, list):
        return []

    parsed: list[MountMapping] = []
    for entry in value:
        if isinstance(entry, str):
            mapping = normalize_volume_string(entry)
        elif isinstance(entry, dict):
            mapping = normalize_volume_dict(entry)
        else:
            mapping = None

        if mapping is not None:
            parsed.append(mapping)

    return parsed
+
+
def parse_service_definition(name: str, data: Any) -> ServiceDefinition:
    """Build a ServiceDefinition from a single entry under ``services``."""
    if not isinstance(data, dict):
        # Malformed service entry: keep the name, drop everything else.
        return ServiceDefinition(name=name)

    image = data.get("image")
    restart = data.get("restart")

    return ServiceDefinition(
        name=name,
        image=image if isinstance(image, str) else None,
        restart=restart if isinstance(restart, str) else None,
        env_files=parse_env_files(data.get("env_file")),
        mounts=parse_mounts(data.get("volumes", [])),
    )
+
+
def merge_service(existing: ServiceDefinition, incoming: ServiceDefinition) -> ServiceDefinition:
    """Merge two definitions of the same service (e.g. from multiple compose files).

    Incoming scalar fields win when set; mounts are deduplicated on
    (source, target, kind, read_only) with incoming entries replacing
    existing ones; env file lists are unioned and sorted.
    """
    deduped: dict[tuple[str, str, str, bool], MountMapping] = {}
    for mount in existing.mounts + incoming.mounts:
        deduped[(mount.source, mount.target, mount.kind, mount.read_only)] = mount

    return ServiceDefinition(
        name=existing.name,
        image=incoming.image or existing.image,
        restart=incoming.restart or existing.restart,
        env_files=sorted(set(existing.env_files) | set(incoming.env_files)),
        mounts=sorted(deduped.values(), key=lambda m: (m.target, m.source, m.kind)),
    )
+
+
def extract_project_from_compose(folder: Path, compose_files: list[Path]) -> ComposeProject:
    """Aggregate all compose files found in *folder* into one ComposeProject.

    Services defined in several files are merged; bind-mount sources and
    env files become backup candidate paths, resolved relative to *folder*.
    """
    services_by_name: dict[str, ServiceDefinition] = {}
    named_volumes: set[str] = set()
    backup_paths: set[str] = set()

    for compose_file in sorted(compose_files):
        data = load_yaml_file(compose_file)

        top_level_volumes = data.get("volumes")
        if isinstance(top_level_volumes, dict):
            named_volumes.update(
                key for key in top_level_volumes if isinstance(key, str)
            )

        raw_services = data.get("services") or {}
        if not isinstance(raw_services, dict):
            continue

        for service_name, service_data in raw_services.items():
            if not isinstance(service_name, str):
                continue
            parsed = parse_service_definition(service_name, service_data)
            previous = services_by_name.get(service_name)
            services_by_name[service_name] = (
                merge_service(previous, parsed) if previous else parsed
            )

    def resolve(raw: str) -> str:
        # Relative paths are anchored at the project folder.
        path = Path(raw).expanduser()
        if not path.is_absolute():
            path = (folder / path).resolve()
        return str(path)

    for service in services_by_name.values():
        for mount in service.mounts:
            if mount.kind == "bind" and mount.source:
                backup_paths.add(resolve(mount.source))
        for env_file in service.env_files:
            backup_paths.add(resolve(env_file))

    return ComposeProject(
        name=folder.name,
        root_path=normalize_path(folder),
        compose_files=[file.name for file in sorted(compose_files)],
        services=sorted(services_by_name.values(), key=lambda svc: svc.name),
        named_volumes=sorted(named_volumes),
        backup_paths=sorted(backup_paths),
    )
+
+
def group_projects_by_folder(compose_files: list[Path]) -> list[ComposeProject]:
    """Group compose files by their parent folder, one ComposeProject per folder."""
    folders: dict[Path, list[Path]] = {}
    for compose_file in compose_files:
        folders.setdefault(compose_file.parent, []).append(compose_file)

    return [
        extract_project_from_compose(folder, files)
        for folder, files in sorted(folders.items())
    ]
+
+
def scan_projects(base_path: Path) -> list[ComposeProject]:
    """Scan *base_path* recursively and return the discovered compose projects.

    Raises FileNotFoundError when the path is missing and
    NotADirectoryError when it exists but is not a directory.
    """
    if not base_path.exists():
        raise FileNotFoundError(f"Path does not exist: {base_path}")
    if not base_path.is_dir():
        raise NotADirectoryError(f"Path is not a directory: {base_path}")

    return group_projects_by_folder(find_compose_files(base_path))
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..652c56e
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,37 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "dockervault"
+version = "0.2.0"
+description = "CLI backup discovery tool for Docker environments"
+readme = "README.md"
+requires-python = ">=3.10"
+authors = [
+ { name = "Ed & NodeFox" }
+]
+license = { text = "MIT" }
+keywords = ["docker", "backup", "cli", "borg", "inventory"]
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3 :: Only",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "License :: OSI Approved :: MIT License",
+ "Operating System :: OS Independent"
+]
+
+dependencies = [
+ "PyYAML>=6.0",
+]
+
+[project.scripts]
+dockervault = "dockervault.cli:main"
+
+[tool.setuptools]
+package-dir = {"" = "."}
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["dockervault*"]