From 21906ff37e1603354a72831a2b1d5719f05d9ae1 Mon Sep 17 00:00:00 2001 From: Eddie Nielsen <“ed”@edcore.dk”> Date: Sun, 22 Mar 2026 12:04:11 +0000 Subject: [PATCH] feat: add YAML parsing and backup detection --- .gitignore | 4 + .../__pycache__/__init__.cpython-310.pyc | Bin 226 -> 0 bytes dockervault/__pycache__/cli.cpython-310.pyc | Bin 2191 -> 0 bytes .../__pycache__/models.cpython-310.pyc | Bin 1109 -> 0 bytes .../__pycache__/scanner.cpython-310.pyc | Bin 2527 -> 0 bytes dockervault/cli.py | 32 ++- dockervault/models.py | 40 +++- dockervault/scanner.py | 211 ++++++++++++++---- pyproject.toml | 7 +- 9 files changed, 249 insertions(+), 45 deletions(-) create mode 100644 .gitignore delete mode 100644 dockervault/__pycache__/__init__.cpython-310.pyc delete mode 100644 dockervault/__pycache__/cli.cpython-310.pyc delete mode 100644 dockervault/__pycache__/models.cpython-310.pyc delete mode 100644 dockervault/__pycache__/scanner.cpython-310.pyc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cecc186 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.venv/ +*.egg-info/ +__pycache__/ +*.pyc diff --git a/dockervault/__pycache__/__init__.cpython-310.pyc b/dockervault/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 3d95bd7f89cafacc331ed8deff62e252e982ae45..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 226 zcmd1j<>g`kg4tL1XZir?#~=v0lqz4mrI zOPkdO0qJWVcrKNiMUaoh|Hr|aQ>Sp0U=e;;Gg>`johB`ZZtE;yLU0uZOn|f>5k2AAQoQ}xd^r6sP z{BKiu>mUs4afO4GvoChpnDvB`s-j$Qe9U_e1)yfYiLvNO<*OP6qMG}Xvr>X!)SQ~% zcfd1Te__qmR31Hy^>^K2YDTlGCG#~g!R;?UZwV`L_bph(MWmAf#b>Kg-tTua^%be1 zFxJ!YqhX?BWhKQsYIU#%-C#3MRjl#S!NhZLJMRzjA`Ye*p;T?h=>X>-VTb&NX_;*Ebe(Lot5jGFACyJZ zh4qP_Ft$!Kc9wO;<=i8=f5N^t!9`j0Uzxsu`yO6d-p>2+a;%o8*;zhOoKtj^ChftF ztv?th>Ga$hd=&3vkzC;CzG_M-CKQNL2I*hYozU*z($xXXtv5Z+$}~YWox-PPxoj+)v$*j;7+aj+UU|+{(pc27>Ai^kKDWT z4o3AU6fYO870X6qDHwS{K<&E{jOK!4#BL^OTtFl~2oCuY_5~fZO%jpNa=%ptcm{(w zQ`Sl2%*w3WkL`@ka|2wMZ7X|ulGz4&hZ-O(-B}MkU;1b{`VtarD`}F&0P;~uN?=Tu zi=?gr`sk?8#81q75EN&yNRFR4!#&~idHl}v3)~m_3T<$b%tRoL9|?*kYB^MX17X-6 zqha!bAC%R^P~n(#>!TUt80559+2ebBSA4^=i;@}FR5!)WN1W|qh>Yfpr^L9dpPA|T zG?+O1hJB0ayX$=KDA_rp9`nZxzH0h2^JzaUI0e2wvR; zwaSd9R(eTGYj)CbemM|mL=>88q3NQUx=hbvv2%A3(pqjxh?bGsd9XKErz3A7s|+-% zi+0_?hovyUg$BxK;#JhJKPV;$@Jp+ZhHOjfbL3x^Czp70;wRDTp-PJBDh!Kvu(;d< znna1LMc(Az58{<8&vPm3ucYvVzDv?0iw}uz zT0aaohxjbUVW=ssnn0~@(DDfuGz0o4O&*jPvI{3>VwdX@!0UqttEyE=5mK90N_$y^tYz(}TfA|wHh;?gOx}}$TrJwqB%#}eIrXeN5;~w{~={v#$9$pb1UOJaFjZO%Ue$uqX zmw1oIS6t(nKI%~t5X5~u~=X3!R+8`6zL0AzO75Pdo>6qNmQ9?B$ zMq-SRlN+K}!M%MmI8mh-2tJrKvzh@{`Df^bb3L$2#f3EMvvV6(ied8@Di~}S(Tk@K zA+O)1j1oupAww_0mb7fd7}CI)Z8HWH=MwEMV_)W(ygTVLwjeWO+K0Q=LuzynYhhpg z1UJO6u&tqsz(e?&E^KWQIT2lfPwdnuoA_pLJ9p%+pO^Bsd+%i6tu**w-3KAI`ZxxO z)0OcBlEwKKR139ndz+q61@AOA4iC%^Pkb}2usD)ATOn^u)z<-3u##5!sOcocS zemKt|rKY`0^-HiviPv0vI7`w&jwf??EWoiwoowg+k0MO{9yHR?5F8>NS$t|BA&d6e z+u`o^@!k>JIoLlu7!TR*`@P|4dw)2t8!yaR3xV|8XrqyJ6JMi2sk&s%>C(tuqES1J F{sATB4ln=! diff --git a/dockervault/__pycache__/scanner.cpython-310.pyc b/dockervault/__pycache__/scanner.cpython-310.pyc deleted file mode 100644 index 25916c1c44fc59c437abb0a2a986ed03c2be508e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2527 zcmZ`*Pj4GV6yKShUE6CXEpD3rp@&sl5H5}TM;zKvDoW!>6>VA(3c|E(H=aqd>3Y|l znY4{rONjJH58RL{B>TXDZ@`UD!DX(T_zDpcyf>SKw!m7u`}WPun>X+M-tRe|nhG4a zUi|iJ@&ib}60!YDASa7DZJg(?XxbkI+l8Uzw7ra-AcdO ztrADjagBQu(hc}54|x3<>(==ZKFtq+)Zj;XlOFqxf3nkxY=2{ zcB`{&rn6l3<1`tHXaI|cC%(F;zP>nm=gVtz-FR+ze(rMAp1c2UYyO@(bK2BaVpUF* zczEMWp*Ll=6QqgKU*hifdjy@^y>JqP`#MJH5(sO~h!oBkVl^QP&i#WUTF^0>IPyqA zH>h?WqeRB6pc9DWql)$lvQ6O3ba-Jg$#|G14@J5aF66z3LWaBN3|Es>sBkmmpxbw` zZOECHZ#=n{<}0R>s0gPSmc@M`J#fbO@n9e_ZhVzXEqKe76;xOHvF@!4Wf+F!XGbnycV-)GPC09Z+?!rW){Ibec9v zKzXu)?|*RTPKKv zzuj?{v*$M4Bl3llktYuKMif%w7MP5SBUZ4n3+uhYljjR>%(y=x^1`E21y%Ef$14x1 zyv(a33K|BZwFyy+RtiuePIgdxW4~Y%GV%*w-Ywj5rSQj9UY|h1A)%*qO$UX$T_&|L z{3!_Gx|yu4>u?ZD1^HXeWeD%bNl%2BD%B3`q>qIdsE`ZHn8L00YFOLf@zWg=?vUCaSQD+f zsYwy@NQWn-NN8Ko?L(4g%C0 zRN?|NwFh-uyE`)$>$~;5%Ul13eGOJx*nbB?Vz~r>+W;0JV+R<7tT_`p22QbHV+d^x z;2yJyvyBixbB~VL$kX1o0|50vQ-ihs*w>YUZkG@}381CATCfd-D%&PlSkD}=dft*f_SszTFEXL!~oJlf-;Xk{x}%~K%2_7wnmoT{rJ>@93DyA1#^ z&$T6bIR|EB3-_)A%7o@(6?x3~kZ!CAL{$kVFVUi}(O zZJLU8Oz?lm%+hXk&r{=))I6nd!zQs;OK|8BmPXEzrRsae{ zWWP7LOT)w1`$C=rkw8Zd#Z!pVpU>Okyv7&Me?vczSLr&gVuruXO!A@seR3Y)gCZG6s>N;zXK6P5;1G26sCfW zElpO+zYooq8fd!@+?T2nRM?#@s7wWck5zFBRoEm;>|sjn30?w>je+H?>J_lInTo4O Xyte)tPL(#u3=L@D2KC^r>EnL`{mzls diff --git a/dockervault/cli.py b/dockervault/cli.py index 73f0982..2469eba 100644 --- a/dockervault/cli.py +++ b/dockervault/cli.py @@ -46,7 +46,31 @@ def render_text(projects: list) -> str: lines.append(f"- {project.name}") lines.append(f" Path: {project.root_path}") lines.append(f" Compose files: {', '.join(project.compose_files) or '-'}") - lines.append(f" Services: {', '.join(project.services) or '-'}") + lines.append(f" Services: {', '.join(project.service_names) or '-'}") + lines.append(f" Named volumes: {', '.join(project.named_volumes) or '-'}") + + if project.backup_paths: + lines.append(" Backup candidates:") + for backup_path in project.backup_paths: + lines.append(f" - {backup_path}") + else: + lines.append(" Backup candidates: -") + + for service in project.services: + lines.append(f" Service: {service.name}") + lines.append(f" Image: {service.image or '-'}") + lines.append(f" Restart: {service.restart or '-'}") + lines.append(f" Env files: {', '.join(service.env_files) or '-'}") + + if service.mounts: + lines.append(" Mounts:") + for mount in service.mounts: + ro = " (ro)" if mount.read_only else "" + lines.append( + f" - {mount.kind}: {mount.source or '[anonymous]'} -> {mount.target}{ro}" + ) + else: + lines.append(" Mounts: -") return "\n".join(lines) @@ -61,6 +85,12 @@ def main() -> int: except (FileNotFoundError, NotADirectoryError) as exc: print(f"Error: {exc}", file=sys.stderr) return 2 + except json.JSONDecodeError as exc: + print(f"Error: invalid JSON/YAML data: {exc}", file=sys.stderr) + return 2 + except Exception as exc: + print(f"Error: {exc}", file=sys.stderr) + return 2 if args.json: print(json.dumps([project.to_dict() for project in projects], indent=2)) diff --git a/dockervault/models.py b/dockervault/models.py index 4f161da..f8ed090 100644 --- a/dockervault/models.py +++ b/dockervault/models.py @@ -4,15 +4,51 @@ from dataclasses import asdict, dataclass, field from pathlib import Path +@dataclass(slots=True) +class MountMapping: + source: str + target: str + kind: str + read_only: bool = False + + def to_dict(self) -> dict: + return asdict(self) + + +@dataclass(slots=True) +class ServiceDefinition: + name: str + image: str | None = None + restart: str | None = None + env_files: list[str] = field(default_factory=list) + mounts: list[MountMapping] = field(default_factory=list) + + def to_dict(self) -> dict: + return asdict(self) + + @dataclass(slots=True) class ComposeProject: name: str root_path: str compose_files: list[str] = field(default_factory=list) - services: list[str] = field(default_factory=list) + services: list[ServiceDefinition] = field(default_factory=list) + named_volumes: list[str] = field(default_factory=list) + backup_paths: list[str] = field(default_factory=list) def to_dict(self) -> dict: - return asdict(self) + return { + "name": self.name, + "root_path": self.root_path, + "compose_files": self.compose_files, + "services": [service.to_dict() for service in self.services], + "named_volumes": self.named_volumes, + "backup_paths": self.backup_paths, + } + + @property + def service_names(self) -> list[str]: + return [service.name for service in self.services] DEFAULT_COMPOSE_FILENAMES = { diff --git a/dockervault/scanner.py b/dockervault/scanner.py index ca814b1..d4fbf2c 100644 --- a/dockervault/scanner.py +++ b/dockervault/scanner.py @@ -1,11 +1,17 @@ from __future__ import annotations -import re from pathlib import Path +from typing import Any -from dockervault.models import ComposeProject, DEFAULT_COMPOSE_FILENAMES, normalize_path +import yaml -SERVICE_LINE_RE = re.compile(r"^\s{2}([A-Za-z0-9_.-]+):\s*$") +from dockervault.models import ( + ComposeProject, + DEFAULT_COMPOSE_FILENAMES, + MountMapping, + ServiceDefinition, + normalize_path, +) def find_compose_files(base_path: Path) -> list[Path]: @@ -19,41 +25,176 @@ def find_compose_files(base_path: Path) -> list[Path]: return sorted(matches) -def parse_services_from_compose(compose_path: Path) -> list[str]: - """ - Light parser for service names. - - Keeps dependencies minimal for v0. - It looks for the `services:` block and collects entries indented by two spaces. - """ +def load_yaml_file(compose_path: Path) -> dict[str, Any]: try: - lines = compose_path.read_text(encoding="utf-8").splitlines() + content = compose_path.read_text(encoding="utf-8") except UnicodeDecodeError: - lines = compose_path.read_text(encoding="utf-8", errors="ignore").splitlines() + content = compose_path.read_text(encoding="utf-8", errors="ignore") - in_services = False - services: list[str] = [] + data = yaml.safe_load(content) or {} + if not isinstance(data, dict): + return {} + return data - for line in lines: - stripped = line.strip() - if not stripped or stripped.startswith("#"): +def parse_env_files(value: Any) -> list[str]: + if isinstance(value, str): + return [value] + + if isinstance(value, list): + items: list[str] = [] + for item in value: + if isinstance(item, str): + items.append(item) + elif isinstance(item, dict): + path = item.get("path") + if isinstance(path, str): + items.append(path) + return sorted(set(items)) + + return [] + + +def normalize_volume_dict(volume: dict[str, Any]) -> MountMapping | None: + source = volume.get("source") or volume.get("src") or "" + target = volume.get("target") or volume.get("dst") or volume.get("destination") or "" + if not isinstance(target, str) or not target: + return None + + kind = volume.get("type") or ("bind" if source and str(source).startswith(("/", ".", "~")) else "volume") + read_only = bool(volume.get("read_only") or volume.get("readonly")) + + return MountMapping( + source=str(source), + target=target, + kind=str(kind), + read_only=read_only, + ) + + +def normalize_volume_string(value: str) -> MountMapping | None: + parts = value.split(":") + if len(parts) == 1: + return MountMapping(source="", target=parts[0], kind="anonymous", read_only=False) + + if len(parts) >= 2: + source = parts[0] + target = parts[1] + options = parts[2:] + read_only = any(option == "ro" for option in options) + + if source.startswith(("/", ".", "~")): + kind = "bind" + else: + kind = "volume" + + return MountMapping(source=source, target=target, kind=kind, read_only=read_only) + + return None + + +def parse_mounts(value: Any) -> list[MountMapping]: + mounts: list[MountMapping] = [] + + if not isinstance(value, list): + return mounts + + for item in value: + mapping: MountMapping | None = None + if isinstance(item, str): + mapping = normalize_volume_string(item) + elif isinstance(item, dict): + mapping = normalize_volume_dict(item) + + if mapping: + mounts.append(mapping) + + return mounts + + +def parse_service_definition(name: str, data: Any) -> ServiceDefinition: + if not isinstance(data, dict): + return ServiceDefinition(name=name) + + mounts = parse_mounts(data.get("volumes", [])) + env_files = parse_env_files(data.get("env_file")) + + return ServiceDefinition( + name=name, + image=data.get("image") if isinstance(data.get("image"), str) else None, + restart=data.get("restart") if isinstance(data.get("restart"), str) else None, + env_files=env_files, + mounts=mounts, + ) + + +def merge_service(existing: ServiceDefinition, incoming: ServiceDefinition) -> ServiceDefinition: + mounts_by_key: dict[tuple[str, str, str, bool], MountMapping] = { + (mount.source, mount.target, mount.kind, mount.read_only): mount + for mount in existing.mounts + } + for mount in incoming.mounts: + mounts_by_key[(mount.source, mount.target, mount.kind, mount.read_only)] = mount + + env_files = sorted(set(existing.env_files) | set(incoming.env_files)) + + return ServiceDefinition( + name=existing.name, + image=incoming.image or existing.image, + restart=incoming.restart or existing.restart, + env_files=env_files, + mounts=sorted(mounts_by_key.values(), key=lambda item: (item.target, item.source, item.kind)), + ) + + +def extract_project_from_compose(folder: Path, compose_files: list[Path]) -> ComposeProject: + services_by_name: dict[str, ServiceDefinition] = {} + named_volumes: set[str] = set() + backup_paths: set[str] = set() + + for compose_file in sorted(compose_files): + data = load_yaml_file(compose_file) + + for volume_name in (data.get("volumes") or {}).keys() if isinstance(data.get("volumes"), dict) else []: + if isinstance(volume_name, str): + named_volumes.add(volume_name) + + raw_services = data.get("services") or {} + if not isinstance(raw_services, dict): continue - if not in_services: - if stripped == "services:": - in_services = True - continue + for service_name, service_data in raw_services.items(): + if not isinstance(service_name, str): + continue - # Leaving top-level services block - if not line.startswith(" ") and not line.startswith("\t"): - break + incoming = parse_service_definition(service_name, service_data) + if service_name in services_by_name: + services_by_name[service_name] = merge_service(services_by_name[service_name], incoming) + else: + services_by_name[service_name] = incoming - match = SERVICE_LINE_RE.match(line) - if match: - services.append(match.group(1)) + for service in services_by_name.values(): + for mount in service.mounts: + if mount.kind == "bind" and mount.source: + candidate = Path(mount.source).expanduser() + if not candidate.is_absolute(): + candidate = (folder / candidate).resolve() + backup_paths.add(str(candidate)) - return sorted(set(services)) + for env_file in service.env_files: + candidate = Path(env_file).expanduser() + if not candidate.is_absolute(): + candidate = (folder / candidate).resolve() + backup_paths.add(str(candidate)) + + return ComposeProject( + name=folder.name, + root_path=normalize_path(folder), + compose_files=[file.name for file in sorted(compose_files)], + services=sorted(services_by_name.values(), key=lambda item: item.name), + named_volumes=sorted(named_volumes), + backup_paths=sorted(backup_paths), + ) def group_projects_by_folder(compose_files: list[Path]) -> list[ComposeProject]: @@ -65,19 +206,7 @@ def group_projects_by_folder(compose_files: list[Path]) -> list[ComposeProject]: projects: list[ComposeProject] = [] for folder, files in sorted(grouped.items()): - service_names: set[str] = set() - - for compose_file in files: - service_names.update(parse_services_from_compose(compose_file)) - - projects.append( - ComposeProject( - name=folder.name, - root_path=normalize_path(folder), - compose_files=[file.name for file in sorted(files)], - services=sorted(service_names), - ) - ) + projects.append(extract_project_from_compose(folder, files)) return projects diff --git a/pyproject.toml b/pyproject.toml index b7dd3b7..652c56e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "dockervault" -version = "0.1.0" +version = "0.2.0" description = "CLI backup discovery tool for Docker environments" readme = "README.md" requires-python = ">=3.10" @@ -16,11 +16,16 @@ keywords = ["docker", "backup", "cli", "borg", "inventory"] classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent" ] +dependencies = [ + "PyYAML>=6.0", +] + [project.scripts] dockervault = "dockervault.cli:main"