feat: add named volume detection and image-aware classification
This commit is contained in:
parent
932c668e65
commit
483e2720f1
2 changed files with 770 additions and 221 deletions
|
|
@ -1,291 +1,546 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
from .models import MountEntry
|
||||
|
||||
CRITICAL_TARGET_PATTERNS = (
|
||||
"/var/lib/mysql",
|
||||
"/var/lib/postgresql",
|
||||
"/var/lib/postgres",
|
||||
"/var/lib/mariadb",
|
||||
"/data",
|
||||
|
||||
# ----------------------------
|
||||
# Image-aware rules
|
||||
# ----------------------------
|
||||
|
||||
# Per-image classification overrides.
#
# When a service's image matches one of these keys (bare repository name,
# lowercased), the listed container targets are classified directly and take
# precedence over the generic CRITICAL_TARGETS / SKIP_* tables.
IMAGE_RULES = {
    "mariadb": {"/var/lib/mysql": "critical"},
    "mysql": {"/var/lib/mysql": "critical"},
    "postgres": {"/var/lib/postgresql/data": "critical"},
    "redis": {"/data": "critical"},
    "grafana": {"/var/lib/grafana": "critical"},
    "prometheus": {"/prometheus": "critical"},
    "influxdb": {"/var/lib/influxdb": "critical"},
    "nginx": {"/var/log/nginx": "optional"},
}
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Generic rules
|
||||
# ----------------------------
|
||||
|
||||
# Container target paths that hold persistent application data regardless of
# image: a mount at any of these exact paths is classified "critical".
# NOTE: the original literal listed "/app/data" and "/srv" twice; a set
# silently absorbs duplicates, but the redundant members are removed here.
CRITICAL_TARGETS = {
    "/config",
    "/var/www",
    "/srv",
    "/app/data",
    "/bitnami",
    "/var/opt",
    "/data",
    "/var/lib/mysql",
    "/var/lib/mariadb",
    "/var/lib/postgresql/data",
    "/bitnami/postgresql",
    "/var/lib/redis",
    "/redis",
    "/var/lib/mongodb",
    "/mongodb",
    "/data/db",
    "/var/lib/grafana",
    "/var/lib/influxdb",
    "/var/lib/prometheus",
    "/etc/letsencrypt",
    "/acme.sh",
}
|
||||
|
||||
# Substrings that suggest a target path holds user data (uploads, backups,
# databases, ...). A match does not auto-include the mount: it flags it for
# manual review in _classify_target.
REVIEW_TARGET_KEYWORDS = {
    "backup",
    "uploads",
    "media",
    "www",
    "html",
    "content",
    "storage",
    "files",
    "database",
    "db",
    "config",
}
|
||||
|
||||
# Target-path prefixes for ephemeral/runtime state; anything mounted under
# these is classified "optional". Kept as a tuple because it is fed directly
# to str.startswith() in _classify_target.
SKIP_TARGET_PREFIXES = (
    "/tmp",
    "/var/tmp",
    "/run",
    "/var/run",
    "/dev",
)
|
||||
|
||||
# Exact container targets that hold logs/cache/temp data: classified
# "optional" (worth skipping in backups) when matched verbatim.
SKIP_TARGET_EXACT = {
    "/var/log",
    "/var/log/nginx",
    "/logs",
    "/log",
    "/tmp",
    "/cache",
    "/var/cache",
    "/run",
}
|
||||
|
||||
OPTIONAL_SOURCE_PATTERNS = (
|
||||
"logs",
|
||||
"log",
|
||||
"cache",
|
||||
"tmp",
|
||||
"temp",
|
||||
)
|
||||
|
||||
SKIP_TARGET_PATTERNS = (
|
||||
"/dev",
|
||||
"/proc",
|
||||
"/sys",
|
||||
"/run",
|
||||
"/tmp",
|
||||
)
|
||||
|
||||
SKIP_SOURCE_PATTERNS = (
|
||||
"/var/run/docker.sock",
|
||||
"docker.sock",
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
# Strength ordering for classifications. When two entries resolve to the
# same host path, _prefer_entry keeps the one with the higher value.
CLASS_PRIORITY = {
    "critical": 3,
    "review": 2,
    "optional": 1,
    "unknown": 0,
}
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Compose loader
|
||||
# ----------------------------
|
||||
|
||||
def load_compose(compose_path: str | Path) -> dict[str, Any]:
    """Load a docker-compose YAML file and return it as a mapping.

    ``~`` is expanded and the path resolved before reading. An empty file
    yields an empty dict; anything that parses to a non-mapping (bare list,
    scalar) raises ValueError.
    """
    compose_file = Path(compose_path).expanduser().resolve()

    with compose_file.open("r", encoding="utf-8") as handle:
        parsed = yaml.safe_load(handle) or {}

    if not isinstance(parsed, dict):
        raise ValueError(f"Compose file did not parse as a mapping: {compose_file}")

    return parsed
|
||||
|
||||
|
||||
def is_bind_mount(volume: Any) -> bool:
|
||||
if isinstance(volume, str):
|
||||
return ":" in volume
|
||||
# ----------------------------
|
||||
# Docker helpers
|
||||
# ----------------------------
|
||||
|
||||
if isinstance(volume, dict):
|
||||
return volume.get("type") == "bind"
|
||||
|
||||
return False
|
||||
def docker_available() -> bool:
    """Report whether a ``docker`` executable is discoverable on PATH."""
    docker_binary = shutil.which("docker")
    return docker_binary is not None
|
||||
|
||||
|
||||
def parse_volume_entry(
|
||||
volume: Any,
|
||||
compose_file: Path,
|
||||
) -> dict[str, str] | None:
|
||||
project_root = compose_file.parent.resolve()
|
||||
|
||||
if isinstance(volume, str):
|
||||
parts = volume.split(":")
|
||||
if len(parts) < 2:
|
||||
def run_docker_volume_inspect(volume_name: str) -> dict[str, Any] | None:
    """Inspect a docker volume and return its first description mapping.

    Returns None on every failure mode — docker CLI missing, the subprocess
    cannot start, non-zero exit, unparseable JSON, or an empty/malformed
    result — so callers can treat any falsy return as "not resolvable".
    Never raises.
    """
    if not docker_available():
        return None

    try:
        completed = subprocess.run(
            ["docker", "volume", "inspect", volume_name],
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError:
        # e.g. docker binary vanished between the which() check and exec.
        return None

    if completed.returncode != 0:
        return None

    try:
        payload = json.loads(completed.stdout)
    except json.JSONDecodeError:
        return None

    # `docker volume inspect` emits a JSON array of volume objects.
    if not isinstance(payload, list) or not payload:
        return None

    record = payload[0]
    return record if isinstance(record, dict) else None
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Volume resolution
|
||||
# ----------------------------
|
||||
|
||||
def infer_project_name(compose_path: Path, compose_data: dict[str, Any]) -> str:
|
||||
top_level_name = compose_data.get("name")
|
||||
if isinstance(top_level_name, str) and top_level_name.strip():
|
||||
return top_level_name.strip()
|
||||
|
||||
return compose_path.parent.name
|
||||
|
||||
|
||||
def normalize_top_level_volume_name(
|
||||
volume_key: str,
|
||||
compose_data: dict[str, Any],
|
||||
) -> tuple[str | None, bool]:
|
||||
volumes = compose_data.get("volumes", {})
|
||||
if not isinstance(volumes, dict):
|
||||
return None, False
|
||||
|
||||
cfg = volumes.get(volume_key)
|
||||
if not isinstance(cfg, dict):
|
||||
return None, False
|
||||
|
||||
explicit_name = cfg.get("name")
|
||||
if not isinstance(explicit_name, str) or not explicit_name.strip():
|
||||
explicit_name = None
|
||||
|
||||
external = cfg.get("external", False)
|
||||
is_external = False
|
||||
|
||||
if isinstance(external, bool):
|
||||
is_external = external
|
||||
elif isinstance(external, dict):
|
||||
is_external = True
|
||||
ext_name = external.get("name")
|
||||
if isinstance(ext_name, str) and ext_name.strip():
|
||||
explicit_name = ext_name.strip()
|
||||
|
||||
return explicit_name, is_external
|
||||
|
||||
|
||||
def build_volume_candidates(
|
||||
compose_name: str,
|
||||
compose_path: Path,
|
||||
compose_data: dict[str, Any],
|
||||
) -> list[str]:
|
||||
project_name = infer_project_name(compose_path, compose_data)
|
||||
explicit_name, is_external = normalize_top_level_volume_name(compose_name, compose_data)
|
||||
|
||||
candidates: list[str] = []
|
||||
|
||||
if explicit_name:
|
||||
candidates.append(explicit_name)
|
||||
|
||||
if is_external:
|
||||
candidates.append(compose_name)
|
||||
|
||||
candidates.append(compose_name)
|
||||
candidates.append(f"{project_name}_{compose_name}")
|
||||
|
||||
unique: list[str] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
for candidate in candidates:
|
||||
if candidate not in seen:
|
||||
unique.append(candidate)
|
||||
seen.add(candidate)
|
||||
|
||||
return unique
|
||||
|
||||
|
||||
def resolve_named_volume(
    compose_name: str,
    compose_path: Path,
    compose_data: dict[str, Any],
) -> tuple[Path | None, str]:
    """Map a compose-level named volume to its host mountpoint.

    Each candidate docker volume name is inspected in priority order; the
    first one with a non-blank ``Mountpoint`` wins. Returns
    ``(mountpoint, reason)`` where mountpoint is None when the docker CLI is
    missing or no candidate resolves — the reason string explains which.
    """
    if not docker_available():
        return None, "docker CLI not available"

    for candidate in build_volume_candidates(compose_name, compose_path, compose_data):
        info = run_docker_volume_inspect(candidate)
        if not info:
            continue

        mountpoint = info.get("Mountpoint")
        if isinstance(mountpoint, str) and mountpoint.strip():
            return Path(mountpoint), f"named volume '{compose_name}' -> docker volume '{candidate}'"

    return None, f"named volume '{compose_name}' could not be resolved"
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Parsing helpers
|
||||
# ----------------------------
|
||||
|
||||
def _extract_image_name(image: str | None) -> str | None:
|
||||
if not image or not isinstance(image, str):
|
||||
return None
|
||||
|
||||
if "/" in image:
|
||||
image = image.split("/")[-1]
|
||||
|
||||
if ":" in image:
|
||||
image = image.split(":")[0]
|
||||
|
||||
return image.lower()
|
||||
|
||||
|
||||
def _is_bind_source(source: str) -> bool:
|
||||
return (
|
||||
source.startswith("/")
|
||||
or source.startswith("./")
|
||||
or source.startswith("../")
|
||||
or source.startswith("~/")
|
||||
)
|
||||
|
||||
|
||||
def _normalize_bind_path(source: str, compose_file: Path) -> Path:
|
||||
path = Path(source).expanduser()
|
||||
if path.is_absolute():
|
||||
return path.resolve()
|
||||
return (compose_file.parent / path).resolve()
|
||||
|
||||
|
||||
def _parse_volume_string(spec: str) -> dict[str, str | None]:
|
||||
parts = spec.split(":")
|
||||
|
||||
if len(parts) == 1:
|
||||
return {
|
||||
"source": str(source),
|
||||
"target": target,
|
||||
"source": None,
|
||||
"target": parts[0],
|
||||
"mode": None,
|
||||
"kind": "anonymous",
|
||||
}
|
||||
|
||||
if isinstance(volume, dict):
|
||||
if volume.get("type") != "bind":
|
||||
return None
|
||||
source = parts[0]
|
||||
target = parts[1]
|
||||
mode = ":".join(parts[2:]) if len(parts) > 2 else None
|
||||
|
||||
source_raw = str(volume.get("source", "")).strip()
|
||||
target = str(volume.get("target", "")).strip()
|
||||
|
||||
if not source_raw or not target:
|
||||
return None
|
||||
|
||||
source = resolve_source_path(source_raw, project_root)
|
||||
kind = "bind" if _is_bind_source(source) else "named"
|
||||
|
||||
return {
|
||||
"source": str(source),
|
||||
"source": source,
|
||||
"target": target,
|
||||
"mode": mode,
|
||||
"kind": kind,
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
def _parse_volume_entry(entry: Any) -> dict[str, str | None]:
|
||||
if isinstance(entry, str):
|
||||
return _parse_volume_string(entry)
|
||||
|
||||
def resolve_source_path(source_raw: str, project_root: Path) -> Path:
|
||||
source_path = Path(source_raw).expanduser()
|
||||
if isinstance(entry, dict):
|
||||
entry_type = entry.get("type")
|
||||
source = entry.get("source") or entry.get("src")
|
||||
target = entry.get("target") or entry.get("dst") or entry.get("destination")
|
||||
|
||||
if not source_path.is_absolute():
|
||||
source_path = (project_root / source_path).resolve()
|
||||
if entry_type == "bind":
|
||||
kind = "bind"
|
||||
elif entry_type == "volume":
|
||||
kind = "named" if source else "anonymous"
|
||||
else:
|
||||
source_path = source_path.resolve()
|
||||
if isinstance(source, str) and source:
|
||||
kind = "bind" if _is_bind_source(source) else "named"
|
||||
else:
|
||||
kind = "anonymous"
|
||||
|
||||
return source_path
|
||||
|
||||
|
||||
def classify_mount(
|
||||
service_name: str,
|
||||
source: str,
|
||||
target: str,
|
||||
) -> tuple[str, str, str]:
|
||||
source_lower = source.lower()
|
||||
target_lower = target.lower()
|
||||
|
||||
for pattern in SKIP_SOURCE_PATTERNS:
|
||||
if pattern in source_lower:
|
||||
return "skip", "optional", "docker runtime socket"
|
||||
|
||||
for pattern in SKIP_TARGET_PATTERNS:
|
||||
if target_lower == pattern or target_lower.startswith(pattern + "/"):
|
||||
return "skip", "optional", "runtime/system path"
|
||||
|
||||
for pattern in CRITICAL_TARGET_PATTERNS:
|
||||
if target_lower == pattern or target_lower.startswith(pattern + "/"):
|
||||
return "include", "critical", "persistent app data"
|
||||
|
||||
for pattern in OPTIONAL_TARGET_PATTERNS:
|
||||
if target_lower == pattern or target_lower.startswith(pattern + "/"):
|
||||
return "skip", "optional", "logs/cache/temp path"
|
||||
|
||||
source_name = Path(source).name.lower()
|
||||
for pattern in OPTIONAL_SOURCE_PATTERNS:
|
||||
if pattern in source_name:
|
||||
return "skip", "optional", "logs/cache/temp source"
|
||||
|
||||
return "review", "medium", "unknown bind mount"
|
||||
|
||||
|
||||
def classify_service_mounts(
|
||||
service_name: str,
|
||||
service_data: dict[str, Any],
|
||||
compose_file: Path,
|
||||
) -> list[dict[str, str]]:
|
||||
results: list[dict[str, str]] = []
|
||||
|
||||
volumes = service_data.get("volumes", [])
|
||||
if not isinstance(volumes, list):
|
||||
return results
|
||||
|
||||
for volume in volumes:
|
||||
if not is_bind_mount(volume):
|
||||
continue
|
||||
|
||||
parsed = parse_volume_entry(volume, compose_file)
|
||||
if not parsed:
|
||||
continue
|
||||
|
||||
bucket, priority, reason = classify_mount(
|
||||
service_name=service_name,
|
||||
source=parsed["source"],
|
||||
target=parsed["target"],
|
||||
)
|
||||
|
||||
results.append(
|
||||
{
|
||||
"bucket": bucket,
|
||||
"priority": priority,
|
||||
"reason": reason,
|
||||
"service": service_name,
|
||||
"source": parsed["source"],
|
||||
"target": parsed["target"],
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def deduplicate_items(items: list[dict[str, str]]) -> list[dict[str, str]]:
|
||||
seen: set[tuple[str, str, str, str]] = set()
|
||||
deduped: list[dict[str, str]] = []
|
||||
|
||||
for item in items:
|
||||
key = (
|
||||
item["service"],
|
||||
item["source"],
|
||||
item["target"],
|
||||
item["bucket"],
|
||||
)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
deduped.append(item)
|
||||
|
||||
return deduped
|
||||
|
||||
|
||||
def sort_items(items: list[dict[str, str]]) -> list[dict[str, str]]:
|
||||
priority_order = {
|
||||
"critical": 0,
|
||||
"high": 1,
|
||||
"medium": 2,
|
||||
"low": 3,
|
||||
"optional": 4,
|
||||
return {
|
||||
"source": source,
|
||||
"target": target,
|
||||
"mode": None,
|
||||
"kind": kind,
|
||||
}
|
||||
|
||||
return sorted(
|
||||
items,
|
||||
key=lambda item: (
|
||||
priority_order.get(item["priority"], 99),
|
||||
item["service"],
|
||||
item["source"],
|
||||
item["target"],
|
||||
),
|
||||
return {
|
||||
"source": None,
|
||||
"target": None,
|
||||
"mode": None,
|
||||
"kind": "unknown",
|
||||
}
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Classification logic
|
||||
# ----------------------------
|
||||
|
||||
def _classify_target(target_path: str | None, image_name: str | None = None) -> tuple[str, str]:
    """Classify a container target path.

    Precedence: image-specific rules, exact critical targets, exact skip
    targets, ephemeral prefixes, data-like keywords, then a generic
    "review" fallback. Returns ``(classification, reason)``.
    """
    if not target_path:
        return "review", "missing container target path"

    # Image-specific rules beat every generic table.
    image_rules = IMAGE_RULES.get(image_name) if image_name else None
    if image_rules:
        level = image_rules.get(target_path)
        if level in ("critical", "optional"):
            return level, f"{image_name} rule for {target_path}"

    if target_path in CRITICAL_TARGETS:
        return "critical", f"critical target path {target_path}"

    if target_path in SKIP_TARGET_EXACT:
        return "optional", f"non-essential target path {target_path}"

    if target_path.startswith(SKIP_TARGET_PREFIXES):
        return "optional", f"ephemeral target path {target_path}"

    lowered = target_path.lower()
    if any(keyword in lowered for keyword in REVIEW_TARGET_KEYWORDS):
        return "review", f"data-like target path {target_path} requires review"

    return "review", f"unknown target path {target_path}"
|
||||
|
||||
|
||||
def _merge_reason(existing: str, new: str) -> str:
|
||||
if not existing:
|
||||
return new
|
||||
if not new or new == existing:
|
||||
return existing
|
||||
|
||||
parts = [p.strip() for p in existing.split(" | ") if p.strip()]
|
||||
if new not in parts:
|
||||
parts.append(new)
|
||||
return " | ".join(parts)
|
||||
|
||||
|
||||
def _prefer_entry(existing: MountEntry, new: MountEntry) -> MountEntry:
    """Merge two entries for the same host path, keeping the stronger one.

    The winner is the entry with the higher CLASS_PRIORITY (ties favor the
    existing entry). The loser's reason — and a note about its service and
    target, when not already mentioned — is folded into the winner's reason;
    ``exists`` is OR-ed so a path seen existing anywhere stays marked.
    Mutates and returns the winning entry.
    """
    keep, drop = existing, new
    if CLASS_PRIORITY.get(new.classification, 0) > CLASS_PRIORITY.get(existing.classification, 0):
        keep, drop = new, existing

    keep.reason = _merge_reason(keep.reason, drop.reason)

    if drop.service and drop.service not in keep.reason:
        keep.reason = _merge_reason(
            keep.reason,
            f"also used by service={drop.service} target={drop.target}",
        )

    keep.exists = keep.exists or drop.exists
    return keep
|
||||
|
||||
|
||||
def _dedupe_entries(entries: list[MountEntry]) -> list[MountEntry]:
    """Collapse entries pointing at the same host path into one entry each.

    Absolute sources are resolved before keying so symlinked duplicates
    collapse too; conflicts are settled by _prefer_entry. Insertion order of
    first occurrences is preserved.
    """
    by_source: dict[str, MountEntry] = {}

    for entry in entries:
        if entry.source.is_absolute():
            key = str(entry.source.resolve())
        else:
            key = str(entry.source)

        if key in by_source:
            by_source[key] = _prefer_entry(by_source[key], entry)
        else:
            by_source[key] = entry

    return list(by_source.values())
|
||||
|
||||
|
||||
def _make_entry(
    source: Path,
    service: str,
    target: str | None,
    classification: str,
    reason: str,
) -> MountEntry:
    """Build a MountEntry, probing the host path and defaulting a missing target."""
    return MountEntry(
        source=source,
        service=service,
        # A falsy target (None/empty) is recorded as the literal "unknown".
        target=target if target else "unknown",
        classification=classification,
        reason=reason,
        # Probe the filesystem once at construction time.
        exists=source.exists(),
    )
|
||||
|
||||
|
||||
def classify_compose(compose_path: str | Path) -> list[MountEntry]:
    """Classify every volume of every service in a compose file.

    Bind mounts are resolved to absolute host paths; named volumes are
    resolved through the docker CLI; anonymous and unrecognized volumes
    become "review" placeholders with sentinel source paths. Entries that
    share a host path are merged, keeping the strongest classification.

    Returns an empty list when the file has no usable ``services`` mapping.
    """
    compose_file = Path(compose_path).expanduser().resolve()
    compose_data = load_compose(compose_file)

    services = compose_data.get("services", {})
    if not isinstance(services, dict):
        return []

    entries: list[MountEntry] = []

    def placeholder(service: str, target: str | None, sentinel: Path, reason: str) -> MountEntry:
        # Unresolvable volumes keep a sentinel path so reports can still show them.
        return MountEntry(
            source=sentinel,
            service=service,
            target=target or "unknown",
            classification="review",
            reason=reason,
            exists=False,
        )

    for service_name, service_cfg in services.items():
        if not isinstance(service_cfg, dict):
            continue

        declared_volumes = service_cfg.get("volumes", [])
        if not isinstance(declared_volumes, list):
            continue

        # Image-aware rules need the bare repository name (may be None).
        image_name = _extract_image_name(service_cfg.get("image"))

        for spec in declared_volumes:
            parsed = _parse_volume_entry(spec)
            source = parsed.get("source")
            target = parsed.get("target")
            kind = parsed.get("kind")

            if kind == "anonymous":
                entries.append(
                    placeholder(
                        service_name,
                        target,
                        Path("/__anonymous_volume__"),
                        "anonymous volume cannot be safely mapped to host path",
                    )
                )
                continue

            if kind == "bind" and isinstance(source, str):
                host_path = _normalize_bind_path(source, compose_file)
                classification, base_reason = _classify_target(target, image_name)
                entries.append(
                    _make_entry(
                        source=host_path,
                        service=service_name,
                        target=target,
                        classification=classification,
                        reason=f"{base_reason}; bind mount source '{source}' -> '{host_path}'",
                    )
                )
                continue

            if kind == "named" and isinstance(source, str):
                mountpoint, volume_reason = resolve_named_volume(source, compose_file, compose_data)

                if mountpoint is None:
                    entries.append(
                        placeholder(
                            service_name,
                            target,
                            Path(f"/__named_volume_unresolved__/{source}"),
                            volume_reason,
                        )
                    )
                    continue

                classification, base_reason = _classify_target(target, image_name)
                entries.append(
                    _make_entry(
                        source=mountpoint,
                        service=service_name,
                        target=target,
                        classification=classification,
                        reason=f"{base_reason}; {volume_reason}; mountpoint '{mountpoint}'",
                    )
                )
                continue

            # Anything else (kind "unknown", or malformed source) is surfaced.
            entries.append(
                placeholder(
                    service_name,
                    target,
                    Path("/__unknown_volume__"),
                    "unrecognized volume entry",
                )
            )

    return _dedupe_entries(entries)
|
||||
|
|
|
|||
294
dockervault/tests/test_classifier.py
Normal file
294
dockervault/tests/test_classifier.py
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from dockervault.classifier import classify_compose
|
||||
|
||||
|
||||
def write_compose(tmp_path: Path, content: str) -> Path:
    """Write dedented compose YAML into *tmp_path* and return the file path."""
    compose_file = tmp_path / "docker-compose.yml"
    body = textwrap.dedent(content).strip() + "\n"
    compose_file.write_text(body, encoding="utf-8")
    return compose_file
|
||||
|
||||
|
||||
def find_entry(entries, service: str, target: str):
    """Return the entry matching (service, target); fail the test when absent."""
    match = next(
        (entry for entry in entries if entry.service == service and entry.target == target),
        None,
    )
    if match is None:
        raise AssertionError(f"No entry found for service={service!r} target={target!r}")
    return match
|
||||
|
||||
|
||||
def test_bind_mount_relative_path_is_resolved_and_classified_critical(tmp_path: Path):
    """A ./relative bind mount resolves against the compose dir and is critical."""
    expected_source = tmp_path / "db"
    expected_source.mkdir()

    compose_file = write_compose(
        tmp_path,
        """
        services:
          db:
            image: mariadb:11
            volumes:
              - ./db:/var/lib/mysql
        """,
    )

    entries = classify_compose(compose_file)

    assert len(entries) == 1
    entry = find_entry(entries, "db", "/var/lib/mysql")

    assert entry.service == "db"
    assert entry.source == expected_source.resolve()
    assert entry.classification == "critical"
    assert entry.exists is True
    assert "mariadb" in entry.reason or "critical" in entry.reason
|
||||
|
||||
|
||||
def test_named_volume_resolves_and_is_classified_critical(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """A resolvable named volume maps to its docker mountpoint and stays critical."""
    volume_dir = tmp_path / "docker-volumes" / "project_dbdata" / "_data"
    volume_dir.mkdir(parents=True)

    compose_file = write_compose(
        tmp_path,
        """
        services:
          db:
            image: mariadb:11
            volumes:
              - dbdata:/var/lib/mysql

        volumes:
          dbdata:
        """,
    )

    # Pretend docker is installed and knows exactly one volume: 'dbdata'.
    monkeypatch.setattr("dockervault.classifier.docker_available", lambda: True)
    monkeypatch.setattr(
        "dockervault.classifier.run_docker_volume_inspect",
        lambda volume_name: {"Mountpoint": str(volume_dir)} if volume_name == "dbdata" else None,
    )

    entries = classify_compose(compose_file)

    assert len(entries) == 1
    entry = find_entry(entries, "db", "/var/lib/mysql")

    assert entry.source == volume_dir
    assert entry.classification == "critical"
    assert entry.exists is True
    assert "named volume 'dbdata'" in entry.reason
|
||||
|
||||
|
||||
def test_named_volume_unresolved_falls_back_to_review(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """A named volume docker cannot find degrades to a 'review' placeholder."""
    compose_file = write_compose(
        tmp_path,
        """
        services:
          app:
            image: redis:7
            volumes:
              - cachedata:/data

        volumes:
          cachedata:
        """,
    )

    # docker is present but knows no volumes.
    monkeypatch.setattr("dockervault.classifier.docker_available", lambda: True)
    monkeypatch.setattr("dockervault.classifier.run_docker_volume_inspect", lambda volume_name: None)

    entries = classify_compose(compose_file)

    assert len(entries) == 1
    entry = find_entry(entries, "app", "/data")

    assert entry.classification == "review"
    assert entry.exists is False
    assert "__named_volume_unresolved__" in str(entry.source)
    assert "could not be resolved" in entry.reason


def test_named_volume_review_when_docker_not_available(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Without the docker CLI, named volumes cannot be mapped and need review."""
    compose_file = write_compose(
        tmp_path,
        """
        services:
          app:
            image: redis:7
            volumes:
              - cachedata:/data

        volumes:
          cachedata:
        """,
    )

    monkeypatch.setattr("dockervault.classifier.docker_available", lambda: False)

    entries = classify_compose(compose_file)

    assert len(entries) == 1
    entry = find_entry(entries, "app", "/data")

    assert entry.classification == "review"
    assert entry.exists is False
    assert "docker CLI not available" in entry.reason
|
||||
|
||||
|
||||
def test_image_rule_overrides_generic_logic_for_nginx_logs(tmp_path: Path):
    """The nginx image rule marks /var/log/nginx optional despite being a mount."""
    log_source = tmp_path / "logs"
    log_source.mkdir()

    compose_file = write_compose(
        tmp_path,
        """
        services:
          nginx:
            image: nginx:latest
            volumes:
              - ./logs:/var/log/nginx
        """,
    )

    entries = classify_compose(compose_file)

    assert len(entries) == 1
    entry = find_entry(entries, "nginx", "/var/log/nginx")

    assert entry.source == log_source.resolve()
    assert entry.classification == "optional"
    assert entry.exists is True


def test_dedupe_prefers_stronger_classification_for_same_source(tmp_path: Path):
    """Two services mounting one host dir collapse to the stronger entry."""
    shared_dir = tmp_path / "shared"
    shared_dir.mkdir()

    compose_file = write_compose(
        tmp_path,
        f"""
        services:
          db:
            image: mariadb:11
            volumes:
              - {shared_dir}:/var/lib/mysql

          backup:
            image: busybox
            volumes:
              - {shared_dir}:/backup
        """,
    )

    entries = classify_compose(compose_file)

    assert len(entries) == 1
    entry = entries[0]

    assert entry.source == shared_dir.resolve()
    assert entry.classification == "critical"
    assert entry.exists is True
    assert "mariadb" in entry.reason or "/var/lib/mysql" in entry.reason
|
||||
|
||||
|
||||
def test_top_level_volume_name_override_is_used(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """A top-level `name:` override must be the first docker volume name tried."""
    volume_dir = tmp_path / "docker-volumes" / "real-db-volume" / "_data"
    volume_dir.mkdir(parents=True)

    compose_file = write_compose(
        tmp_path,
        """
        services:
          db:
            image: postgres:16
            volumes:
              - dbdata:/var/lib/postgresql/data

        volumes:
          dbdata:
            name: real-db-volume
        """,
    )

    inspected = []

    def fake_inspect(volume_name: str):
        inspected.append(volume_name)
        return {"Mountpoint": str(volume_dir)} if volume_name == "real-db-volume" else None

    monkeypatch.setattr("dockervault.classifier.docker_available", lambda: True)
    monkeypatch.setattr("dockervault.classifier.run_docker_volume_inspect", fake_inspect)

    entries = classify_compose(compose_file)

    entry = find_entry(entries, "db", "/var/lib/postgresql/data")
    assert entry.source == volume_dir
    assert entry.classification == "critical"
    assert "real-db-volume" in entry.reason
    assert inspected[0] == "real-db-volume"


def test_external_volume_tries_raw_name(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """External volumes resolve under their raw compose name (no project prefix)."""
    volume_dir = tmp_path / "docker-volumes" / "shared-prod-data" / "_data"
    volume_dir.mkdir(parents=True)

    compose_file = write_compose(
        tmp_path,
        """
        services:
          app:
            image: redis:7
            volumes:
              - shareddata:/data

        volumes:
          shareddata:
            external: true
        """,
    )

    attempted = []

    def fake_inspect(volume_name: str):
        attempted.append(volume_name)
        return {"Mountpoint": str(volume_dir)} if volume_name == "shareddata" else None

    monkeypatch.setattr("dockervault.classifier.docker_available", lambda: True)
    monkeypatch.setattr("dockervault.classifier.run_docker_volume_inspect", fake_inspect)

    entries = classify_compose(compose_file)

    entry = find_entry(entries, "app", "/data")
    assert entry.source == volume_dir
    assert entry.classification == "critical"
    assert "shareddata" in attempted


def test_anonymous_volume_becomes_review(tmp_path: Path):
    """A bare container path (anonymous volume) is flagged for manual review."""
    compose_file = write_compose(
        tmp_path,
        """
        services:
          app:
            image: busybox
            volumes:
              - /data
        """,
    )

    entries = classify_compose(compose_file)

    assert len(entries) == 1
    entry = find_entry(entries, "app", "/data")

    assert entry.classification == "review"
    assert entry.exists is False
    assert "__anonymous_volume__" in str(entry.source) or "__anonymous__" in str(entry.source)
|
||||
Loading…
Add table
Add a link
Reference in a new issue