feat(scan): exclude common non-project directories during discovery
This commit is contained in:
parent
4d36198bdd
commit
02ff096c6b
2 changed files with 73 additions and 163 deletions
|
|
@ -1,28 +1,56 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
COMPOSE_FILENAMES = (
|
DEFAULT_SCAN_EXCLUDES = {
|
||||||
|
".git",
|
||||||
|
".hg",
|
||||||
|
".svn",
|
||||||
|
".venv",
|
||||||
|
"venv",
|
||||||
|
"env",
|
||||||
|
"node_modules",
|
||||||
|
"__pycache__",
|
||||||
|
".pytest_cache",
|
||||||
|
".mypy_cache",
|
||||||
|
".tox",
|
||||||
|
".cache",
|
||||||
|
".idea",
|
||||||
|
".vscode",
|
||||||
|
}
|
||||||
|
|
||||||
|
COMPOSE_FILENAMES = {
|
||||||
"docker-compose.yml",
|
"docker-compose.yml",
|
||||||
"docker-compose.yaml",
|
"docker-compose.yaml",
|
||||||
"compose.yml",
|
"compose.yml",
|
||||||
"compose.yaml",
|
"compose.yaml",
|
||||||
)
|
}
|
||||||
|
|
||||||
|
|
||||||
def find_compose_files(root: str | Path) -> list[Path]:
|
def find_compose_files(
|
||||||
|
root: Path | str,
|
||||||
|
excludes: Iterable[str] | None = None,
|
||||||
|
) -> list[Path]:
|
||||||
root_path = Path(root).resolve()
|
root_path = Path(root).resolve()
|
||||||
|
|
||||||
if not root_path.exists():
|
exclude_set = set(DEFAULT_SCAN_EXCLUDES)
|
||||||
raise FileNotFoundError(f"Scan root not found: {root_path}")
|
if excludes:
|
||||||
|
exclude_set.update(x.strip() for x in excludes if x and x.strip())
|
||||||
|
|
||||||
if not root_path.is_dir():
|
found: set[Path] = set()
|
||||||
raise NotADirectoryError(f"Scan root is not a directory: {root_path}")
|
|
||||||
|
|
||||||
found: list[Path] = []
|
for current_root, dirnames, filenames in os.walk(root_path, topdown=True):
|
||||||
|
dirnames[:] = sorted(
|
||||||
|
d for d in dirnames
|
||||||
|
if d not in exclude_set
|
||||||
|
)
|
||||||
|
|
||||||
for path in root_path.rglob("*"):
|
current_path = Path(current_root)
|
||||||
if path.is_file() and path.name in COMPOSE_FILENAMES:
|
|
||||||
found.append(path)
|
for filename in filenames:
|
||||||
|
if filename in COMPOSE_FILENAMES:
|
||||||
|
found.add((current_path / filename).resolve())
|
||||||
|
|
||||||
return sorted(found)
|
return sorted(found)
|
||||||
|
|
|
||||||
|
|
@ -1,165 +1,47 @@
|
||||||
from __future__ import annotations
|
import os
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List
|
from typing import Iterable
|
||||||
|
|
||||||
import yaml
|
DEFAULT_SCAN_EXCLUDES = {
|
||||||
|
".git",
|
||||||
|
".venv",
|
||||||
|
"venv",
|
||||||
|
"node_modules",
|
||||||
|
"__pycache__",
|
||||||
|
".pytest_cache",
|
||||||
|
}
|
||||||
|
|
||||||
from dockervault.classification.models import MountCandidate
|
COMPOSE_FILENAMES = (
|
||||||
|
"docker-compose.yml",
|
||||||
|
"docker-compose.yaml",
|
||||||
|
"compose.yml",
|
||||||
|
"compose.yaml",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DockerComposeScanner:
|
def discover_compose_files(
|
||||||
def __init__(self, compose_file: str | Path):
|
root: Path | str,
|
||||||
self.compose_file = Path(compose_file)
|
excludes: Iterable[str] | None = None,
|
||||||
self.base_dir = self.compose_file.parent
|
) -> list[Path]:
|
||||||
|
|
||||||
def load_compose(self) -> Dict[str, Any]:
|
root = Path(root).resolve()
|
||||||
with self.compose_file.open("r", encoding="utf-8") as f:
|
|
||||||
return yaml.safe_load(f) or {}
|
|
||||||
|
|
||||||
def scan(self) -> List[MountCandidate]:
|
exclude_set = set(DEFAULT_SCAN_EXCLUDES)
|
||||||
compose = self.load_compose()
|
if excludes:
|
||||||
services = compose.get("services", {})
|
exclude_set.update(x.strip() for x in excludes if x)
|
||||||
project_name = compose.get("name") or self.base_dir.name
|
|
||||||
|
|
||||||
candidates: List[MountCandidate] = []
|
found = set()
|
||||||
|
|
||||||
for service_name, service_def in services.items():
|
for current_root, dirnames, filenames in os.walk(root, topdown=True):
|
||||||
image = service_def.get("image", "")
|
# 🚫 skip unwanted dirs
|
||||||
env = self._normalize_environment(service_def.get("environment", {}))
|
dirnames[:] = sorted(
|
||||||
volumes = service_def.get("volumes", [])
|
d for d in dirnames if d not in exclude_set
|
||||||
|
|
||||||
for volume in volumes:
|
|
||||||
candidate = self._parse_volume(
|
|
||||||
service_name=service_name,
|
|
||||||
image=image,
|
|
||||||
volume=volume,
|
|
||||||
env=env,
|
|
||||||
compose_project=project_name,
|
|
||||||
)
|
|
||||||
if candidate:
|
|
||||||
candidates.append(candidate)
|
|
||||||
|
|
||||||
return candidates
|
|
||||||
|
|
||||||
def _normalize_environment(self, env: Any) -> Dict[str, str]:
|
|
||||||
if isinstance(env, dict):
|
|
||||||
return {str(k): str(v) for k, v in env.items()}
|
|
||||||
|
|
||||||
if isinstance(env, list):
|
|
||||||
parsed: Dict[str, str] = {}
|
|
||||||
for item in env:
|
|
||||||
if isinstance(item, str) and "=" in item:
|
|
||||||
key, value = item.split("=", 1)
|
|
||||||
parsed[key] = value
|
|
||||||
return parsed
|
|
||||||
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def _parse_volume(
|
|
||||||
self,
|
|
||||||
service_name: str,
|
|
||||||
image: str,
|
|
||||||
volume: Any,
|
|
||||||
env: Dict[str, str],
|
|
||||||
compose_project: str,
|
|
||||||
) -> MountCandidate | None:
|
|
||||||
if isinstance(volume, str):
|
|
||||||
return self._parse_short_syntax(
|
|
||||||
service_name=service_name,
|
|
||||||
image=image,
|
|
||||||
volume=volume,
|
|
||||||
env=env,
|
|
||||||
compose_project=compose_project,
|
|
||||||
)
|
|
||||||
|
|
||||||
if isinstance(volume, dict):
|
|
||||||
return self._parse_long_syntax(
|
|
||||||
service_name=service_name,
|
|
||||||
image=image,
|
|
||||||
volume=volume,
|
|
||||||
env=env,
|
|
||||||
compose_project=compose_project,
|
|
||||||
)
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _parse_short_syntax(
|
|
||||||
self,
|
|
||||||
service_name: str,
|
|
||||||
image: str,
|
|
||||||
volume: str,
|
|
||||||
env: Dict[str, str],
|
|
||||||
compose_project: str,
|
|
||||||
) -> MountCandidate | None:
|
|
||||||
parts = volume.split(":")
|
|
||||||
|
|
||||||
if len(parts) == 1:
|
|
||||||
# Anonymous volume style: "/data"
|
|
||||||
return MountCandidate(
|
|
||||||
service_name=service_name,
|
|
||||||
image=image,
|
|
||||||
source="",
|
|
||||||
target=parts[0],
|
|
||||||
mount_type="volume",
|
|
||||||
read_only=False,
|
|
||||||
env=env,
|
|
||||||
compose_project=compose_project,
|
|
||||||
)
|
|
||||||
|
|
||||||
if len(parts) >= 2:
|
|
||||||
source = parts[0]
|
|
||||||
target = parts[1]
|
|
||||||
options = parts[2:] if len(parts) > 2 else []
|
|
||||||
read_only = "ro" in options
|
|
||||||
|
|
||||||
mount_type = self._guess_mount_type(source)
|
|
||||||
|
|
||||||
return MountCandidate(
|
|
||||||
service_name=service_name,
|
|
||||||
image=image,
|
|
||||||
source=source,
|
|
||||||
target=target,
|
|
||||||
mount_type=mount_type,
|
|
||||||
read_only=read_only,
|
|
||||||
env=env,
|
|
||||||
compose_project=compose_project,
|
|
||||||
)
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _parse_long_syntax(
|
|
||||||
self,
|
|
||||||
service_name: str,
|
|
||||||
image: str,
|
|
||||||
volume: Dict[str, Any],
|
|
||||||
env: Dict[str, str],
|
|
||||||
compose_project: str,
|
|
||||||
) -> MountCandidate | None:
|
|
||||||
source = volume.get("source", "") or volume.get("src", "")
|
|
||||||
target = volume.get("target", "") or volume.get("dst", "") or volume.get("destination", "")
|
|
||||||
mount_type = volume.get("type", self._guess_mount_type(str(source)))
|
|
||||||
read_only = bool(volume.get("read_only", False))
|
|
||||||
|
|
||||||
if not target:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return MountCandidate(
|
|
||||||
service_name=service_name,
|
|
||||||
image=image,
|
|
||||||
source=str(source),
|
|
||||||
target=str(target),
|
|
||||||
mount_type=str(mount_type),
|
|
||||||
read_only=read_only,
|
|
||||||
env=env,
|
|
||||||
compose_project=compose_project,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _guess_mount_type(self, source: str) -> str:
|
current_path = Path(current_root)
|
||||||
if not source:
|
|
||||||
return "volume"
|
|
||||||
|
|
||||||
if source.startswith("/") or source.startswith("./") or source.startswith("../"):
|
for filename in filenames:
|
||||||
return "bind"
|
if filename in COMPOSE_FILENAMES:
|
||||||
|
found.add((current_path / filename).resolve())
|
||||||
|
|
||||||
return "volume"
|
return sorted(found)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue