# app/srt_parser.py
"""Parse and serialise SubRip (.srt) subtitle text."""

import re
from typing import Any, Dict, List

# One SRT timestamp, e.g. "00:01:02,345".
_TIMESTAMP = r"\d{2}:\d{2}:\d{2},\d{3}"

# Index line followed by the timing line; used (uncaptured) to recognise where
# the *next* cue starts.
_CUE_HEADER = rf"\d+\s*\n{_TIMESTAMP}\s-->\s{_TIMESTAMP}"

# Groups: 1 = index, 2 = start timestamp, 3 = end timestamp, 4 = text.
#
# The newline that ends the timing line is only asserted, not consumed, so
# group 4 begins with it (callers strip the text).  Consuming it eagerly would
# eat the blank separator of a cue with empty text and make the text group
# swallow the whole following cue.
#
# The text stops at a blank (or whitespace-only) line that is followed by
# another cue header, or at the end of the input.
SRT_BLOCK_RE = re.compile(
    rf"(\d+)\s*\n"
    rf"({_TIMESTAMP})\s-->\s({_TIMESTAMP})[^\S\n]*(?=\n|\Z)"
    rf"(.*?)"
    rf"(?=\n[^\S\n]*\n\s*{_CUE_HEADER}|\s*\Z)",
    re.DOTALL,
)


def parse_srt(content: str) -> List[Dict[str, Any]]:
    """Split SRT text into a list of cue dictionaries.

    Line endings are normalised to ``\\n`` and surrounding whitespace is
    removed before matching.  A cue's text runs until a blank or
    whitespace-only line that is followed by the next cue header, or until
    the end of the input; cues whose text is empty are returned with
    ``"text": ""``.

    Args:
        content: Raw contents of an ``.srt`` file.

    Returns:
        One dict per cue, in input order, with keys ``"index"`` (``int``),
        ``"start"`` and ``"end"`` (timestamp strings as written in the input)
        and ``"text"`` (stripped cue text, possibly multi-line).  Input that
        contains no recognisable cue yields an empty list.
    """
    normalized = content.replace("\r\n", "\n").replace("\r", "\n").strip()
    return [
        {
            "index": int(index),
            "start": start,
            "end": end,
            "text": text.strip(),
        }
        for index, start, end, text in (
            match.groups() for match in SRT_BLOCK_RE.finditer(normalized)
        )
    ]


def build_srt(blocks: List[Dict[str, Any]]) -> str:
    """Serialise cue dictionaries back into SRT text.

    Cues are renumbered sequentially from 1 in list order; any ``"index"``
    value on the input dicts is ignored.

    Args:
        blocks: Cue dicts providing ``"start"``, ``"end"`` and ``"text"``.

    Returns:
        The SRT document, cues separated by one blank line and terminated by
        a single newline.  An empty ``blocks`` list yields ``"\\n"``.

    Raises:
        KeyError: If a cue lacks ``"start"``, ``"end"`` or ``"text"``.
    """
    lines: List[str] = []
    for number, block in enumerate(blocks, start=1):
        lines.append(str(number))
        lines.append(f"{block['start']} --> {block['end']}")
        lines.append(block["text"])
        lines.append("")  # blank separator line after every cue
    # Trim the final separator (and any stray edge whitespace), then end the
    # document with exactly one newline.
    return "\n".join(lines).strip() + "\n"