subfox/app/srt_parser.py

41 lines
1,003 B
Python

# app/srt_parser.py
import re
from typing import List, Dict
# Matches one SubRip cue. Groups: (1) index, (2) start time, (3) end time,
# (4) raw text lines (possibly empty, may end with a newline).
SRT_BLOCK_RE = re.compile(
    # Cue counter on its own line.
    r"(\d+)\s*\n"
    # Timing line; whitespace around the arrow is optional so slightly
    # malformed files still parse. Only horizontal whitespace may trail the
    # end timestamp, so a following blank line is not swallowed here.
    r"(\d{2}:\d{2}:\d{2},\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2},\d{3})"
    r"[^\S\n]*(?:\n|\Z)"
    # Text: consecutive lines that each contain at least one non-whitespace
    # character. The first empty or whitespace-only line (or end of input)
    # terminates the cue, which also allows cues with no text at all.
    r"((?:[^\S\n]*\S[^\n]*(?:\n|\Z))*)"
)


def parse_srt(content: str) -> List[Dict[str, str]]:
    """Parse SubRip (.srt) text into a list of cue dictionaries.

    Line endings (CRLF / lone CR) are normalized to LF and surrounding
    whitespace is stripped before matching. A cue ends at the first empty or
    whitespace-only line, so separator lines containing stray spaces or tabs
    no longer cause neighbouring cues to be merged, and cues without any text
    are returned with an empty ``"text"`` instead of absorbing the next cue.

    Args:
        content: Full text of an SRT document.

    Returns:
        One dict per matched cue, in document order, with keys ``"index"``
        (the cue number, converted to ``int``), ``"start"`` and ``"end"``
        (timestamps exactly as written, ``HH:MM:SS,mmm``) and ``"text"``
        (cue text with surrounding whitespace stripped; inner newlines kept).
        Input that contains no well-formed cue yields an empty list.
    """
    # NOTE: the return annotation says str values, but "index" is an int.
    normalized = content.replace("\r\n", "\n").replace("\r", "\n").strip()
    return [
        {
            "index": int(match.group(1)),
            "start": match.group(2),
            "end": match.group(3),
            "text": match.group(4).strip(),
        }
        for match in SRT_BLOCK_RE.finditer(normalized)
    ]
def build_srt(blocks: List[Dict[str, str]]) -> str:
    """Serialize cue dictionaries back into SubRip (.srt) text.

    Cues are renumbered sequentially from 1 in list order; any ``"index"``
    key present on a cue is ignored. Each cue is written as its number, a
    ``start --> end`` timing line, its text, and a blank separator line.

    Args:
        blocks: Cues providing ``"start"``, ``"end"`` and ``"text"`` keys.

    Returns:
        The SRT document with surrounding whitespace stripped and exactly
        one trailing newline (just ``"\\n"`` when ``blocks`` is empty).
    """
    lines = []
    for number, cue in enumerate(blocks, start=1):
        timing = f"{cue['start']} --> {cue['end']}"
        # The trailing empty string becomes the blank line between cues.
        lines.extend((str(number), timing, cue["text"], ""))
    document = "\n".join(lines)
    return document.strip() + "\n"