41 lines
1,003 B
Python
41 lines
1,003 B
Python
# app/srt_parser.py
|
|
import re
from typing import Any, Dict, List
|
|
|
|
|
|
# Building blocks for SRT_BLOCK_RE. "[^\S\n]" means "whitespace other than a
# newline", so none of these pieces can run past the end of a line.
_SRT_TIMESTAMP = r"\d{2}:\d{2}:\d{2},\d{3}"
_SRT_HSPACE = r"[^\S\n]*"
# Start of a cue: a counter line followed by the opening of a timing line.
_SRT_CUE_START = (
    _SRT_HSPACE + r"\d+" + _SRT_HSPACE + r"\n\s*"
    + _SRT_TIMESTAMP + _SRT_HSPACE + r"-->"
)
# One non-blank text line that does not itself begin a new cue.
_SRT_TEXT_LINE = r"\n(?!" + _SRT_CUE_START + r")" + _SRT_HSPACE + r"\S[^\n]*"

# Matches one subtitle cue in text whose line endings are already "\n".
#   group 1: counter      group 2: start timestamp
#   group 3: end timestamp
#   group 4: text lines; may be empty, and includes the newline(s) that
#            precede the first text line, so callers should strip() it.
SRT_BLOCK_RE = re.compile(
    # Counter alone on its line; a byte-order mark may precede the first one.
    r"^\ufeff?" + _SRT_HSPACE + r"(\d+)" + _SRT_HSPACE + r"\n\s*"
    # Timing line; spacing around the arrow is optional.
    + r"(" + _SRT_TIMESTAMP + r")" + _SRT_HSPACE + r"-->" + _SRT_HSPACE
    + r"(" + _SRT_TIMESTAMP + r")" + _SRT_HSPACE + r"$"
    # Text: blank lines before the first text line are tolerated; after that
    # the text runs until a blank (or whitespace-only) line, the start of
    # another cue, or the end of the input.
    + r"((?:\s*" + _SRT_TEXT_LINE + r")?(?:" + _SRT_TEXT_LINE + r")*)",
    re.MULTILINE,
)


def parse_srt(content: str) -> List[Dict[str, Any]]:
    """Parse SRT subtitle text into a list of cue dictionaries.

    Line endings are normalized to ``"\\n"`` before matching. Input that does
    not match the cue pattern is skipped without raising.

    Args:
        content: Full text of an SRT document.

    Returns:
        One dict per cue, in document order, with keys ``"index"`` (the
        counter from the file, as ``int``), ``"start"`` and ``"end"``
        (timestamp strings exactly as written) and ``"text"`` (the cue text
        with surrounding whitespace removed; ``""`` for a cue without text).
    """
    # Unify line endings first so the pattern only has to deal with "\n".
    normalized = content.replace("\r\n", "\n").replace("\r", "\n").strip()

    return [
        {
            "index": int(match.group(1)),
            "start": match.group(2),
            "end": match.group(3),
            "text": match.group(4).strip(),
        }
        for match in SRT_BLOCK_RE.finditer(normalized)
    ]
|
|
|
|
|
|
def build_srt(blocks: List[Dict[str, str]]) -> str:
    """Serialize subtitle blocks into SRT text.

    Cues are renumbered 1, 2, 3, ... in list order; an ``"index"`` key in a
    block, if present, is not used.

    Args:
        blocks: Dicts with ``"start"``, ``"end"`` and ``"text"`` keys. The
            two timestamps are written out unchanged.

    Returns:
        The SRT document: cues separated by one blank line, ending in a
        single newline. An empty list yields ``"\\n"``.
    """
    cues = []
    for number, block in enumerate(blocks, start=1):
        # A blank line terminates a cue in SRT, so blank lines (or stray
        # carriage returns) inside the text would cut the cue short when the
        # output is read back. Drop them instead of emitting them.
        text = block["text"].replace("\r\n", "\n").replace("\r", "\n")
        lines = [line for line in text.split("\n") if line.strip()]
        timing = f"{block['start']} --> {block['end']}"
        cues.append("\n".join([str(number), timing, *lines]))

    # strip() removes trailing whitespace left on the last text line.
    return "\n\n".join(cues).strip() + "\n"
|