# Source: subfox/app/services/subtitle_service.py (193 lines, 5.2 KiB, Python)

import logging
import re

from app.translators.factory import get_translator
def parse_srt(content: str):
    """Parse SRT subtitle text into a list of block dictionaries.

    The text is split into blocks on blank lines. A block is kept only when
    it has at least three lines, its first line is an integer index and its
    second line contains ``-->``; anything else is skipped silently.

    Args:
        content: Raw SRT text. ``\\r\\n`` and ``\\r`` line endings are
            normalised to ``\\n`` and a leading byte-order mark is removed.

    Returns:
        A list of dicts with the keys ``index`` (int), ``start``, ``end``
        and ``text`` (str; multi-line text is joined with ``\\n``).
    """
    # A byte-order mark is not whitespace, so ``strip()`` would leave it in
    # front of the first index and ``int()`` would reject that block.
    content = content.lstrip("\ufeff")
    content = content.replace("\r\n", "\n").replace("\r", "\n").strip()

    blocks = []
    for raw in re.split(r"\n\s*\n", content):
        lines = raw.strip().split("\n")
        if len(lines) < 3:
            continue
        try:
            index = int(lines[0].strip())
        except ValueError:
            # Not a numbered cue; ignore the whole block.
            continue
        if "-->" not in lines[1]:
            continue
        start, end = (part.strip() for part in lines[1].split("-->", 1))
        blocks.append(
            {
                "index": index,
                "start": start,
                "end": end,
                "text": "\n".join(lines[2:]).strip(),
            }
        )
    return blocks
def build_srt(blocks):
    """Render block dictionaries back into SRT text.

    Each block contributes its index line, its ``start --> end`` timing
    line and its text, followed by a newline. Rendered blocks are joined
    with one extra newline, which leaves a blank line between them.

    Args:
        blocks: Iterable of dicts with the keys ``index``, ``start``,
            ``end`` and ``text``.

    Returns:
        The SRT text, or an empty string when ``blocks`` is empty.
    """
    rendered = [
        "{}\n{} --> {}\n{}\n".format(
            entry["index"], entry["start"], entry["end"], entry["text"]
        )
        for entry in blocks
    ]
    return "\n".join(rendered)
def _make_translator(mode=None, source_lang="auto", target_lang="da"):
    """Create a translator by calling ``get_translator`` in several ways.

    The factory is tried with progressively fewer arguments (keyword form
    first, then positional, then ``mode`` only, then no arguments). The
    first call that returns something other than ``None`` wins.

    Args:
        mode: Passed through to ``get_translator`` where the call accepts it.
        source_lang: Source language passed to the first two call shapes.
        target_lang: Target language passed to the first two call shapes.

    Returns:
        The first non-``None`` object returned by ``get_translator``.

    Raises:
        RuntimeError: If every call shape raised or returned ``None``. The
            last caught exception, if any, is attached as ``__cause__``.
    """
    attempts = [
        lambda: get_translator(mode=mode, source_lang=source_lang, target_lang=target_lang),
        lambda: get_translator(mode, source_lang, target_lang),
        lambda: get_translator(mode=mode),
        lambda: get_translator(mode),
        lambda: get_translator(),
    ]

    last_error = None
    for attempt in attempts:
        try:
            translator = attempt()
        except Exception as e:
            # Deliberately broad: any failure moves on to the next call
            # shape. NOTE(review): this also hides genuine factory errors
            # (not only signature mismatches) until the final raise.
            last_error = e
            continue
        if translator is not None:
            return translator

    # Chain the last failure so its traceback is not lost.
    raise RuntimeError(
        f"Could not create translator via get_translator(): {last_error}"
    ) from last_error
def _normalize_translated_text(result, fallback):
    """Coerce one translator result into stripped subtitle text.

    A dict result is unwrapped via its ``text``, ``translated_text`` or
    ``translation`` key (first truthy value wins). A ``None`` result, or a
    dict with none of those values, falls back to ``fallback`` so that the
    literal string ``"None"`` never ends up in a subtitle.
    """
    if isinstance(result, dict):
        result = (
            result.get("text")
            or result.get("translated_text")
            or result.get("translation")
            or fallback
        )
    elif result is None:
        result = fallback
    return str(result).strip()


def _translate_each(call, blocks, source_lang, target_lang, progress_callback):
    """Translate blocks one at a time through ``call`` and copy each block."""
    total = len(blocks)
    output = []
    for i, block in enumerate(blocks, start=1):
        try:
            translated = call(block["text"], source_lang, target_lang)
        except TypeError:
            # Retry with the text only, for callables that do not take
            # language arguments. NOTE(review): a TypeError raised inside
            # the translator itself also triggers this retry.
            translated = call(block["text"])
        new_block = dict(block)
        new_block["text"] = _normalize_translated_text(translated, block["text"])
        output.append(new_block)
        if progress_callback:
            progress_callback(i, total)
    return output


def _translate_blocks(
    translator,
    blocks,
    source_lang="auto",
    target_lang="da",
    progress_callback=None,
):
    """Translate subtitle blocks with whatever interface ``translator`` has.

    Interfaces are probed in this order:

    1. ``translate_blocks(blocks, source_lang, target_lang,
       progress_callback=...)`` - its return value is returned unchanged.
    2. ``translate_batch(blocks, source_lang, target_lang)`` - results are
       paired with the blocks in order.
    3. ``translate(text, source_lang, target_lang)`` or ``translate(text)``.
    4. The translator itself called like ``translate`` in case 3.

    For cases 2-4 the input blocks are not mutated; each output block is a
    shallow copy with ``text`` replaced. A block keeps its original text
    when its result is ``None`` or missing (a batch shorter than
    ``blocks``), so no subtitle is dropped.

    Args:
        translator: Object exposing one of the interfaces above.
        blocks: Sequence of block dicts with at least a ``text`` key.
        source_lang: Source language passed to the translator.
        target_lang: Target language passed to the translator.
        progress_callback: Optional ``callback(done, total)`` invoked after
            each block in cases 2-4.

    Returns:
        The translated blocks.

    Raises:
        TypeError: If ``translator`` supports none of the interfaces.
    """
    if hasattr(translator, "translate_blocks"):
        return translator.translate_blocks(
            blocks,
            source_lang,
            target_lang,
            progress_callback=progress_callback,
        )

    if hasattr(translator, "translate_batch"):
        results = iter(translator.translate_batch(blocks, source_lang, target_lang))
        total = len(blocks)
        output = []
        for i, block in enumerate(blocks, start=1):
            new_block = dict(block)
            # ``next(..., None)`` yields None once the batch runs out, which
            # the normaliser turns into the block's original text.
            new_block["text"] = _normalize_translated_text(
                next(results, None), block["text"]
            )
            output.append(new_block)
            if progress_callback:
                progress_callback(i, total)
        return output

    if hasattr(translator, "translate"):
        return _translate_each(
            translator.translate, blocks, source_lang, target_lang, progress_callback
        )

    if callable(translator):
        return _translate_each(
            translator, blocks, source_lang, target_lang, progress_callback
        )

    raise TypeError(f"Unsupported translator interface: {type(translator)}")
def translate_srt_content(
    content,
    translator=None,
    mode=None,
    source_lang="auto",
    target_lang="da",
    job_id=None,
    progress_callback=None,
    **kwargs,
):
    """Translate SRT text and return the translated SRT text.

    The content is parsed with ``parse_srt``, translated with
    ``_translate_blocks`` and rendered with ``build_srt``. Diagnostics go
    to this module's logger at DEBUG level rather than to stdout.

    Args:
        content: Raw SRT text.
        translator: Translator to use. When ``None`` one is created with
            ``_make_translator(mode, source_lang, target_lang)``.
        mode: Translator mode, used only when a translator is created here.
        source_lang: Source language passed to the translator.
        target_lang: Target language passed to the translator.
        job_id: Only logged; not otherwise used by this function.
        progress_callback: Optional callback forwarded to
            ``_translate_blocks``.
        **kwargs: Extra keyword arguments; accepted and logged, not used.

    Returns:
        The translated subtitles as SRT text.

    Raises:
        ValueError: If no SRT block could be parsed from ``content``.
    """
    logger = logging.getLogger(__name__)
    logger.debug("===== START translate_srt_content =====")
    logger.debug(
        "mode=%s source_lang=%s target_lang=%s job_id=%s extra kwargs=%s",
        mode,
        source_lang,
        target_lang,
        job_id,
        kwargs,
    )

    blocks = parse_srt(content)
    logger.debug("blocks count: %d", len(blocks))
    if not blocks:
        raise ValueError("No SRT blocks could be parsed from content")
    logger.debug("first block: %s", blocks[0])

    if translator is None:
        translator = _make_translator(
            mode=mode,
            source_lang=source_lang,
            target_lang=target_lang,
        )
    logger.debug("translator in use: %s", type(translator))

    translated_blocks = _translate_blocks(
        translator,
        blocks,
        source_lang=source_lang,
        target_lang=target_lang,
        progress_callback=progress_callback,
    )
    logger.debug("translated blocks count: %d", len(translated_blocks))
    logger.debug("===== END translate_srt_content =====")
    return build_srt(translated_blocks)