193 lines
5.2 KiB
Python
193 lines
5.2 KiB
Python
import logging
import re

from app.translators.factory import get_translator
|
|
|
|
|
|
def parse_srt(content: str):
    """Parse SRT subtitle text into a list of cue dictionaries.

    Line endings are normalised to ``\\n`` and the text is split into cues
    on blank lines. A cue is kept only when it has at least three lines, its
    first line parses as an integer index, and its second line contains
    ``-->``. Anything else is skipped silently.

    Args:
        content: Raw SRT text. A leading byte-order mark is tolerated.

    Returns:
        A list of dicts with the keys ``"index"`` (int), ``"start"`` and
        ``"end"`` (the stripped strings on either side of ``-->``) and
        ``"text"`` (the remaining lines joined with ``\\n`` and stripped).
        The list is empty when nothing could be parsed.
    """
    # A UTF-8 BOM survives decoding as U+FEFF. int() does not treat it as
    # whitespace, so without this the first cue's index would fail to parse
    # and the whole first cue would be dropped.
    normalized = (
        content.lstrip("\ufeff")
        .replace("\r\n", "\n")
        .replace("\r", "\n")
        .strip()
    )

    blocks = []
    for raw in re.split(r"\n\s*\n", normalized):
        lines = raw.strip().split("\n")

        # Need at least: index line, timing line, one line of text.
        if len(lines) < 3:
            continue

        try:
            index = int(lines[0].strip())
        except ValueError:
            continue

        if "-->" not in lines[1]:
            continue

        # Split only on the first arrow; everything after it stays in `end`.
        start, end = (part.strip() for part in lines[1].split("-->", 1))
        text = "\n".join(lines[2:]).strip()

        blocks.append(
            {
                "index": index,
                "start": start,
                "end": end,
                "text": text,
            }
        )

    return blocks
|
|
|
|
|
|
def build_srt(blocks):
    """Serialise cue dictionaries back into SRT text.

    Each cue is rendered as its index, a ``start --> end`` timing line and
    its text, each followed by a newline. Rendered cues are then joined with
    one more newline, which yields the blank line between consecutive cues.

    Args:
        blocks: Iterable of dicts with ``"index"``, ``"start"``, ``"end"``
            and ``"text"`` keys.

    Returns:
        The SRT text, or an empty string when ``blocks`` is empty.
    """
    rendered_cues = (
        f"{cue['index']}\n{cue['start']} --> {cue['end']}\n{cue['text']}\n"
        for cue in blocks
    )
    return "\n".join(rendered_cues)
|
|
|
|
|
|
def _make_translator(mode=None, source_lang="auto", target_lang="da"):
    """Create a translator through ``get_translator``, probing call shapes.

    ``get_translator`` is called with progressively fewer arguments (keyword
    form, then positional form, then mode only, then no arguments) until one
    call succeeds and returns something other than ``None``.

    Args:
        mode: Forwarded to ``get_translator`` where the call shape takes it.
        source_lang: Forwarded to ``get_translator`` in the first two shapes.
        target_lang: Forwarded to ``get_translator`` in the first two shapes.

    Returns:
        The first non-``None`` object returned by ``get_translator``.

    Raises:
        RuntimeError: If every attempt raised or returned ``None``. The last
            exception seen, if any, is attached as ``__cause__``.
    """
    attempts = (
        lambda: get_translator(mode=mode, source_lang=source_lang, target_lang=target_lang),
        lambda: get_translator(mode, source_lang, target_lang),
        lambda: get_translator(mode=mode),
        lambda: get_translator(mode),
        lambda: get_translator(),
    )

    last_error = None
    for attempt in attempts:
        try:
            translator = attempt()
        except Exception as exc:
            # NOTE(review): this is deliberately broad so that a signature
            # mismatch falls through to the next shape, but it also hides
            # genuine factory errors (e.g. a bad mode) until the final raise.
            last_error = exc
            continue

        if translator is not None:
            return translator

    # Chain the last failure so its traceback is not lost.
    raise RuntimeError(
        f"Could not create translator via get_translator(): {last_error}"
    ) from last_error
|
|
|
|
|
|
def _translate_blocks(
|
|
translator,
|
|
blocks,
|
|
source_lang="auto",
|
|
target_lang="da",
|
|
progress_callback=None,
|
|
):
|
|
if hasattr(translator, "translate_blocks"):
|
|
return translator.translate_blocks(
|
|
blocks,
|
|
source_lang,
|
|
target_lang,
|
|
progress_callback=progress_callback,
|
|
)
|
|
|
|
if hasattr(translator, "translate_batch"):
|
|
translated_texts = translator.translate_batch(blocks, source_lang, target_lang)
|
|
|
|
output = []
|
|
total = len(blocks)
|
|
|
|
for i, (block, translated_text) in enumerate(zip(blocks, translated_texts), start=1):
|
|
new_block = dict(block)
|
|
|
|
if isinstance(translated_text, dict):
|
|
translated_text = (
|
|
translated_text.get("text")
|
|
or translated_text.get("translated_text")
|
|
or translated_text.get("translation")
|
|
or block["text"]
|
|
)
|
|
|
|
new_block["text"] = str(translated_text).strip()
|
|
output.append(new_block)
|
|
|
|
if progress_callback:
|
|
progress_callback(i, total)
|
|
|
|
return output
|
|
|
|
if hasattr(translator, "translate"):
|
|
output = []
|
|
total = len(blocks)
|
|
|
|
for i, block in enumerate(blocks, start=1):
|
|
new_block = dict(block)
|
|
try:
|
|
translated = translator.translate(block["text"], source_lang, target_lang)
|
|
except TypeError:
|
|
translated = translator.translate(block["text"])
|
|
|
|
new_block["text"] = str(translated).strip()
|
|
output.append(new_block)
|
|
|
|
if progress_callback:
|
|
progress_callback(i, total)
|
|
|
|
return output
|
|
|
|
if callable(translator):
|
|
output = []
|
|
total = len(blocks)
|
|
|
|
for i, block in enumerate(blocks, start=1):
|
|
new_block = dict(block)
|
|
try:
|
|
translated = translator(block["text"], source_lang, target_lang)
|
|
except TypeError:
|
|
translated = translator(block["text"])
|
|
|
|
new_block["text"] = str(translated).strip()
|
|
output.append(new_block)
|
|
|
|
if progress_callback:
|
|
progress_callback(i, total)
|
|
|
|
return output
|
|
|
|
raise TypeError(f"Unsupported translator interface: {type(translator)}")
|
|
|
|
|
|
def translate_srt_content(
    content,
    translator=None,
    mode=None,
    source_lang="auto",
    target_lang="da",
    job_id=None,
    progress_callback=None,
    **kwargs,
):
    """Translate the text of an SRT document and return the new SRT text.

    The content is parsed with ``parse_srt``, every cue is translated via
    ``_translate_blocks``, and the result is rendered with ``build_srt``.

    Args:
        content: Raw SRT text.
        translator: Translator to use. When ``None`` one is created with
            ``_make_translator(mode, source_lang, target_lang)``.
        mode: Passed to ``_make_translator`` when a translator is created.
        source_lang: Source language code handed to the translator.
        target_lang: Target language code handed to the translator.
        job_id: Only used in diagnostic logging here.
        progress_callback: Optional callback forwarded to
            ``_translate_blocks``.
        **kwargs: Accepted for call compatibility; only logged, otherwise
            ignored.

    Returns:
        The translated document as SRT text.

    Raises:
        ValueError: If no cue could be parsed from ``content``.
    """
    # Diagnostics go through logging instead of unconditional print(), so
    # callers decide whether they are shown.
    log = logging.getLogger(__name__)
    log.debug(
        "translate_srt_content start: mode=%s source_lang=%s target_lang=%s "
        "job_id=%s extra_kwargs=%s",
        mode,
        source_lang,
        target_lang,
        job_id,
        kwargs,
    )

    blocks = parse_srt(content)
    log.debug("blocks count: %d", len(blocks))
    log.debug("first block: %s", blocks[0] if blocks else "NONE")

    if not blocks:
        raise ValueError("No SRT blocks could be parsed from content")

    if translator is None:
        translator = _make_translator(
            mode=mode,
            source_lang=source_lang,
            target_lang=target_lang,
        )
        log.debug("translator created: %s", type(translator))

    translated_blocks = _translate_blocks(
        translator,
        blocks,
        source_lang=source_lang,
        target_lang=target_lang,
        progress_callback=progress_callback,
    )

    log.debug("translated blocks count: %d", len(translated_blocks))
    log.debug("translate_srt_content end")

    return build_srt(translated_blocks)
|