Make SubFox production-ready with parallel translation and UI controls

This commit is contained in:
Eddie Nielsen 2026-03-25 11:24:54 +00:00
parent c40b8bed2b
commit 2b1d05f02c
6046 changed files with 798327 additions and 0 deletions

Binary file not shown.

Binary file not shown.

0
app/app/__init__.py Normal file
View file

7
app/app/main.py Normal file
View file

@ -0,0 +1,7 @@
from fastapi import FastAPI
app = FastAPI()
@app.get("/")
def root():
    """Liveness endpoint: report that the service is running."""
    payload = {"status": "SubFox alive"}
    return payload

View file

View file

52
app/cache.py Normal file
View file

@ -0,0 +1,52 @@
import os
import json
import hashlib
import time
from pathlib import Path
# Root directory of the on-disk translation cache; entries are stored below it
# in one sub-folder per "<source_lang>_<target_lang>" pair.
CACHE_ROOT = Path("/data/cache")
def _hash_key(source_lang: str, target_lang: str, text: str) -> str:
    """Return the cache key for a translation request.

    The key is the SHA-256 hex digest of ``"<source>:<target>:<text>"`` where
    the text has surrounding whitespace removed, so texts that differ only in
    leading/trailing whitespace share one cache entry.
    """
    material = "{}:{}:{}".format(source_lang, target_lang, text.strip())
    digest = hashlib.sha256()
    digest.update(material.encode("utf-8"))
    return digest.hexdigest()
def _get_path(source_lang: str, target_lang: str, key: str) -> Path:
    """Return the JSON file path for ``key`` within its language-pair folder.

    The folder ``CACHE_ROOT/<source>_<target>`` is created (with parents) as a
    side effect if it does not exist yet.
    """
    pair_dir = CACHE_ROOT.joinpath(f"{source_lang}_{target_lang}")
    pair_dir.mkdir(parents=True, exist_ok=True)
    return pair_dir.joinpath(f"{key}.json")
def get_cached(source_lang: str, target_lang: str, text: str):
    """Look up a previously cached translation of ``text``.

    Returns the stored ``"translated"`` value, or ``None`` when there is no
    entry or the entry cannot be read/parsed (the cache is best-effort, so
    read problems are treated as a miss).
    """
    entry_path = _get_path(
        source_lang,
        target_lang,
        _hash_key(source_lang, target_lang, text),
    )
    if entry_path.exists():
        try:
            with entry_path.open("r", encoding="utf-8") as handle:
                entry = json.load(handle)
            return entry.get("translated")
        except Exception:
            # Unreadable or malformed entry: behave as a cache miss.
            return None
    return None
def set_cache(source_lang: str, target_lang: str, text: str, translated: str, model: str):
    """Store ``translated`` as the cached translation of ``text``.

    The entry is written to a temporary file in the destination folder and then
    moved into place with ``os.replace`` so readers never observe a partially
    written JSON file.

    Args:
        source_lang: Source language code (part of the cache key and folder).
        target_lang: Target language code (part of the cache key and folder).
        text: Original subtitle text.
        translated: Translated text to store.
        model: Name of the model that produced the translation (stored only).

    Raises:
        OSError: If the entry cannot be written or moved into place.
    """
    # Local import: the module header does not import uuid.
    import uuid

    key = _hash_key(source_lang, target_lang, text)
    path = _get_path(source_lang, target_lang, key)
    data = {
        "source": text,
        "translated": translated,
        "model": model,
        "created": int(time.time()),
    }
    # The temp name must be unique per call.  Translations run in a thread
    # pool, and identical subtitle lines are common; with a shared
    # "<key>.tmp" name two workers would write the same temp file and the
    # slower one's os.replace() would fail with FileNotFoundError.
    tmp_path = path.with_name(f"{path.name}.{uuid.uuid4().hex}.tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp name is gone (ignored below);
        # after a failure this removes the leftover temp file.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass

157
app/main.py Normal file
View file

@ -0,0 +1,157 @@
from __future__ import annotations
import threading
import uuid
from pathlib import Path
from fastapi import FastAPI, UploadFile, File, Form, Request
from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
from fastapi.templating import Jinja2Templates
from app.services.job_store import jobs
from app.services.subtitle_service import translate_srt_content, parse_srt
# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()
# Template loader used by the index page handler.
templates = Jinja2Templates(directory="app/templates")
# Folder for translated .srt files. The path is relative, and the folder is
# created at import time if it does not exist.
OUTPUT_DIR = Path("data/output")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render the upload page (``index.html``) with an empty template context."""
    page = templates.TemplateResponse(request=request, name="index.html", context={})
    return page
@app.post("/start")
async def start_translation(
    file: UploadFile = File(...),
    mode: str = Form("fast"),
    source_lang: str = Form("auto"),
    target_lang: str = Form("da"),
    model: str = Form("gpt-4o-mini"),
    workers: int = Form(4),
):
    """Accept an uploaded subtitle file and start translating it in the background.

    The upload is decoded as UTF-8 (a BOM is tolerated); undecodable input is
    answered with HTTP 400.  Otherwise a job record is stored in ``jobs``, a
    daemon thread runs the translation and writes the result under
    ``OUTPUT_DIR``, and the new job id is returned immediately as JSON.
    """
    try:
        payload = await file.read()
        content = payload.decode("utf-8-sig")
    except UnicodeDecodeError:
        return JSONResponse(
            status_code=400,
            content={"error": "Kunne ikke læse filen som UTF-8/UTF-8-SIG"},
        )

    # Clamp the requested worker count to the 1..16 range.
    workers = max(1, min(16, workers))

    job_id = str(uuid.uuid4())
    output_path = OUTPUT_DIR / f"{job_id}.srt"

    # Best-effort block count for the initial job record; a parse failure here
    # only means the count starts at zero.
    try:
        block_count = len(parse_srt(content))
    except Exception:
        block_count = 0

    jobs[job_id] = dict(
        status="queued",
        progress=0,
        filename=file.filename or "translated.srt",
        output_path=str(output_path),
        blocks=block_count,
        error="",
        done=False,
        mode=mode,
        source_lang=source_lang,
        target_lang=target_lang,
        model=model,
        workers=workers,
    )

    def progress_callback(done_blocks: int, total_blocks: int):
        """Translate a (done, total) pair into the job's percentage."""
        if total_blocks <= 0:
            jobs[job_id]["progress"] = 1
            return
        percent = int((done_blocks / total_blocks) * 100)
        if done_blocks > 0:
            percent = max(2, percent)
        # 100 is reserved for the moment the output file has been written.
        jobs[job_id].update(
            progress=min(99, percent),
            status="running",
            blocks=total_blocks,
        )

    def run_job():
        """Thread body: translate, write the output file, record the outcome."""
        try:
            jobs[job_id].update(status="starting", progress=1)
            translated_srt = translate_srt_content(
                content=content,
                mode=mode,
                source_lang=source_lang,
                target_lang=target_lang,
                job_id=job_id,
                progress_callback=progress_callback,
                model=model,
                workers=workers,
            )
            output_path.write_text(translated_srt, encoding="utf-8")
            jobs[job_id].update(status="done", progress=100, done=True)
        except Exception as exc:
            jobs[job_id].update(
                status="error",
                progress=0,
                error=str(exc),
                done=False,
            )

    background = threading.Thread(target=run_job, daemon=True)
    background.start()
    return JSONResponse({"job_id": job_id})
@app.get("/status/{job_id}")
async def get_status(job_id: str):
    """Return the stored job record as JSON, or a 404 payload for unknown ids."""
    job = jobs.get(job_id)
    if job:
        return JSONResponse(job)
    not_found = {"status": "unknown", "progress": 0, "error": "Job not found"}
    return JSONResponse(not_found, status_code=404)
@app.get("/download/{job_id}")
async def download_result(job_id: str):
    """Send the translated subtitle file of a finished job.

    Responds with 404 for an unknown job or a missing output file, and with
    400 while the job is not marked as done.  The download name is the
    uploaded file name prefixed with ``translated_``.
    """

    def failure(message: str, status_code: int) -> JSONResponse:
        return JSONResponse({"error": message}, status_code=status_code)

    job = jobs.get(job_id)
    if not job:
        return failure("Job not found", 404)
    if not job.get("done"):
        return failure("File not ready yet", 400)

    result_file = Path(job["output_path"])
    if not result_file.exists():
        return failure("Output file missing", 404)

    download_name = "translated_{}".format(job.get("filename", "translated.srt"))
    return FileResponse(
        path=result_file,
        media_type="application/x-subrip",
        filename=download_name,
    )

0
app/services/__init__.py Normal file
View file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1 @@
# In-memory job registry (a plain module-level dict); its contents exist only
# for the lifetime of the process.
jobs = {}

View file

@ -0,0 +1,10 @@
def build_srt(blocks):
    """Serialize subtitle blocks to SRT text.

    Each block contributes its index line, its ``start --> end`` timing line,
    its text (``translated_text`` when present, otherwise ``text``) and one
    empty separator line.
    """
    lines = [
        line
        for block in blocks
        for line in (
            str(block["index"]),
            f'{block["start"]} --> {block["end"]}',
            block.get("translated_text", block["text"]),
            "",
        )
    ]
    return "\n".join(lines)

View file

@ -0,0 +1,35 @@
import re
def parse_srt(content: str):
    """Parse SRT text into a list of ``{"index", "start", "end", "text"}`` dicts.

    Line endings are normalized to ``\\n`` and blocks are split on blank lines.
    A block is skipped when it has fewer than three non-empty lines, when its
    first line is not an integer, or when its second line does not contain
    ``" --> "``.
    """
    normalized = content.replace("\r\n", "\n").replace("\r", "\n").strip()
    parsed = []
    for chunk in re.split(r"\n\s*\n", normalized):
        rows = [row.rstrip() for row in chunk.split("\n") if row.strip() != ""]
        if len(rows) < 3:
            continue
        try:
            number = int(rows[0].strip())
            start, arrow, end = rows[1].strip().partition(" --> ")
            if not arrow:
                continue
            parsed.append(
                {
                    "index": number,
                    "start": start,
                    "end": end,
                    "text": "\n".join(rows[2:]).strip(),
                }
            )
        except Exception:
            # Malformed block (e.g. non-numeric index): skip it.
            continue
    return parsed

View file

@ -0,0 +1,193 @@
import re
from app.translators.factory import get_translator
def parse_srt(content: str):
    """Parse SRT text into a list of ``{"index", "start", "end", "text"}`` dicts.

    Line endings are normalized to ``\\n`` and blocks are split on blank lines.
    A block is skipped when it has fewer than three lines, a non-integer first
    line, or no ``-->`` in its second line.  Start and end are the stripped
    parts on either side of the first ``-->``.
    """
    normalized = content.replace("\r\n", "\n").replace("\r", "\n").strip()
    parsed = []
    for chunk in re.split(r"\n\s*\n", normalized):
        rows = chunk.strip().split("\n")
        if len(rows) < 3:
            continue
        number_line, timing_line, *body = rows
        try:
            number = int(number_line.strip())
        except ValueError:
            continue
        start, arrow, end = timing_line.partition("-->")
        if not arrow:
            continue
        parsed.append(
            {
                "index": number,
                "start": start.strip(),
                "end": end.strip(),
                "text": "\n".join(body).strip(),
            }
        )
    return parsed
def build_srt(blocks):
    """Serialize blocks to SRT text: index, timing line and text per block.

    Every block ends with a newline and blocks are separated by one empty line.
    """
    return "\n".join(
        f"{entry['index']}\n{entry['start']} --> {entry['end']}\n{entry['text']}\n"
        for entry in blocks
    )
def _make_translator(mode=None, source_lang="auto", target_lang="da"):
    """Create a translator through ``get_translator``, probing call signatures.

    ``get_translator`` is tried with progressively fewer arguments (keyword and
    positional forms) until one call returns a non-``None`` translator.

    Raises:
        RuntimeError: If every attempt failed or returned ``None``.  The last
            underlying exception is attached as ``__cause__`` so its traceback
            is not lost.
    """
    attempts = [
        lambda: get_translator(mode=mode, source_lang=source_lang, target_lang=target_lang),
        lambda: get_translator(mode, source_lang, target_lang),
        lambda: get_translator(mode=mode),
        lambda: get_translator(mode),
        lambda: get_translator(),
    ]
    last_error = None
    for attempt in attempts:
        try:
            translator = attempt()
        except Exception as e:
            # Remember the failure and fall through to the next call shape.
            last_error = e
            continue
        if translator is not None:
            return translator
    # Chain the cause: raised outside the ``except`` block, the original
    # exception would otherwise survive only as text in the message.
    raise RuntimeError(
        f"Could not create translator via get_translator(): {last_error}"
    ) from last_error
def _translate_blocks(
    translator,
    blocks,
    source_lang="auto",
    target_lang="da",
    progress_callback=None,
):
    """Translate ``blocks`` with whichever interface ``translator`` offers.

    Interfaces are tried in this order: ``translate_blocks`` (delegated to
    entirely, including progress reporting), ``translate_batch``, a per-text
    ``translate`` method, and finally the translator itself as a callable.

    Args:
        translator: Object or callable implementing one of the interfaces.
        blocks: Sequence of block dicts; each must have a ``"text"`` key.
        source_lang: Source language code handed to the translator.
        target_lang: Target language code handed to the translator.
        progress_callback: Optional ``callback(done, total)``.

    Returns:
        New block dicts (the inputs are not mutated) with ``"text"`` replaced,
        one per input block.  A missing translation keeps the source text.

    Raises:
        TypeError: If the translator implements none of the interfaces.
    """
    if hasattr(translator, "translate_blocks"):
        return translator.translate_blocks(
            blocks,
            source_lang,
            target_lang,
            progress_callback=progress_callback,
        )

    total = len(blocks)

    def with_translation(block, translated):
        """Return a copy of ``block`` carrying the translated text."""
        if isinstance(translated, dict):
            translated = (
                translated.get("text")
                or translated.get("translated_text")
                or translated.get("translation")
                or block["text"]
            )
        elif translated is None:
            # No translation available: keep the source text rather than
            # emitting the literal string "None".
            translated = block["text"]
        new_block = dict(block)
        new_block["text"] = str(translated).strip()
        return new_block

    if hasattr(translator, "translate_batch"):
        translated_texts = list(translator.translate_batch(blocks, source_lang, target_lang))
        output = []
        for i, block in enumerate(blocks, start=1):
            # A short reply must not drop subtitle blocks: blocks without a
            # matching translation are passed through untranslated.
            translated = translated_texts[i - 1] if i <= len(translated_texts) else None
            output.append(with_translation(block, translated))
            if progress_callback:
                progress_callback(i, total)
        return output

    if hasattr(translator, "translate"):
        translate_one = translator.translate
    elif callable(translator):
        translate_one = translator
    else:
        raise TypeError(f"Unsupported translator interface: {type(translator)}")

    output = []
    for i, block in enumerate(blocks, start=1):
        try:
            translated = translate_one(block["text"], source_lang, target_lang)
        except TypeError:
            # Translators that only accept the text itself.
            translated = translate_one(block["text"])
        output.append(with_translation(block, translated))
        if progress_callback:
            progress_callback(i, total)
    return output
def translate_srt_content(
    content,
    translator=None,
    mode=None,
    source_lang="auto",
    target_lang="da",
    job_id=None,
    progress_callback=None,
    **kwargs,
):
    """Translate SRT text and return the translated SRT text.

    Args:
        content: Raw SRT file content.
        translator: Optional ready-made translator; when omitted one is created
            for ``mode``.
        mode: Translator mode used when a translator has to be created.
        source_lang: Source language code.
        target_lang: Target language code.
        job_id: Identifier of the calling job (only used in debug output).
        progress_callback: Optional ``callback(done, total)``.
        **kwargs: Per-job options.  ``model`` (non-empty string) and
            ``workers`` (positive int) are applied to a translator created
            here when it exposes an attribute of the same name; other keys are
            only echoed in the debug output.

    Raises:
        ValueError: If no SRT block could be parsed from ``content``.
    """
    print("DEBUG: ===== START translate_srt_content =====")
    print("DEBUG: mode:", mode)
    print("DEBUG: source_lang:", source_lang)
    print("DEBUG: target_lang:", target_lang)
    print("DEBUG: job_id:", job_id)
    print("DEBUG: extra kwargs:", kwargs)
    blocks = parse_srt(content)
    print("DEBUG: blocks count:", len(blocks))
    print("DEBUG: first block:", blocks[0] if blocks else "NONE")
    if not blocks:
        raise ValueError("No SRT blocks could be parsed from content")
    if translator is None:
        translator = _make_translator(
            mode=mode,
            source_lang=source_lang,
            target_lang=target_lang,
        )
        # Apply the per-job settings.  Previously they were swallowed by
        # **kwargs, so the model and worker count chosen for a job had no
        # effect.  Only translators created here are touched; a translator
        # supplied by the caller is left as configured.
        model = kwargs.get("model")
        if isinstance(model, str) and model.strip() and hasattr(translator, "model"):
            translator.model = model.strip()
        workers = kwargs.get("workers")
        if (
            isinstance(workers, int)
            and not isinstance(workers, bool)
            and workers > 0
            and hasattr(translator, "workers")
        ):
            translator.workers = workers
    print("DEBUG: translator created:", type(translator))
    translated_blocks = _translate_blocks(
        translator,
        blocks,
        source_lang=source_lang,
        target_lang=target_lang,
        progress_callback=progress_callback,
    )
    print("DEBUG: translated blocks count:", len(translated_blocks))
    print("DEBUG: ===== END translate_srt_content =====")
    return build_srt(translated_blocks)

View file

@ -0,0 +1,88 @@
import os
import re
from openai import OpenAI
# Marker line placed before each subtitle block in a batch prompt; the prompt
# asks the model to keep it unchanged so the reply can be split per block.
BLOCK_MARKER = "<<<BLOCK_{index}>>>"
# Number of subtitle blocks sent per API request.
BATCH_SIZE = 50
def _build_batch_prompt(blocks):
    """Build the translation prompt for one batch of subtitle blocks.

    Every block is rendered as its marker line, its text and an empty line;
    the result is appended to the fixed instruction text and the whole prompt
    is stripped of surrounding whitespace.
    """
    rendered = [
        line
        for block in blocks
        for line in (BLOCK_MARKER.format(index=block["index"]), block["text"], "")
    ]
    joined = "\n".join(rendered)
    return f"""
You are translating subtitle text from English to Danish.
Rules:
- Translate naturally into short, readable Danish suitable for subtitles.
- Keep each block marker exactly unchanged.
- Do not add explanations.
- Do not remove markers.
- Return only the translated blocks.
Text to translate:
{joined}
""".strip()
def _parse_translated_response(translated_text):
    """Split a model reply into ``{block_index: translated_text}``.

    Each ``<<<BLOCK_n>>>`` marker starts a section that runs up to the next
    marker line or the end of the reply; section text is stripped.  When a
    marker occurs more than once, the last occurrence wins.
    """
    section_re = re.compile(
        r"<<<BLOCK_(\d+)>>>\n?(.*?)(?=(?:\n<<<BLOCK_\d+>>>|\Z))",
        re.DOTALL,
    )
    return {
        int(found.group(1)): found.group(2).strip()
        for found in section_re.finditer(translated_text)
    }
def _chunked(seq, size):
    """Yield consecutive slices of ``seq`` holding at most ``size`` items each."""
    yield from (seq[offset:offset + size] for offset in range(0, len(seq), size))
def translate_blocks(blocks):
    """Add a ``translated_text`` entry to every block, in place, and return ``blocks``.

    Without an ``OPENAI_API_KEY`` the source text is merely prefixed with
    ``"[DA] "``.  Otherwise the blocks are sent to the model named by
    ``OPENAI_MODEL`` in batches of ``BATCH_SIZE``; a block the reply does not
    cover keeps its source text.
    """
    api_key = os.environ.get("OPENAI_API_KEY", "").strip()
    if api_key == "":
        print("DEBUG: no OPENAI_API_KEY found, using fallback translator", flush=True)
        for entry in blocks:
            entry["translated_text"] = "[DA] " + entry["text"]
        return blocks

    client = OpenAI(api_key=api_key)
    model = os.environ.get("OPENAI_MODEL", "gpt-4.1-mini")
    by_index = {}
    batch_num = 0
    for batch in _chunked(blocks, BATCH_SIZE):
        batch_num += 1
        print(f"DEBUG: translating batch {batch_num} with {len(batch)} blocks", flush=True)
        response = client.responses.create(
            model=model,
            input=_build_batch_prompt(batch),
        )
        parsed = _parse_translated_response(response.output_text)
        print(
            f"DEBUG: batch {batch_num} parsed translations = {len(parsed)}",
            flush=True
        )
        by_index.update(
            {entry["index"]: parsed.get(entry["index"], entry["text"]) for entry in batch}
        )

    for entry in blocks:
        entry["translated_text"] = by_index.get(entry["index"], entry["text"])
    return blocks

41
app/srt_parser.py Normal file
View file

@ -0,0 +1,41 @@
# app/srt_parser.py
import re
from typing import List, Dict
# Matches one SRT cue: a numeric index line, a timing line of the form
# "HH:MM:SS,mmm --> HH:MM:SS,mmm", then the cue text up to the next run of
# two or more newlines or the end of the input (DOTALL lets the text span lines).
SRT_BLOCK_RE = re.compile(
r"(\d+)\s*\n"
r"(\d{2}:\d{2}:\d{2},\d{3})\s-->\s(\d{2}:\d{2}:\d{2},\d{3})\s*\n"
r"(.*?)(?=\n{2,}|\Z)",
re.DOTALL,
)
def parse_srt(content: str) -> List[Dict[str, str]]:
    """Extract every cue matched by ``SRT_BLOCK_RE`` from ``content``.

    Line endings are normalized to ``\\n`` first.  Each cue becomes a dict with
    an integer ``index``, the ``start`` and ``end`` timestamps and the
    stripped ``text``.
    """
    normalized = content.replace("\r\n", "\n").replace("\r", "\n").strip()
    return [
        {
            "index": int(cue.group(1)),
            "start": cue.group(2),
            "end": cue.group(3),
            "text": cue.group(4).strip(),
        }
        for cue in SRT_BLOCK_RE.finditer(normalized)
    ]
def build_srt(blocks: List[Dict[str, str]]) -> str:
    """Serialize blocks to SRT text, renumbering them from 1 in list order.

    The stored ``index`` of each block is ignored.  The result is stripped and
    always ends with exactly one newline.
    """
    lines = [
        line
        for number, block in enumerate(blocks, start=1)
        for line in (
            str(number),
            f"{block['start']} --> {block['end']}",
            block["text"],
            "",
        )
    ]
    return "\n".join(lines).strip() + "\n"

254
app/templates/index.html Normal file
View file

@ -0,0 +1,254 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title>SubFox</title>
<style>
body {
font-family: Arial, sans-serif;
max-width: 760px;
margin: 40px auto;
padding: 0 16px;
line-height: 1.4;
}
h1 {
margin-bottom: 8px;
}
.card {
border: 1px solid #ddd;
border-radius: 12px;
padding: 16px;
margin-top: 20px;
}
.grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 12px;
}
.full {
grid-column: 1 / -1;
}
label {
display: block;
font-weight: 600;
margin-bottom: 6px;
}
input, select, button {
width: 100%;
padding: 10px;
box-sizing: border-box;
}
button {
cursor: pointer;
font-weight: 700;
}
progress {
width: 100%;
height: 22px;
}
.muted {
color: #666;
font-size: 14px;
}
.hidden {
display: none;
}
#statusBox {
margin-top: 16px;
}
#downloadLink {
display: inline-block;
margin-top: 12px;
font-weight: 700;
}
pre {
white-space: pre-wrap;
word-break: break-word;
background: #f7f7f7;
padding: 12px;
border-radius: 8px;
}
</style>
</head>
<body>
<h1>SubFox</h1>
<div class="muted">Subtitle translator with per-job settings</div>
<div class="card">
<form id="uploadForm">
<div class="grid">
<div class="full">
<label for="file">SRT file</label>
<input id="file" name="file" type="file" accept=".srt" required />
</div>
<div>
<label for="mode">Mode</label>
<select id="mode" name="mode">
<option value="fast" selected>fast</option>
</select>
</div>
<div>
<label for="target_lang">Target language</label>
<select id="target_lang" name="target_lang">
<option value="da" selected>Danish</option>
<option value="en">English</option>
<option value="de">German</option>
<option value="sv">Swedish</option>
<option value="no">Norwegian</option>
</select>
</div>
<div>
<label for="source_lang">Source language</label>
<select id="source_lang" name="source_lang">
<option value="auto" selected>auto</option>
<option value="en">English</option>
<option value="da">Danish</option>
<option value="de">German</option>
<option value="sv">Swedish</option>
<option value="no">Norwegian</option>
</select>
</div>
<div>
<label for="model">Model</label>
<select id="model" name="model">
<option value="gpt-4o-mini" selected>gpt-4o-mini</option>
<option value="gpt-4.1-mini">gpt-4.1-mini</option>
</select>
</div>
<div>
<label for="workers">Workers</label>
<input id="workers" name="workers" type="number" min="1" max="16" value="4" />
</div>
<div class="full">
<button type="submit">Start translation</button>
</div>
</div>
</form>
<div id="statusBox" class="hidden">
<p><strong>Status:</strong> <span id="statusText">queued</span></p>
<p><strong>Progress:</strong> <span id="progressText">0%</span></p>
<progress id="progressBar" value="0" max="100"></progress>
<div class="card">
<div><strong>Job settings</strong></div>
<pre id="jobMeta"></pre>
</div>
<a id="downloadLink" class="hidden" href="#">Download translated file</a>
<div id="errorBox" class="hidden">
<strong>Error</strong>
<pre id="errorText"></pre>
</div>
</div>
</div>
<script>
const form = document.getElementById("uploadForm");
const statusBox = document.getElementById("statusBox");
const statusText = document.getElementById("statusText");
const progressText = document.getElementById("progressText");
const progressBar = document.getElementById("progressBar");
const downloadLink = document.getElementById("downloadLink");
const errorBox = document.getElementById("errorBox");
const errorText = document.getElementById("errorText");
const jobMeta = document.getElementById("jobMeta");
let pollTimer = null;
// Render one job snapshot: status text, progress bar, job settings,
// download link (only when the job is done) and error box (only on error).
function setStatus(job) {
  const progress = job.progress ?? 0;
  const failed = job.status === "error";
  const { mode, source_lang, target_lang, model, workers, blocks } = job;

  statusBox.classList.remove("hidden");
  statusText.textContent = job.status ?? "unknown";
  progressText.textContent = `${progress}%`;
  progressBar.value = progress;
  jobMeta.textContent = JSON.stringify(
    { mode, source_lang, target_lang, model, workers, blocks },
    null,
    2
  );

  if (job.done) {
    downloadLink.href = `/download/${jobId}`;
  }
  downloadLink.classList.toggle("hidden", !job.done);

  errorBox.classList.toggle("hidden", !failed);
  errorText.textContent = failed ? (job.error || "Unknown error") : "";
}
let jobId = null;
// Fetch the current job's status, render it, and schedule the next poll
// unless the job is finished or has failed.
async function pollStatus() {
  if (!jobId) return;

  let job;
  try {
    const res = await fetch(`/status/${jobId}`);
    job = await res.json();
    if (!res.ok) {
      // E.g. 404 for a job the server no longer knows. Treat it as a
      // terminal error; otherwise the "unknown" status would be polled forever.
      job = {
        ...job,
        status: "error",
        error: job.error || `Status request failed (${res.status})`
      };
    }
  } catch (err) {
    // Network failure or a non-JSON body: show it instead of letting the
    // rejection go unhandled and the polling stop silently.
    job = {
      status: "error",
      progress: 0,
      error: `Could not fetch job status: ${err && err.message ? err.message : err}`
    };
  }

  setStatus(job);
  if (job.done || job.status === "error") {
    if (pollTimer) {
      clearTimeout(pollTimer);
      pollTimer = null;
    }
    return;
  }
  pollTimer = setTimeout(pollStatus, 800);
}
// Upload the form, then start polling the job the server created.
form.addEventListener("submit", async (e) => {
  e.preventDefault();
  if (pollTimer) {
    clearTimeout(pollTimer);
    pollTimer = null;
  }
  downloadLink.classList.add("hidden");
  errorBox.classList.add("hidden");
  progressBar.value = 0;
  progressText.textContent = "0%";
  statusText.textContent = "uploading...";
  statusBox.classList.remove("hidden");

  // Put the UI into the error state with the given message.
  const showFailure = (message) => {
    statusText.textContent = "error";
    errorBox.classList.remove("hidden");
    errorText.textContent = message;
  };

  let res;
  let data;
  try {
    res = await fetch("/start", {
      method: "POST",
      body: new FormData(form)
    });
    data = await res.json();
  } catch (err) {
    // Network failure, or an error response whose body is not JSON.
    // Without this the UI would stay on "uploading..." indefinitely.
    const reason = err && err.message ? err.message : String(err);
    showFailure(res ? `Upload failed (${res.status}): ${reason}` : `Upload failed: ${reason}`);
    return;
  }

  if (!res.ok) {
    showFailure(data.error || "Upload failed");
    return;
  }
  jobId = data.job_id;
  pollStatus();
});
</script>
</body>
</html>

View file

Binary file not shown.

Binary file not shown.

Binary file not shown.

13
app/translators/base.py Normal file
View file

@ -0,0 +1,13 @@
from abc import ABC, abstractmethod
from typing import List
class BaseTranslator(ABC):
    """Abstract base class for translators; subclasses implement ``translate_blocks``."""

    @abstractmethod
    def translate_blocks(
        self,
        texts: List[str],
        source_lang: str = "auto",
        target_lang: str = "da",
    ) -> List[str]:
        """Translate ``texts`` from ``source_lang`` to ``target_lang``.

        Must be overridden; this base implementation does nothing and returns
        ``None``.
        """

View file

@ -0,0 +1,9 @@
from .fast_engine import FastTranslator
def get_translator(mode: str = "fast"):
    """Return the translator for ``mode``.

    ``"smart"`` yields a ``SmartTranslator`` (imported only when requested);
    every other value yields a ``FastTranslator``.
    """
    if mode != "smart":
        return FastTranslator(max_chunk_chars=3500)
    from .smart_engine import SmartTranslator
    return SmartTranslator()

View file

@ -0,0 +1,151 @@
from __future__ import annotations
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable, Dict, List, Optional
from openai import OpenAI
from app.cache import get_cached, set_cache
def _env_int(name: str, default: int) -> int:
    """Read environment variable ``name`` as an int.

    Returns ``default`` when the variable is unset, empty, or not a valid
    integer.
    """
    raw = os.environ.get(name)
    if not raw:
        return default
    try:
        parsed = int(raw)
    except ValueError:
        return default
    return parsed
def _env_float(name: str, default: float) -> float:
    """Read environment variable ``name`` as a float.

    Returns ``default`` when the variable is unset, empty, or not a valid
    float.
    """
    raw = os.environ.get(name)
    if not raw:
        return default
    try:
        parsed = float(raw)
    except ValueError:
        return default
    return parsed
class FastTranslator:
    """Translate subtitle blocks one request per block, using a thread pool.

    Settings fall back to environment variables when not passed explicitly:
    ``OPENAI_API_KEY``, ``SUBFOX_MODEL``, ``SUBFOX_WORKERS``,
    ``SUBFOX_MAX_RETRIES`` and ``SUBFOX_RETRY_BASE_DELAY``.  Successful
    translations are stored in the on-disk cache and reused.
    """

    def __init__(
        self,
        api_key=None,
        model=None,
        workers=None,
        max_retries=None,
        retry_base_delay=None,
        **kwargs,
    ):
        """Store the configuration and create the OpenAI client.

        Extra keyword arguments are accepted and kept in ``self.kwargs`` but
        are not otherwise used by this class.
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.model = model or os.getenv("SUBFOX_MODEL", "gpt-4o-mini")
        self.workers = workers if workers is not None else _env_int("SUBFOX_WORKERS", 4)
        self.max_retries = (
            max_retries if max_retries is not None else _env_int("SUBFOX_MAX_RETRIES", 3)
        )
        self.retry_base_delay = (
            retry_base_delay
            if retry_base_delay is not None
            else _env_float("SUBFOX_RETRY_BASE_DELAY", 1.0)
        )
        self.kwargs = kwargs
        self.client = OpenAI(api_key=self.api_key) if self.api_key else OpenAI()

    def _translate_text(self, text: str, source_lang: str, target_lang: str) -> str:
        """Translate one text, consulting the cache first.

        Failed requests are retried with exponential backoff
        (``retry_base_delay * 2 ** (attempt - 1)`` seconds).

        Raises:
            RuntimeError: If no attempt produced a non-empty translation.
        """
        # The cache is an optimisation: an unusable cache directory must not
        # prevent translating.
        try:
            cached = get_cached(source_lang, target_lang, text)
        except OSError:
            cached = None
        if cached:
            return cached

        prompt = (
            f"Translate the following subtitle text from {source_lang} to {target_lang}. "
            "Preserve meaning, keep it natural, and return only the translated text.\n\n"
            f"{text}"
        )
        # Always make at least one attempt, even when max_retries is 0 or
        # negative (e.g. SUBFOX_MAX_RETRIES=0 meaning "no retries").
        attempts = max(1, self.max_retries)
        last_error = None
        for attempt in range(1, attempts + 1):
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {
                            "role": "system",
                            "content": (
                                "You are a subtitle translator. "
                                "Return only the translated text with no explanations."
                            ),
                        },
                        {"role": "user", "content": prompt},
                    ],
                    temperature=0,
                )
                content = response.choices[0].message.content or ""
                result = content.strip()
            except Exception as e:
                last_error = e
                if attempt >= attempts:
                    break
                delay = self.retry_base_delay * (2 ** (attempt - 1))
                time.sleep(delay)
                continue

            if result:
                # Keep the cache write outside the retry logic: a cache I/O
                # error must not discard a good translation and trigger
                # another paid API request.
                try:
                    set_cache(source_lang, target_lang, text, result, self.model)
                except OSError:
                    pass
                return result
            # Empty reply: try again, and make the final error message say why.
            last_error = ValueError("model returned an empty translation")

        raise RuntimeError(
            f"Translation failed after {attempts} attempts: {last_error}"
        ) from last_error

    def _translate_one(self, block: Dict, source_lang: str, target_lang: str) -> Dict:
        """Return a copy of ``block`` whose ``"text"`` is translated."""
        new_block = dict(block)
        new_block["text"] = self._translate_text(
            block["text"],
            source_lang,
            target_lang,
        )
        return new_block

    def translate_blocks(
        self,
        blocks: List[Dict],
        source_lang: str,
        target_lang: str,
        progress_callback: Optional[Callable[[int, int], None]] = None,
    ) -> List[Dict]:
        """Translate all blocks in parallel, preserving the input order.

        Args:
            blocks: Block dicts with a ``"text"`` key; they are not mutated.
            source_lang: Source language code.
            target_lang: Target language code.
            progress_callback: Optional ``callback(done, total)`` invoked in
                the calling thread after each finished block.

        Raises:
            RuntimeError: Propagated from the first block that fails; blocks
                that have not started yet are cancelled.
        """
        total = len(blocks)
        if total == 0:
            return []
        output: List[Optional[Dict]] = [None] * total
        # ThreadPoolExecutor rejects max_workers <= 0, which a setting such as
        # SUBFOX_WORKERS=0 would otherwise turn into a crash.
        max_workers = max(1, int(self.workers))
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {
                executor.submit(self._translate_one, block, source_lang, target_lang): i
                for i, block in enumerate(blocks)
            }
            done = 0
            try:
                for future in as_completed(futures):
                    idx = futures[future]
                    output[idx] = future.result()
                    done += 1
                    if progress_callback:
                        progress_callback(done, total)
            except BaseException:
                # The job has already failed: do not start the queued
                # requests (cancel() is a no-op for running/finished futures).
                for pending in futures:
                    pending.cancel()
                raise
        return [block for block in output if block is not None]

    def translate_batch(
        self,
        batch: List[Dict],
        source_lang: str,
        target_lang: str,
    ) -> List[str]:
        """Translate ``batch`` and return only the translated texts, in order."""
        translated_blocks = self.translate_blocks(batch, source_lang, target_lang)
        return [block["text"] for block in translated_blocks]

View file

@ -0,0 +1,58 @@
# app/translators/smart_engine.py
import json
import os
from typing import List
from openai import OpenAI
from .base import BaseTranslator
class SmartTranslator(BaseTranslator):
    """Translator that sends several subtitle texts per chat-completion request.

    Texts are sent as a JSON array of ``{"i": position, "text": ...}`` items
    and the reply is expected in the same shape.  Any text the reply does not
    cover is returned untranslated.
    """

    def __init__(self, api_key: str | None = None, model: str = "gpt-4.1-mini", batch_size: int = 40):
        """Create the OpenAI client.

        Raises:
            ValueError: If no API key is passed and ``OPENAI_API_KEY`` is unset.
        """
        api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY mangler for smart mode")
        self.client = OpenAI(api_key=api_key)
        self.model = model
        self.batch_size = batch_size

    @staticmethod
    def _parse_batch_response(raw: str, batch: List[str]) -> List[str]:
        """Map a model reply back onto ``batch``; always returns ``len(batch)`` items.

        Positions missing from the reply, or replies that are not a JSON
        array, leave the corresponding source text in place.
        """
        result = list(batch)
        cleaned = (raw or "").strip()
        if cleaned.startswith("```"):
            # The reply is wrapped in a Markdown code fence; drop the opening
            # fence line and the closing fence before parsing.
            cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
            cleaned = cleaned.rstrip()
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
        try:
            data = json.loads(cleaned)
        except ValueError:
            return result
        if not isinstance(data, list):
            return result
        for position, item in enumerate(data):
            if not isinstance(item, dict):
                continue
            # Trust the echoed position when present so a reordered or
            # shortened reply cannot shift translations onto the wrong cue.
            target = item.get("i", position)
            text = item.get("text")
            if (
                isinstance(target, int)
                and not isinstance(target, bool)
                and 0 <= target < len(result)
                and isinstance(text, str)
                and text.strip()
            ):
                result[target] = text
        return result

    def _translate_batch(self, batch: List[str], source_lang: str, target_lang: str) -> List[str]:
        """Translate one batch of texts with a single API request."""
        payload = [{"i": i, "text": t} for i, t in enumerate(batch)]
        prompt = (
            f"Translate these subtitle lines from {source_lang} to {target_lang}.\n"
            "Return ONLY valid JSON array.\n"
            'Each item must be like: {"i": 0, "text": "..."}\n'
            "Keep same order, keep line meaning natural and concise.\n\n"
            f"{json.dumps(payload, ensure_ascii=False)}"
        )
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
        )
        text = response.choices[0].message.content or ""
        return self._parse_batch_response(text, batch)

    def translate_blocks(
        self,
        texts: List[str],
        source_lang: str = "auto",
        target_lang: str = "da",
        progress_callback=None,
    ) -> List[str]:
        """Translate ``texts`` in batches of ``batch_size``.

        Args:
            texts: Plain strings, or block dicts with a ``"text"`` key.
            source_lang: Source language code.
            target_lang: Target language code.
            progress_callback: Optional ``callback(done, total)`` invoked after
                each batch.

        Returns:
            One result per input, in order: a translated string for a string
            input, or a copy of the dict with ``"text"`` replaced for a dict
            input.
        """
        items = list(texts)
        sources = [item["text"] if isinstance(item, dict) else item for item in items]
        total = len(sources)
        step = max(1, self.batch_size)  # a non-positive batch size would break range()
        translated: List[str] = []
        for start in range(0, total, step):
            batch = sources[start:start + step]
            translated.extend(self._translate_batch(batch, source_lang, target_lang))
            if progress_callback:
                progress_callback(len(translated), total)
        out = []
        for item, text in zip(items, translated):
            out.append({**item, "text": text} if isinstance(item, dict) else text)
        return out