Make SubFox production-ready with parallel translation and UI controls
This commit is contained in:
parent
c40b8bed2b
commit
2b1d05f02c
6046 changed files with 798327 additions and 0 deletions
183
.venv/lib/python3.10/site-packages/deep_translator/base.py
Normal file
183
.venv/lib/python3.10/site-packages/deep_translator/base.py
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
"""base translator class"""
|
||||
|
||||
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from deep_translator.constants import GOOGLE_LANGUAGES_TO_CODES
|
||||
from deep_translator.exceptions import (
|
||||
InvalidSourceOrTargetLanguage,
|
||||
LanguageNotSupportedException,
|
||||
)
|
||||
|
||||
|
||||
class BaseTranslator(ABC):
    """
    Abstract class that serves as a base translator for the concrete translators.

    It stores the language table, resolves the source and target languages to
    their codes, and provides file and batch helpers built on top of the
    abstract ``translate`` method.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        languages: dict = GOOGLE_LANGUAGES_TO_CODES,
        source: str = "auto",
        target: str = "en",
        payload_key: Optional[str] = None,
        element_tag: Optional[str] = None,
        element_query: Optional[dict] = None,
        **url_params,
    ):
        """
        @param base_url: base url stored on the instance for subclasses to use
        @param languages: dictionary mapping language names to their codes
        @param source: source language to translate from
        @param target: target language to translate to
        @param payload_key: stored as ``self.payload_key``
        @param element_tag: stored as ``self._element_tag``
        @param element_query: stored as ``self._element_query``
        @param url_params: extra keyword arguments, stored as ``self._url_params``
        @raise InvalidSourceOrTargetLanguage: if source or target is empty
        @raise LanguageNotSupportedException: if source or target is unknown
        """
        self._base_url = base_url
        self._languages = languages
        self._supported_languages = list(self._languages.keys())
        if not source:
            raise InvalidSourceOrTargetLanguage(source)
        if not target:
            raise InvalidSourceOrTargetLanguage(target)

        # _map_language_to_code is a generator; unpacking consumes it right
        # here, so unsupported languages are reported at construction time.
        self._source, self._target = self._map_language_to_code(source, target)
        self._url_params = url_params
        self._element_tag = element_tag
        self._element_query = element_query
        self.payload_key = payload_key
        super().__init__()

    @property
    def source(self):
        """Source language as stored on the instance."""
        return self._source

    @source.setter
    def source(self, lang):
        # NOTE: the value is stored as given; it is not validated or mapped.
        self._source = lang

    @property
    def target(self):
        """Target language as stored on the instance."""
        return self._target

    @target.setter
    def target(self, lang):
        # NOTE: the value is stored as given; it is not validated or mapped.
        self._target = lang

    def _type(self):
        """Return the name of the concrete class."""
        return self.__class__.__name__

    def _map_language_to_code(self, *languages):
        """
        map language to its corresponding code (abbreviation) if the language was passed
        by its full name by the user

        This is a generator: nothing is checked until it is iterated.
        @param languages: list of languages
        @return: mapped value of the language or raise an exception if the language is
        not supported
        """
        for language in languages:
            if language in self._languages.values() or language == "auto":
                # already a code (or the "auto" marker): pass through unchanged
                yield language
            elif language in self._languages:
                yield self._languages[language]
            else:
                raise LanguageNotSupportedException(
                    language,
                    message=f"No support for the provided language.\n"
                    f"Please select on of the supported languages:\n"
                    f"{self._languages}",
                )

    def _same_source_target(self) -> bool:
        """Return True when source and target are the same language."""
        return self._source == self._target

    def get_supported_languages(
        self, as_dict: bool = False, **kwargs
    ) -> Union[list, dict]:
        """
        return the languages supported by this translator
        @param as_dict: if True, the languages will be returned as a dictionary
        mapping languages to their abbreviations
        @return: list or dict
        """
        return self._languages if as_dict else self._supported_languages

    def is_language_supported(self, language: str, **kwargs) -> bool:
        """
        check if the language is supported by the translator
        @param language: a string for 1 language (name, code or "auto")
        @return: bool
        """
        return (
            language == "auto"
            or language in self._languages
            or language in self._languages.values()
        )

    @abstractmethod
    def translate(self, text: str, **kwargs) -> str:
        """
        translate a text using a translator under the hood and return
        the translated text
        @param text: text to translate
        @param kwargs: additional arguments
        @return: str
        @raise NotImplementedError: always; subclasses must override this method
        """
        # NotImplemented is a non-callable constant, so calling it raised a
        # confusing TypeError; NotImplementedError is the intended signal.
        raise NotImplementedError("You need to implement the translate method!")

    def _read_docx(self, f: str):
        """
        extract the text of a .docx file
        @param f: path to the file
        @return: whatever docx2txt.process returns for the file
        """
        # imported lazily so docx2txt is only needed when a .docx is read
        import docx2txt

        return docx2txt.process(f)

    def _read_pdf(self, f: str):
        """
        extract the text of the FIRST page of a pdf file
        @param f: path to the file
        @return: text of the first page; later pages are not read
        """
        # imported lazily so pypdf is only needed when a .pdf is read
        import pypdf

        reader = pypdf.PdfReader(f)
        page = reader.pages[0]
        return page.extract_text()

    def _translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args, forwarded to ``translate``
        @return: str
        @raise FileNotFoundError: if the path does not exist
        """
        if not isinstance(path, Path):
            path = Path(path)

        if not path.exists():
            # Raise instead of print + exit(1): library code must not terminate
            # the host process (or silently kill a worker thread).
            raise FileNotFoundError(f"Path to the file is wrong! ({path})")

        # compare case-insensitively so ".PDF" / ".DOCX" use the right reader
        ext = path.suffix.lower()

        if ext == ".docx":
            text = self._read_docx(f=str(path))
        elif ext == ".pdf":
            text = self._read_pdf(f=str(path))
        else:
            text = path.read_text(encoding="utf-8").strip()

        return self.translate(text, **kwargs)

    def _translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @param kwargs: additional args, forwarded to ``translate`` for each text
        @return: list of translations, in the same order as ``batch``
        @raise Exception: if ``batch`` is empty
        """
        if not batch:
            raise Exception("Enter your text list that you want to translate")
        return [self.translate(text, **kwargs) for text in batch]
|
||||
Loading…
Add table
Add a link
Reference in a new issue