Make SubFox production-ready with parallel translation and UI controls

This commit is contained in:
Eddie Nielsen 2026-03-25 11:24:54 +00:00
parent c40b8bed2b
commit 2b1d05f02c
6046 changed files with 798327 additions and 0 deletions

View file

@ -0,0 +1,38 @@
"""Top-level package for Deep Translator"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from deep_translator.baidu import BaiduTranslator
from deep_translator.chatgpt import ChatGptTranslator
from deep_translator.deepl import DeeplTranslator
from deep_translator.detection import batch_detection, single_detection
from deep_translator.google import GoogleTranslator
from deep_translator.libre import LibreTranslator
from deep_translator.linguee import LingueeTranslator
from deep_translator.microsoft import MicrosoftTranslator
from deep_translator.mymemory import MyMemoryTranslator
from deep_translator.papago import PapagoTranslator
from deep_translator.pons import PonsTranslator
from deep_translator.qcri import QcriTranslator
from deep_translator.yandex import YandexTranslator
# Package metadata.
__author__ = """Nidhal Baccouri"""
__email__ = "nidhalbacc@gmail.com"
__version__ = "1.9.1"
# Public API of the package: the translator classes and the two language
# detection helpers imported above. This list is what
# `from deep_translator import *` exports.
__all__ = [
"GoogleTranslator",
"PonsTranslator",
"LingueeTranslator",
"MyMemoryTranslator",
"YandexTranslator",
"MicrosoftTranslator",
"QcriTranslator",
"DeeplTranslator",
"LibreTranslator",
"PapagoTranslator",
"ChatGptTranslator",
"BaiduTranslator",
"single_detection",
"batch_detection",
]

View file

@ -0,0 +1,12 @@
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from deep_translator.cli import CLI
def main():
    """Build the command-line interface from the process arguments and run it."""
    cli = CLI()
    cli.run()


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,120 @@
"""
baidu translator API
"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
import hashlib
import os
import random
from typing import List, Optional
import requests
from deep_translator.base import BaseTranslator
from deep_translator.constants import (
BAIDU_APPID_ENV_VAR,
BAIDU_APPKEY_ENV_VAR,
BAIDU_LANGUAGE_TO_CODE,
BASE_URLS,
)
from deep_translator.exceptions import (
ApiKeyException,
BaiduAPIerror,
ServerException,
TranslationNotFound,
)
from deep_translator.validate import is_empty, is_input_valid
class BaiduTranslator(BaseTranslator):
    """
    class that wraps functions, which use the Baidu translation HTTP API
    under the hood to translate word(s)
    """

    def __init__(
        self,
        source: str = "en",
        target: str = "zh",
        appid: Optional[str] = os.getenv(BAIDU_APPID_ENV_VAR, None),
        appkey: Optional[str] = os.getenv(BAIDU_APPKEY_ENV_VAR, None),
        **kwargs
    ):
        """
        @param source: source language
        @param target: target language
        @param appid: your baidu cloud api appid.
            Get one here: https://fanyi-api.baidu.com/choose
        @param appkey: your baidu cloud api appkey.
        @param kwargs: forwarded unchanged to BaseTranslator.__init__
        @raise ApiKeyException: if appid or appkey is missing or empty

        NOTE: the environment-variable defaults are evaluated once, when this
        module is imported, not on every instantiation.
        """
        if not appid:
            raise ApiKeyException(env_var=BAIDU_APPID_ENV_VAR)
        if not appkey:
            raise ApiKeyException(env_var=BAIDU_APPKEY_ENV_VAR)
        self.appid = appid
        self.appkey = appkey
        super().__init__(
            base_url=BASE_URLS.get("BAIDU"),
            source=source,
            target=target,
            languages=BAIDU_LANGUAGE_TO_CODE,
            **kwargs
        )

    def translate(self, text: str, **kwargs) -> str:
        """
        @param text: text to translate
        @return: translated text; when the API returns several result
            segments they are joined with newlines
        @raise ServerException: the connection failed (503) or the server
            answered with a status other than 200
        @raise BaiduAPIerror: the response carries an ``error_code``
        @raise TranslationNotFound: the response is empty or has no
            ``trans_result``
        """
        if not is_input_valid(text):
            # Same outcome as before: nothing is returned when the validator
            # reports the input as unusable.
            return None
        if self._same_source_target() or is_empty(text):
            return text

        # Create the request parameters. The signature is the MD5 digest of
        # appid + text + salt + appkey.
        salt = random.randint(32768, 65536)
        sign = hashlib.md5(
            (self.appid + text + str(salt) + self.appkey).encode("utf-8")
        ).hexdigest()
        headers = {"Content-Type": "application/x-www-form-urlencoded"}
        payload = {
            "appid": self.appid,
            "q": text,
            "from": self.source,
            "to": self.target,
            "salt": salt,
            "sign": sign,
        }

        # Do the request and check the connection.
        # requests raises its own ConnectionError class, which does not derive
        # from the builtin ConnectionError, so it has to be named explicitly
        # for the 503 mapping to take effect. The builtin is kept as well.
        try:
            response = requests.post(
                self._base_url, params=payload, headers=headers
            )
        except (requests.exceptions.ConnectionError, ConnectionError) as err:
            raise ServerException(503) from err
        if response.status_code != 200:
            raise ServerException(response.status_code)

        # Get the response and check is not empty.
        res = response.json()
        if not res:
            raise TranslationNotFound(text)

        # Process and return the response.
        if "error_code" in res:
            raise BaiduAPIerror(res["error_msg"])
        if "trans_result" in res:
            return "\n".join([s["dst"] for s in res["trans_result"]])
        raise TranslationNotFound(text)

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate the content of a file
        @param path: path to the file to translate
        @return: translated text
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        @param batch: list of texts to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)

View file

@ -0,0 +1,183 @@
"""base translator class"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Optional, Union
from deep_translator.constants import GOOGLE_LANGUAGES_TO_CODES
from deep_translator.exceptions import (
InvalidSourceOrTargetLanguage,
LanguageNotSupportedException,
)
class BaseTranslator(ABC):
    """
    Abstract class that serves as a base translator for the other translators.

    It stores the endpoint and language table, normalises the source/target
    languages to codes, and provides the shared file and batch helpers.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        languages: dict = GOOGLE_LANGUAGES_TO_CODES,
        source: str = "auto",
        target: str = "en",
        payload_key: Optional[str] = None,
        element_tag: Optional[str] = None,
        element_query: Optional[dict] = None,
        **url_params,
    ):
        """
        @param base_url: endpoint of the translation service
        @param languages: mapping of language names to codes supported by
            the translator (stored as-is, not copied)
        @param source: source language to translate from
        @param target: target language to translate to
        @param payload_key: stored on the instance as ``payload_key``
        @param element_tag: stored on the instance as ``_element_tag``
        @param element_query: stored on the instance as ``_element_query``
        @param url_params: extra keyword arguments, stored as ``_url_params``
        @raise InvalidSourceOrTargetLanguage: if source or target is empty
        @raise LanguageNotSupportedException: if source or target is not in
            the language table
        """
        self._base_url = base_url
        self._languages = languages
        self._supported_languages = list(self._languages.keys())
        if not source:
            raise InvalidSourceOrTargetLanguage(source)
        if not target:
            raise InvalidSourceOrTargetLanguage(target)

        # _map_language_to_code is a generator; unpacking consumes it and
        # triggers validation of both languages.
        self._source, self._target = self._map_language_to_code(source, target)
        self._url_params = url_params
        self._element_tag = element_tag
        self._element_query = element_query
        self.payload_key = payload_key
        super().__init__()

    @property
    def source(self):
        return self._source

    @source.setter
    def source(self, lang):
        # NOTE: assigned as given; no mapping or validation happens here.
        self._source = lang

    @property
    def target(self):
        return self._target

    @target.setter
    def target(self, lang):
        # NOTE: assigned as given; no mapping or validation happens here.
        self._target = lang

    def _type(self):
        """return the name of the concrete translator class"""
        return self.__class__.__name__

    def _map_language_to_code(self, *languages):
        """
        map language to its corresponding code (abbreviation) if the language was passed
        by its full name by the user
        @param languages: list of languages
        @return: generator yielding the mapped value of each language; raises
            LanguageNotSupportedException if a language is not supported
        """
        for language in languages:
            if language in self._languages.values() or language == "auto":
                yield language
            elif language in self._languages.keys():
                yield self._languages[language]
            else:
                raise LanguageNotSupportedException(
                    language,
                    message=f"No support for the provided language.\n"
                    f"Please select on of the supported languages:\n"
                    f"{self._languages}",
                )

    def _same_source_target(self) -> bool:
        """return True when source and target languages are identical"""
        return self._source == self._target

    def get_supported_languages(
        self, as_dict: bool = False, **kwargs
    ) -> Union[list, dict]:
        """
        return the languages supported by this translator
        @param as_dict: if True, the languages will be returned as a dictionary
        mapping languages to their abbreviations
        @return: list or dict
        """
        return self._supported_languages if not as_dict else self._languages

    def is_language_supported(self, language: str, **kwargs) -> bool:
        """
        check if the language is supported by the translator
        @param language: a string for 1 language
        @return: True if the language is "auto", a known language name or a
            known language code, False otherwise
        """
        return (
            language == "auto"
            or language in self._languages.keys()
            or language in self._languages.values()
        )

    @abstractmethod
    def translate(self, text: str, **kwargs) -> str:
        """
        translate a text using a translator under the hood and return
        the translated text
        @param text: text to translate
        @param kwargs: additional arguments
        @return: str
        @raise NotImplementedError: always, when reached through super()
        """
        # NotImplemented is a non-callable singleton; calling it raised a
        # confusing TypeError. NotImplementedError is the intended signal.
        raise NotImplementedError("You need to implement the translate method!")

    def _read_docx(self, f: str):
        """return the text of a .docx file using docx2txt (imported lazily)"""
        import docx2txt

        return docx2txt.process(f)

    def _read_pdf(self, f: str):
        """
        return the text of the first page of a .pdf file using pypdf
        (imported lazily)
        """
        import pypdf

        reader = pypdf.PdfReader(f)
        # NOTE(review): only the first page is read; later pages are ignored.
        page = reader.pages[0]
        return page.extract_text()

    def _translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args, forwarded to translate()
        @return: str

        .docx and .pdf files go through their dedicated readers; every other
        file is read as UTF-8 text and stripped. If the path does not exist a
        message is printed and the process exits with status 1.
        """
        if not isinstance(path, Path):
            path = Path(path)
        if not path.exists():
            print("Path to the file is wrong!")
            # Same exit status as before, without relying on the ``exit``
            # helper that is only injected by the ``site`` module.
            raise SystemExit(1)

        ext = path.suffix
        if ext == ".docx":
            text = self._read_docx(f=str(path))
        elif ext == ".pdf":
            text = self._read_pdf(f=str(path))
        else:
            with open(path, "r", encoding="utf-8") as f:
                text = f.read().strip()

        # Forward kwargs, consistently with _translate_batch; they used to be
        # silently dropped here.
        return self.translate(text, **kwargs)

    def _translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @param kwargs: additional args, forwarded to every translate() call
        @return: list of translations, in the order of the input
        @raise Exception: if the batch is empty
        """
        if not batch:
            raise Exception("Enter your text list that you want to translate")
        return [self.translate(text, **kwargs) for text in batch]

View file

@ -0,0 +1,70 @@
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
import os
from typing import List, Optional
from deep_translator.base import BaseTranslator
from deep_translator.constants import OPEN_AI_ENV_VAR
from deep_translator.exceptions import ApiKeyException
class ChatGptTranslator(BaseTranslator):
    """
    class that wraps functions, which use an OpenAI chat model
    under the hood to translate word(s)
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "english",
        api_key: Optional[str] = os.getenv(OPEN_AI_ENV_VAR, None),
        model: Optional[str] = "gpt-3.5-turbo",
        **kwargs,
    ):
        """
        @param source: source language
        @param target: target language
        @param api_key: your openai api key.
        @param model: name of the chat model passed to the OpenAI API
        @param kwargs: forwarded unchanged to BaseTranslator.__init__
        @raise ApiKeyException: if no api key is available
        """
        if not api_key:
            raise ApiKeyException(env_var=OPEN_AI_ENV_VAR)

        self.api_key = api_key
        self.model = model
        super().__init__(source=source, target=target, **kwargs)

    def translate(self, text: str, **kwargs) -> str:
        """
        @param text: text to translate
        @return: content of the first choice returned by the chat completion
        """
        # openai is imported lazily so the package works without it installed.
        import openai

        # The key is set on the openai module itself, i.e. process-wide.
        openai.api_key = self.api_key

        prompt = (
            f"Translate the text below into {self.target}.\n"
            f'Text: "{text}"'
        )
        user_message = {
            "role": "user",
            "content": prompt,
        }
        completion = openai.ChatCompletion.create(
            model=self.model,
            messages=[user_message],
        )
        return completion.choices[0].message.content

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate the content of a file
        @param path: path to the file to translate
        @return: translated text
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        @param batch: list of texts to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)

View file

@ -0,0 +1,102 @@
"""Console script for deep_translator."""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
import argparse
from typing import Optional
from deep_translator.engines import __engines__
class CLI(object):
    """
    Command-line front end: parses the terminal arguments, instantiates the
    requested translator and either translates a text or prints the
    languages the translator supports.
    """

    # registry mapping an engine name to its translator class
    translators_dict = __engines__
    translator = None

    def __init__(self, custom_args: Optional[list] = None):
        """
        @param custom_args: argument list to parse instead of the process
            arguments; None means "use sys.argv"
        @raise Exception: if the requested translator name is not registered
        """
        self.custom_args = custom_args
        self.args = self.parse_args()
        translator_class = self.translators_dict.get(
            self.args.translator, None
        )
        if not translator_class:
            raise Exception(
                f"Translator {self.args.translator} is not supported. "
                f"Supported translators: {list(self.translators_dict.keys())}"
            )

        # Listing languages (`-trans <service> -lang`) needs no target, but
        # BaseTranslator rejects target=None. In that one case the argument
        # is omitted so the translator's own default target applies. In
        # translate mode the value is passed through exactly as before.
        translator_kwargs = {"source": self.args.source}
        if self.args.target or not self.args.languages:
            translator_kwargs["target"] = self.args.target
        self.translator = translator_class(**translator_kwargs)

    def translate(self) -> None:
        """
        function used to provide translations from the parsed terminal arguments
        @return: None
        """
        res = self.translator.translate(self.args.text)
        print(f"Translation from {self.args.source} to {self.args.target}")
        print("-" * 50)
        print(f"Translation result: {res}")

    def get_supported_languages(self) -> None:
        """
        function used to print the languages supported by the translator service
        from the parsed terminal arguments
        @return: None
        """
        translator_supported_languages = (
            self.translator.get_supported_languages(as_dict=True)
        )
        print(f"Languages supported by '{self.args.translator}' are :\n")
        print(translator_supported_languages)

    def parse_args(self) -> argparse.Namespace:
        """
        function responsible for parsing terminal arguments and provide
        them for further use in the translation process
        @return: the parsed arguments
        """
        parser = argparse.ArgumentParser(
            add_help=True,
            description="Official CLI for deep-translator",
            usage="dt --help",
        )
        parser.add_argument(
            "--translator",
            "-trans",
            default="google",
            type=str,
            help="name of the translator you want to use",
        )
        parser.add_argument(
            "--source",
            "-src",
            default="auto",
            type=str,
            help="source language to translate from",
        )
        parser.add_argument(
            "--target", "-tg", type=str, help="target language to translate to"
        )
        parser.add_argument(
            "--text", "-txt", type=str, help="text you want to translate"
        )
        parser.add_argument(
            "--languages",
            "-lang",
            action="store_true",
            help="all the languages available with the translator. "
            "Run the command deep_translator -trans <translator service> -lang",
        )
        # argparse reads sys.argv when given None, so custom_args can be
        # passed straight through. An explicit empty list now means "no
        # arguments" instead of silently falling back to sys.argv.
        return parser.parse_args(self.custom_args)

    def run(self) -> None:
        """print the supported languages if requested, otherwise translate"""
        if self.args.languages:
            self.get_supported_languages()
        else:
            self.translate()

View file

@ -0,0 +1,641 @@
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
# Names of the environment variables that hold the API credentials of the
# individual services. The translator modules pass these names to os.getenv
# and to ApiKeyException.
OPEN_AI_ENV_VAR = "OPEN_API_KEY"
DEEPL_ENV_VAR = "DEEPL_API_KEY"
LIBRE_ENV_VAR = "LIBRE_API_KEY"
MSFT_ENV_VAR = "MICROSOFT_API_KEY"
QCRI_ENV_VAR = "QCRI_API_KEY"
YANDEX_ENV_VAR = "YANDEX_API_KEY"
BAIDU_APPID_ENV_VAR = "BAIDU_APPID"
BAIDU_APPKEY_ENV_VAR = "BAIDU_APPKEY"
# Endpoint URL per translation service. Entries containing {version} or
# {endpoint} are templates that the caller fills in with str.format.
# NOTE(review): MYMEMORY is the only entry using plain http — confirm whether
# https can be used instead.
BASE_URLS = {
"GOOGLE_TRANSLATE": "https://translate.google.com/m",
"PONS": "https://en.pons.com/translate/",
"YANDEX": "https://translate.yandex.net/api/{version}/tr.json/{endpoint}",
"LINGUEE": "https://www.linguee.com/",
"MYMEMORY": "http://api.mymemory.translated.net/get",
"QCRI": "https://mt.qcri.org/api/v1/{endpoint}?",
"DEEPL": "https://api.deepl.com/{version}/",
"DEEPL_FREE": "https://api-free.deepl.com/{version}/",
"MICROSOFT_TRANSLATE": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0",
"PAPAGO": "https://papago.naver.com/",
"PAPAGO_API": "https://openapi.naver.com/v1/papago/n2mt",
"LIBRE": "https://libretranslate.com/",
"LIBRE_FREE": "https://libretranslate.de/",
"BAIDU": "https://fanyi-api.baidu.com/api/trans/vip/translate",
}
# Lowercase language name -> language code for the Google translator.
# This table is also the default `languages` argument of BaseTranslator.
GOOGLE_LANGUAGES_TO_CODES = {
"afrikaans": "af",
"albanian": "sq",
"amharic": "am",
"arabic": "ar",
"armenian": "hy",
"assamese": "as",
"aymara": "ay",
"azerbaijani": "az",
"bambara": "bm",
"basque": "eu",
"belarusian": "be",
"bengali": "bn",
"bhojpuri": "bho",
"bosnian": "bs",
"bulgarian": "bg",
"catalan": "ca",
"cebuano": "ceb",
"chichewa": "ny",
"chinese (simplified)": "zh-CN",
"chinese (traditional)": "zh-TW",
"corsican": "co",
"croatian": "hr",
"czech": "cs",
"danish": "da",
"dhivehi": "dv",
"dogri": "doi",
"dutch": "nl",
"english": "en",
"esperanto": "eo",
"estonian": "et",
"ewe": "ee",
"filipino": "tl",
"finnish": "fi",
"french": "fr",
"frisian": "fy",
"galician": "gl",
"georgian": "ka",
"german": "de",
"greek": "el",
"guarani": "gn",
"gujarati": "gu",
"haitian creole": "ht",
"hausa": "ha",
"hawaiian": "haw",
"hebrew": "iw",
"hindi": "hi",
"hmong": "hmn",
"hungarian": "hu",
"icelandic": "is",
"igbo": "ig",
"ilocano": "ilo",
"indonesian": "id",
"irish": "ga",
"italian": "it",
"japanese": "ja",
"javanese": "jw",
"kannada": "kn",
"kazakh": "kk",
"khmer": "km",
"kinyarwanda": "rw",
"konkani": "gom",
"korean": "ko",
"krio": "kri",
"kurdish (kurmanji)": "ku",
"kurdish (sorani)": "ckb",
"kyrgyz": "ky",
"lao": "lo",
"latin": "la",
"latvian": "lv",
"lingala": "ln",
"lithuanian": "lt",
"luganda": "lg",
"luxembourgish": "lb",
"macedonian": "mk",
"maithili": "mai",
"malagasy": "mg",
"malay": "ms",
"malayalam": "ml",
"maltese": "mt",
"maori": "mi",
"marathi": "mr",
"meiteilon (manipuri)": "mni-Mtei",
"mizo": "lus",
"mongolian": "mn",
"myanmar": "my",
"nepali": "ne",
"norwegian": "no",
"odia (oriya)": "or",
"oromo": "om",
"pashto": "ps",
"persian": "fa",
"polish": "pl",
"portuguese": "pt",
"punjabi": "pa",
"quechua": "qu",
"romanian": "ro",
"russian": "ru",
"samoan": "sm",
"sanskrit": "sa",
"scots gaelic": "gd",
"sepedi": "nso",
"serbian": "sr",
"sesotho": "st",
"shona": "sn",
"sindhi": "sd",
"sinhala": "si",
"slovak": "sk",
"slovenian": "sl",
"somali": "so",
"spanish": "es",
"sundanese": "su",
"swahili": "sw",
"swedish": "sv",
"tajik": "tg",
"tamil": "ta",
"tatar": "tt",
"telugu": "te",
"thai": "th",
"tigrinya": "ti",
"tsonga": "ts",
"turkish": "tr",
"turkmen": "tk",
"twi": "ak",
"ukrainian": "uk",
"urdu": "ur",
"uyghur": "ug",
"uzbek": "uz",
"vietnamese": "vi",
"welsh": "cy",
"xhosa": "xh",
"yiddish": "yi",
"yoruba": "yo",
"zulu": "zu",
}
# Language code -> lowercase language name for the Pons translator.
# NOTE: the direction is the inverse of the *_LANGUAGES_TO_CODES tables in
# this module (keys are codes here, as the constant's name says).
PONS_CODES_TO_LANGUAGES = {
"ar": "arabic",
"bg": "bulgarian",
"zh-cn": "chinese",
"cs": "czech",
"da": "danish",
"nl": "dutch",
"en": "english",
"fr": "french",
"de": "german",
"el": "greek",
"hu": "hungarian",
"it": "italian",
"la": "latin",
"no": "norwegian",
"pl": "polish",
"pt": "portuguese",
"ru": "russian",
"sl": "slovenian",
"es": "spanish",
"sv": "swedish",
"tr": "turkish",
"elv": "elvish",
}
# Language table for the Linguee translator. Every key maps to itself, i.e.
# the full lowercase language name is used as the "code".
# The commented-out entries (serbian, croatian) are the only ones that were
# written with two-letter codes; they are disabled.
LINGUEE_LANGUAGES_TO_CODES = {
"maltese": "maltese",
"english": "english",
"german": "german",
"bulgarian": "bulgarian",
"polish": "polish",
"portuguese": "portuguese",
"hungarian": "hungarian",
"romanian": "romanian",
"russian": "russian",
# "serbian": "sr",
"dutch": "dutch",
"slovakian": "slovakian",
"greek": "greek",
"slovenian": "slovenian",
"danish": "danish",
"italian": "italian",
"spanish": "spanish",
"finnish": "finnish",
"chinese": "chinese",
"french": "french",
# "croatian": "hr",
"czech": "czech",
"laotian": "laotian",
"swedish": "swedish",
"latvian": "latvian",
"estonian": "estonian",
"japanese": "japanese",
}
# Lowercase language (or regional variant) name -> code for the MyMemory
# translator. The codes here carry a region/script suffix (e.g. "af-ZA",
# "bs-Cyrl-BA"), unlike the short codes of the Google table.
MY_MEMORY_LANGUAGES_TO_CODES = {
"acehnese": "ace-ID",
"afrikaans": "af-ZA",
"akan": "ak-GH",
"albanian": "sq-AL",
"amharic": "am-ET",
"antigua and barbuda creole english": "aig-AG",
"arabic": "ar-SA",
"arabic egyptian": "ar-EG",
"aragonese": "an-ES",
"armenian": "hy-AM",
"assamese": "as-IN",
"asturian": "ast-ES",
"austrian german": "de-AT",
"awadhi": "awa-IN",
"ayacucho quechua": "quy-PE",
"azerbaijani": "az-AZ",
"bahamas creole english": "bah-BS",
"bajan": "bjs-BB",
"balinese": "ban-ID",
"balkan gipsy": "rm-RO",
"bambara": "bm-ML",
"banjar": "bjn-ID",
"bashkir": "ba-RU",
"basque": "eu-ES",
"belarusian": "be-BY",
"belgian french": "fr-BE",
"bemba": "bem-ZM",
"bengali": "bn-IN",
"bhojpuri": "bho-IN",
"bihari": "bh-IN",
"bislama": "bi-VU",
"borana": "gax-KE",
"bosnian": "bs-BA",
"bosnian (cyrillic)": "bs-Cyrl-BA",
"breton": "br-FR",
"buginese": "bug-ID",
"bulgarian": "bg-BG",
"burmese": "my-MM",
"catalan": "ca-ES",
"catalan valencian": "cav-ES",
"cebuano": "ceb-PH",
"central atlas tamazight": "tzm-MA",
"central aymara": "ayr-BO",
"central kanuri (latin script)": "knc-NG",
"chadian arabic": "shu-TD",
"chamorro": "ch-GU",
"cherokee": "chr-US",
"chhattisgarhi": "hne-IN",
"chinese simplified": "zh-CN",
"chinese trad. (hong kong)": "zh-HK",
"chinese traditional": "zh-TW",
"chinese traditional macau": "zh-MO",
"chittagonian": "ctg-BD",
"chokwe": "cjk-AO",
"classical greek": "grc-GR",
"comorian ngazidja": "zdj-KM",
"coptic": "cop-EG",
"crimean tatar": "crh-RU",
"crioulo upper guinea": "pov-GW",
"croatian": "hr-HR",
"czech": "cs-CZ",
"danish": "da-DK",
"dari": "prs-AF",
"dimli": "diq-TR",
"dutch": "nl-NL",
"dyula": "dyu-CI",
"dzongkha": "dz-BT",
"eastern yiddish": "ydd-US",
"emakhuwa": "vmw-MZ",
"english": "en-GB",
"english australia": "en-AU",
"english canada": "en-CA",
"english india": "en-IN",
"english ireland": "en-IE",
"english new zealand": "en-NZ",
"english singapore": "en-SG",
"english south africa": "en-ZA",
"english us": "en-US",
"esperanto": "eo-EU",
"estonian": "et-EE",
"ewe": "ee-GH",
"fanagalo": "fn-FNG",
"faroese": "fo-FO",
"fijian": "fj-FJ",
"filipino": "fil-PH",
"finnish": "fi-FI",
"flemish": "nl-BE",
"fon": "fon-BJ",
"french": "fr-FR",
"french canada": "fr-CA",
"french swiss": "fr-CH",
"friulian": "fur-IT",
"fula": "ff-FUL",
"galician": "gl-ES",
"gamargu": "mfi-NG",
"garo": "grt-IN",
"georgian": "ka-GE",
"german": "de-DE",
"gilbertese": "gil-KI",
"glavda": "glw-NG",
"greek": "el-GR",
"grenadian creole english": "gcl-GD",
"guarani": "gn-PY",
"gujarati": "gu-IN",
"guyanese creole english": "gyn-GY",
"haitian creole french": "ht-HT",
"halh mongolian": "khk-MN",
"hausa": "ha-NE",
"hawaiian": "haw-US",
"hebrew": "he-IL",
"higi": "hig-NG",
"hiligaynon": "hil-PH",
"hill mari": "mrj-RU",
"hindi": "hi-IN",
"hmong": "hmn-CN",
"hungarian": "hu-HU",
"icelandic": "is-IS",
"igbo ibo": "ibo-NG",
"igbo ig": "ig-NG",
"ilocano": "ilo-PH",
"indonesian": "id-ID",
"inuktitut greenlandic": "kl-GL",
"irish gaelic": "ga-IE",
"italian": "it-IT",
"italian swiss": "it-CH",
"jamaican creole english": "jam-JM",
"japanese": "ja-JP",
"javanese": "jv-ID",
"jingpho": "kac-MM",
"k'iche'": "quc-GT",
"kabiyè": "kbp-TG",
"kabuverdianu": "kea-CV",
"kabylian": "kab-DZ",
"kalenjin": "kln-KE",
"kamba": "kam-KE",
"kannada": "kn-IN",
"kanuri": "kr-KAU",
"karen": "kar-MM",
"kashmiri (devanagari script)": "ks-IN",
"kashmiri (arabic script)": "kas-IN",
"kazakh": "kk-KZ",
"khasi": "kha-IN",
"khmer": "km-KH",
"kikuyu kik": "kik-KE",
"kikuyu ki": "ki-KE",
"kimbundu": "kmb-AO",
"kinyarwanda": "rw-RW",
"kirundi": "rn-BI",
"kisii": "guz-KE",
"kongo": "kg-CG",
"konkani": "kok-IN",
"korean": "ko-KR",
"northern kurdish": "kmr-TR",
"kurdish sorani": "ckb-IQ",
"kyrgyz": "ky-KG",
"lao": "lo-LA",
"latgalian": "ltg-LV",
"latin": "la-XN",
"latvian": "lv-LV",
"ligurian": "lij-IT",
"limburgish": "li-NL",
"lingala": "ln-LIN",
"lithuanian": "lt-LT",
"lombard": "lmo-IT",
"luba-kasai": "lua-CD",
"luganda": "lg-UG",
"luhya": "luy-KE",
"luo": "luo-KE",
"luxembourgish": "lb-LU",
"maa": "mas-KE",
"macedonian": "mk-MK",
"magahi": "mag-IN",
"maithili": "mai-IN",
"malagasy": "mg-MG",
"malay": "ms-MY",
"malayalam": "ml-IN",
"maldivian": "dv-MV",
"maltese": "mt-MT",
"mandara": "mfi-CM",
"manipuri": "mni-IN",
"manx gaelic": "gv-IM",
"maori": "mi-NZ",
"marathi": "mr-IN",
"margi": "mrt-NG",
"mari": "mhr-RU",
"marshallese": "mh-MH",
"mende": "men-SL",
"meru": "mer-KE",
"mijikenda": "nyf-KE",
"minangkabau": "min-ID",
"mizo": "lus-IN",
"mongolian": "mn-MN",
"montenegrin": "sr-ME",
"morisyen": "mfe-MU",
"moroccan arabic": "ar-MA",
"mossi": "mos-BF",
"ndau": "ndc-MZ",
"ndebele": "nr-ZA",
"nepali": "ne-NP",
"nigerian fulfulde": "fuv-NG",
"niuean": "niu-NU",
"north azerbaijani": "azj-AZ",
# NOTE(review): the Google table in this module maps "sesotho" to "st" and
# "sepedi" to "nso", and this table has "sotho southern": "st-LS". An "nso"
# code under the key "sesotho" looks inconsistent — confirm the intended key.
"sesotho": "nso-ZA",
"northern uzbek": "uzn-UZ",
"norwegian bokmål": "nb-NO",
"norwegian nynorsk": "nn-NO",
"nuer": "nus-SS",
"nyanja": "ny-MW",
"occitan": "oc-FR",
"occitan aran": "oc-ES",
"odia": "or-IN",
"oriya": "ory-IN",
"urdu": "ur-PK",
"palauan": "pau-PW",
"pali": "pi-IN",
"pangasinan": "pag-PH",
"papiamentu": "pap-CW",
"pashto": "ps-PK",
"persian": "fa-IR",
"pijin": "pis-SB",
"plateau malagasy": "plt-MG",
"polish": "pl-PL",
"portuguese": "pt-PT",
"portuguese brazil": "pt-BR",
"potawatomi": "pot-US",
"punjabi": "pa-IN",
"punjabi (pakistan)": "pnb-PK",
"quechua": "qu-PE",
"rohingya": "rhg-MM",
"rohingyalish": "rhl-MM",
"romanian": "ro-RO",
"romansh": "roh-CH",
"rundi": "run-BI",
"russian": "ru-RU",
"saint lucian creole french": "acf-LC",
"samoan": "sm-WS",
"sango": "sg-CF",
"sanskrit": "sa-IN",
"santali": "sat-IN",
"sardinian": "sc-IT",
"scots gaelic": "gd-GB",
"sena": "seh-ZW",
"serbian cyrillic": "sr-Cyrl-RS",
"serbian latin": "sr-Latn-RS",
"seselwa creole french": "crs-SC",
"setswana (south africa)": "tn-ZA",
"shan": "shn-MM",
"shona": "sn-ZW",
"sicilian": "scn-IT",
"silesian": "szl-PL",
"sindhi snd": "snd-PK",
"sindhi sd": "sd-PK",
"sinhala": "si-LK",
"slovak": "sk-SK",
"slovenian": "sl-SI",
"somali": "so-SO",
"sotho southern": "st-LS",
"south azerbaijani": "azb-AZ",
"southern pashto": "pbt-PK",
"southwestern dinka": "dik-SS",
"spanish": "es-ES",
"spanish argentina": "es-AR",
"spanish colombia": "es-CO",
"spanish latin america": "es-419",
"spanish mexico": "es-MX",
"spanish united states": "es-US",
"sranan tongo": "srn-SR",
"standard latvian": "lvs-LV",
"standard malay": "zsm-MY",
"sundanese": "su-ID",
"swahili": "sw-KE",
"swati": "ss-SZ",
"swedish": "sv-SE",
"swiss german": "de-CH",
"syriac (aramaic)": "syc-TR",
"tagalog": "tl-PH",
"tahitian": "ty-PF",
"tajik": "tg-TJ",
"tamashek (tuareg)": "tmh-DZ",
"tamasheq": "taq-ML",
"tamil india": "ta-IN",
"tamil sri lanka": "ta-LK",
"taroko": "trv-TW",
"tatar": "tt-RU",
"telugu": "te-IN",
"tetum": "tet-TL",
"thai": "th-TH",
"tibetan": "bo-CN",
"tigrinya": "ti-ET",
"tok pisin": "tpi-PG",
"tokelauan": "tkl-TK",
"tongan": "to-TO",
"tosk albanian": "als-AL",
"tsonga": "ts-ZA",
"tswa": "tsc-MZ",
"tswana": "tn-BW",
"tumbuka": "tum-MW",
"turkish": "tr-TR",
"turkmen": "tk-TM",
"tuvaluan": "tvl-TV",
"twi": "tw-GH",
"udmurt": "udm-RU",
"ukrainian": "uk-UA",
"uma": "ppk-ID",
"umbundu": "umb-AO",
"uyghur uig": "uig-CN",
"uyghur ug": "ug-CN",
"uzbek": "uz-UZ",
"venetian": "vec-IT",
"vietnamese": "vi-VN",
"vincentian creole english": "svc-VC",
"virgin islands creole english": "vic-US",
"wallisian": "wls-WF",
"waray (philippines)": "war-PH",
"welsh": "cy-GB",
"west central oromo": "gaz-ET",
"western persian": "pes-IR",
"wolof": "wo-SN",
"xhosa": "xh-ZA",
"yiddish": "yi-YD",
"yoruba": "yo-NG",
"zulu": "zu-ZA",
}
# Lowercase language name -> two-letter code for the DeepL translator.
# Passed as the `languages` table by DeeplTranslator.
DEEPL_LANGUAGE_TO_CODE = {
"bulgarian": "bg",
"czech": "cs",
"danish": "da",
"german": "de",
"greek": "el",
"english": "en",
"spanish": "es",
"estonian": "et",
"finnish": "fi",
"french": "fr",
"hungarian": "hu",
"indonesian": "id",
"italian": "it",
"japanese": "ja",
"lithuanian": "lt",
"latvian": "lv",
"dutch": "nl",
"polish": "pl",
"portuguese": "pt",
"romanian": "ro",
"russian": "ru",
"slovak": "sk",
"slovenian": "sl",
"swedish": "sv",
"turkish": "tr",
"ukrainian": "uk",
"chinese": "zh",
}
# Language table for the Papago translator.
# NOTE: despite the constant's name, the keys are the codes and the values
# are the (capitalised) language names — the inverse of most other tables in
# this module.
PAPAGO_LANGUAGE_TO_CODE = {
"ko": "Korean",
"en": "English",
"ja": "Japanese",
"zh-CN": "Chinese",
"zh-TW": "Chinese traditional",
"es": "Spanish",
"fr": "French",
"vi": "Vietnamese",
"th": "Thai",
"id": "Indonesia",
}
# Language name -> code for the QCRI translator.
# NOTE: keys are capitalised here and in the Libre table below, whereas the
# Google/DeepL/Baidu tables use lowercase names.
QCRI_LANGUAGE_TO_CODE = {"Arabic": "ar", "English": "en", "Spanish": "es"}
# Language name -> code for the Libre translator.
LIBRE_LANGUAGES_TO_CODES = {
"English": "en",
"Arabic": "ar",
"Chinese": "zh",
"French": "fr",
"German": "de",
"Hindi": "hi",
"Indonesian": "id",
"Irish": "ga",
"Italian": "it",
"Japanese": "ja",
"Korean": "ko",
"Polish": "pl",
"Portuguese": "pt",
"Russian": "ru",
"Spanish": "es",
"Turkish": "tr",
"Vietnamese": "vi",
}
# Lowercase language name -> code for the Baidu translator.
# Passed as the `languages` table by BaiduTranslator. Several codes differ
# from the ones in the Google table (e.g. "fra", "jp", "kor", "spa").
BAIDU_LANGUAGE_TO_CODE = {
"arabic": "ara",
"bulgarian": "bul",
"chinese (classical)": "wyw",
"chinese (simplified)": "zh",
"chinese (traditional)": "cht",
"czech": "cs",
"danish": "dan",
"dutch": "nl",
"english": "en",
"estonian": "est",
"finnish": "fin",
"french": "fra",
"german": "de",
"greek": "el",
"hungarian": "hu",
"italian": "it",
"japanese": "jp",
"korean": "kor",
"polish": "pl",
"portuguese": "pt",
"romanian": "ro",
"russian": "ru",
"slovenian": "slo",
"spanish": "spa",
"swedish": "swe",
"thai": "th",
"vietnamese": "vie",
"yueyu": "yue",
}

View file

@ -0,0 +1,111 @@
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
import os
from typing import List, Optional
import requests
from deep_translator.base import BaseTranslator
from deep_translator.constants import (
BASE_URLS,
DEEPL_ENV_VAR,
DEEPL_LANGUAGE_TO_CODE,
)
from deep_translator.exceptions import (
ApiKeyException,
AuthorizationException,
ServerException,
TranslationNotFound,
)
from deep_translator.validate import is_empty, is_input_valid, request_failed
class DeeplTranslator(BaseTranslator):
    """
    class that wraps functions, which use the DeeplTranslator translator
    under the hood to translate word(s)
    """

    def __init__(
        self,
        source: str = "de",
        target: str = "en",
        api_key: Optional[str] = os.getenv(DEEPL_ENV_VAR, None),
        use_free_api: bool = True,
        **kwargs
    ):
        """
        @param source: source language
        @param target: target language
        @param api_key: your DeeplTranslator api key.
            Get one here: https://www.deepl.com/docs-api/accessing-the-api/
        @param use_free_api: if True the DEEPL_FREE endpoint is used,
            otherwise the DEEPL endpoint
        @param kwargs: forwarded unchanged to BaseTranslator.__init__
        @raise ApiKeyException: if no api key is available

        NOTE: the environment-variable default is evaluated once, when this
        module is imported, not on every instantiation.
        """
        if not api_key:
            raise ApiKeyException(env_var=DEEPL_ENV_VAR)
        self.version = "v2"
        self.api_key = api_key
        url = (
            BASE_URLS.get("DEEPL_FREE").format(version=self.version)
            if use_free_api
            else BASE_URLS.get("DEEPL").format(version=self.version)
        )
        super().__init__(
            base_url=url,
            source=source,
            target=target,
            languages=DEEPL_LANGUAGE_TO_CODE,
            **kwargs
        )

    def translate(self, text: str, **kwargs) -> str:
        """
        @param text: text to translate
        @return: text of the first translation in the response
        @raise ServerException: the connection failed (503) or the request
            was reported as failed
        @raise AuthorizationException: the server answered with status 403
        @raise TranslationNotFound: the response body is empty
        """
        if not is_input_valid(text):
            # Same outcome as before: nothing is returned when the validator
            # reports the input as unusable.
            return None
        if self._same_source_target() or is_empty(text):
            return text

        # Create the request parameters.
        translate_endpoint = "translate"
        params = {
            "auth_key": self.api_key,
            "source_lang": self._source,
            "target_lang": self._target,
            "text": text,
        }

        # Do the request and check the connection.
        # requests raises its own ConnectionError class, which does not derive
        # from the builtin ConnectionError, so it has to be named explicitly
        # for the 503 mapping to take effect. The builtin is kept as well.
        try:
            response = requests.get(
                self._base_url + translate_endpoint, params=params
            )
        except (requests.exceptions.ConnectionError, ConnectionError) as err:
            raise ServerException(503) from err

        # If the answer is not success, raise server exception.
        if response.status_code == 403:
            raise AuthorizationException(self.api_key)
        if request_failed(status_code=response.status_code):
            raise ServerException(response.status_code)

        # Get the response and check is not empty.
        res = response.json()
        if not res:
            raise TranslationNotFound(text)

        # Process and return the response.
        return res["translations"][0]["text"]

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate the content of a file
        @param path: path to the file to translate
        @return: translated text
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        @param batch: list of texts to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)
# Manual smoke test: runs only when this module is executed as a script.
if __name__ == "__main__":
    translator = DeeplTranslator(target="en", api_key="some-key")
    result = translator.translate("Ich habe keine ahnung")
    print("text: ", result)

View file

@ -0,0 +1,102 @@
"""
language detection API
"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from typing import List, Optional, Union
import requests
from requests.exceptions import HTTPError
# Module global config.
# The "Authorization" value is a template; it must never be overwritten in
# place, otherwise the first API key would stick for every later request.
config = {
    "url": "https://ws.detectlanguage.com/0.2/detect",
    "headers": {
        "User-Agent": "Detect Language API Python Client 1.4.0",
        "Authorization": "Bearer {}",
    },
}


def get_request_body(
    text: Union[str, List[str]], api_key: str, *args, **kwargs
):
    """
    send a request and return the response body parsed as dictionary
    @param text: target text that you want to detect its language
    @type text: str
    @type api_key: str
    @param api_key: your private API key
    @return: the value stored under "data" in the JSON response
    @raise Exception: if api_key or text is missing/empty
    @raise HTTPError: if the server answers with an error status
    """
    if not api_key:
        raise Exception(
            "you need to get an API_KEY for this to work. "
            "Get one for free here: https://detectlanguage.com/documentation"
        )
    if not text:
        raise Exception("Please provide an input text")

    # Work on a copy: formatting the key into the shared config dict replaced
    # the "Bearer {}" template, so every later call kept using the first key.
    headers = dict(config["headers"])
    headers["Authorization"] = headers["Authorization"].format(api_key)

    try:
        response = requests.post(
            config["url"], json={"q": text}, headers=headers
        )
        # Without this call requests never raises HTTPError and the handler
        # below is unreachable.
        response.raise_for_status()
        return response.json().get("data")
    except HTTPError as e:
        print("Error occured while requesting from server: ", e.args)
        raise e
def single_detection(
    text: str,
    api_key: Optional[str] = None,
    detailed: bool = False,
    *args,
    **kwargs
):
    """
    detect the language of a single text
    @param text: target text that you want to detect its language
    @type text: str
    @type api_key: str
    @param api_key: your private API key
    @param detailed: set to True if you want to get detailed
    information about the detection process
    @return: the first detection entry when detailed is True, otherwise its
    "language" value (None when that value is missing or empty)
    """
    response_body = get_request_body(text, api_key)
    first_hit = response_body.get("detections")[0]
    if detailed:
        return first_hit
    # A missing or empty language yields None.
    return first_hit.get("language", None) or None
def batch_detection(
    text_list: List[str], api_key: str, detailed: bool = False, *args, **kwargs
):
    """
    detect the language of every text in a batch
    @param text_list: target batch that you want to detect its language
    @param api_key: your private API key
    @param detailed: set to True if you want to
    get detailed information about the detection process
    @return: one entry per detection result: its first candidate when detailed
    is True, otherwise that candidate's "language" value
    """
    response_body = get_request_body(text_list, api_key)
    # Keep only the first candidate of every detection result.
    top_hits = [
        candidates[0] for candidates in response_body.get("detections")
    ]
    if not detailed:
        return [hit["language"] for hit in top_hits]
    return top_hits

View file

@ -0,0 +1,8 @@
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from deep_translator.base import BaseTranslator
# Registry mapping a short engine name to its translator class. The name is
# the class name with "Translator" removed, lowercased (a class named
# FooTranslator is registered as "foo").
# __subclasses__() is evaluated once, when this module is imported, and only
# returns direct subclasses of BaseTranslator that already exist at that point.
__engines__ = {
translator.__name__.replace("Translator", "").lower(): translator
for translator in BaseTranslator.__subclasses__()
}

View file

@ -0,0 +1,195 @@
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
class BaseError(Exception):
    """
    base error structure class

    Stores the offending value together with a message for the user;
    str(error) renders them as "<val> --> <message>".
    """

    def __init__(self, val, message):
        """
        @param val: actual value
        @param message: message shown to the user
        """
        self.val = val
        self.message = message
        # Forward both values instead of calling Exception.__init__() without
        # arguments: an empty ``args`` tuple makes the exception impossible to
        # pickle/copy (the reconstruction calls the class with ``args``), which
        # breaks passing errors back from worker processes.
        super().__init__(val, message)

    def __str__(self):
        return "{} --> {}".format(self.val, self.message)
class LanguageNotSupportedException(BaseError):
    """
    error raised when the requested language is not
    supported by the deep_translator
    """

    def __init__(
        self, val, message="There is no support for the chosen language"
    ):
        """
        @param val: the language that was requested
        @param message: message shown to the user
        """
        super(LanguageNotSupportedException, self).__init__(val, message)
class NotValidPayload(BaseError):
    """
    error raised when the payload handed over by the user is not valid
    """

    def __init__(
        self,
        val,
        message="text must be a valid text with maximum 5000 character,otherwise it cannot be translated",
    ):
        """
        @param val: the rejected payload
        @param message: message shown to the user
        """
        super().__init__(val, message)
class InvalidSourceOrTargetLanguage(BaseError):
    """
    error raised when the source or the target language is invalid
    """

    def __init__(self, val, message="Invalid source or target language!"):
        """
        @param val: the rejected language value
        @param message: message shown to the user
        """
        super().__init__(val, message)
class TranslationNotFound(BaseError):
    """
    error raised when no translation could be found for the given text
    """

    def __init__(
        self,
        val,
        message="No translation was found using the current translator. Try another translator?",
    ):
        """
        @param val: the text that could not be translated
        @param message: message shown to the user
        """
        super().__init__(val, message)
class ElementNotFoundInGetRequest(BaseError):
    """
    error raised when the expected html element is missing
    from the body parsed by beautifulsoup
    """

    def __init__(
        self, val, message="Required element was not found in the API response"
    ):
        """
        @param val: value attached to the error
        @param message: message shown to the user
        """
        super().__init__(val, message)
class NotValidLength(BaseError):
    """
    error raised when the length of the text is outside
    of the limits accepted by the translator
    """

    def __init__(self, val, min_chars, max_chars):
        """
        @param val: the rejected text
        @param min_chars: lower length limit reported in the message
        @param max_chars: upper length limit reported in the message
        """
        super().__init__(
            val,
            f"Text length need to be between {min_chars} and {max_chars} characters",
        )
class RequestError(Exception):
    """
    error raised when something went wrong during the request call,
    e.g. a connection problem
    """

    def __init__(
        self,
        message="Request exception can happen due to an api connection error. Please check your connection and try again",
    ):
        """
        @param message: message shown to the user
        """
        self.message = message

    def __str__(self):
        # the stored message is the whole textual representation
        return self.message
class MicrosoftAPIerror(Exception):
    """
    error raised when the Microsoft API answers with one of its errors
    """

    def __init__(self, api_message):
        """
        @param api_message: error reported by the API (stored as a string)
        """
        self.message = "Microsoft API returned the following error"
        self.api_message = str(api_message)

    def __str__(self):
        return f"{self.message}: {self.api_message}"
class TooManyRequests(Exception):
    """
    exception thrown if the server refused the request because too many
    requests were made (the translators raise it on a 429 status code)
    """

    def __init__(
        self,
        # every fragment ends with a space: adjacent string literals are
        # concatenated verbatim, otherwise the words get glued together
        message="Server Error: You made too many requests to the server. "
        "According to google, you are allowed to make 5 requests per second "
        "and up to 200k requests per day. You can wait and try again later or "
        "you can try the translate_batch function",
    ):
        """
        @param message: message shown to the user
        """
        self.message = message

    def __str__(self):
        return self.message
class ServerException(Exception):
    """
    Default YandexTranslate exception from the official website
    """

    # status code -> symbolic error name used as the exception message
    errors = {
        400: "ERR_BAD_REQUEST",
        401: "ERR_KEY_INVALID",
        402: "ERR_KEY_BLOCKED",
        403: "ERR_DAILY_REQ_LIMIT_EXCEEDED",
        404: "ERR_DAILY_CHAR_LIMIT_EXCEEDED",
        413: "ERR_TEXT_TOO_LONG",
        429: "ERR_TOO_MANY_REQUESTS",
        422: "ERR_UNPROCESSABLE_TEXT",
        500: "ERR_INTERNAL_SERVER_ERROR",
        501: "ERR_LANG_NOT_SUPPORTED",
        503: "ERR_SERVICE_NOT_AVAIBLE",
    }

    def __init__(self, status_code, *args):
        """
        @param status_code: status code that is looked up in the errors table
        @param args: additional values appended to the exception arguments
        """
        # codes missing from the table fall back to a generic description
        description = self.errors.get(status_code, "API server error")
        super().__init__(description, *args)
class ApiKeyException(BaseError):
    """
    error raised when a translator did not receive an api key
    """

    def __init__(self, env_var):
        """
        @param env_var: name of the environment variable mentioned in the
        message as an alternative way to provide the key
        """
        guidance = f"""
You have to pass your api_key!
You can do this by passing the key as a parameter/argument to the translator class
or by setting the environment variable {env_var}
Example: export {env_var}="your_api_key"
"""
        # there is no offending value to report, hence None
        super(ApiKeyException, self).__init__(None, guidance)
class AuthorizationException(Exception):
    """
    exception thrown if the access with the given api key was not authorized
    """

    def __init__(self, api_key, *args):
        """
        @param api_key: the api key that was rejected
        @param args: additional values appended to the exception arguments
        """
        # Formatting instead of "+" concatenation: a key that is not a string
        # (e.g. None) must not turn this error into an unrelated TypeError.
        msg = f"Unauthorized access with the api key {api_key}"
        super().__init__(msg, *args)
class BaiduAPIerror(Exception):
    """
    error raised when the Baidu API answers with one of its errors
    """

    def __init__(self, api_message):
        """
        @param api_message: error reported by the API (stored as a string)
        """
        self.message = "Baidu API returned the following error"
        self.api_message = str(api_message)

    def __str__(self):
        return f"{self.message}: {self.api_message}"

View file

@ -0,0 +1,122 @@
"""
google translator API
"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from typing import List, Optional
import requests
from bs4 import BeautifulSoup
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS
from deep_translator.exceptions import (
RequestError,
TooManyRequests,
TranslationNotFound,
)
from deep_translator.validate import is_empty, is_input_valid, request_failed
class GoogleTranslator(BaseTranslator):
    """
    class that wraps functions, which use Google Translate under the hood to translate text(s)
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "en",
        proxies: Optional[dict] = None,
        **kwargs
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param proxies: optional proxies mapping passed to requests
        @param kwargs: additional arguments forwarded to BaseTranslator
        """
        self.proxies = proxies
        super().__init__(
            base_url=BASE_URLS.get("GOOGLE_TRANSLATE"),
            source=source,
            target=target,
            element_tag="div",
            element_query={"class": "t0"},
            payload_key="q",  # key of text in the url
            **kwargs
        )

        # second selector tried when the primary one matches nothing
        self._alt_element_query = {"class": "result-container"}

    def translate(self, text: str, **kwargs) -> str:
        """
        function to translate a text
        @param text: desired text to translate
        @return: str: translated text
        @raise TooManyRequests: if the server answers with status code 429
        @raise RequestError: if the request failed with another status code
        @raise TranslationNotFound: if no result element is found in the page
        """
        if is_input_valid(text, max_chars=5000):
            text = text.strip()
            if self._same_source_target() or is_empty(text):
                return text
            self._url_params["tl"] = self._target
            self._url_params["sl"] = self._source

            if self.payload_key:
                self._url_params[self.payload_key] = text

            response = requests.get(
                self._base_url, params=self._url_params, proxies=self.proxies
            )
            if response.status_code == 429:
                raise TooManyRequests()

            if request_failed(status_code=response.status_code):
                raise RequestError()

            soup = BeautifulSoup(response.text, "html.parser")
            element = soup.find(self._element_tag, self._element_query)
            response.close()

            if not element:
                element = soup.find(self._element_tag, self._alt_element_query)
                if not element:
                    raise TranslationNotFound(text)

            translated = element.get_text(strip=True)

            # The result is identical to the (already stripped) input. When it
            # contains letters or digits, retry once without the "hl"
            # parameter, or give the input back if there is nothing to drop.
            if translated == text and any(ch.isalnum() for ch in text):
                self._url_params["tl"] = self._target
                if "hl" not in self._url_params:
                    return text
                del self._url_params["hl"]
                return self.translate(text)

            # Also reached for inputs without any alphanumeric character
            # (e.g. "..."), which previously fell through and returned None.
            return translated

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)

View file

@ -0,0 +1,123 @@
"""
LibreTranslate API
"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
import os
from typing import List, Optional
import requests
from deep_translator.base import BaseTranslator
from deep_translator.constants import (
BASE_URLS,
LIBRE_ENV_VAR,
LIBRE_LANGUAGES_TO_CODES,
)
from deep_translator.exceptions import (
ApiKeyException,
AuthorizationException,
ServerException,
TranslationNotFound,
)
from deep_translator.validate import is_empty, is_input_valid, request_failed
class LibreTranslator(BaseTranslator):
    """
    class that wraps functions, which use libre translator under the hood to translate text(s)
    """

    def __init__(
        self,
        source: str = "en",
        target: str = "es",
        api_key: Optional[str] = os.getenv(LIBRE_ENV_VAR, None),
        use_free_api: bool = True,
        custom_url: Optional[str] = None,
        **kwargs
    ):
        """
        @param api_key: your api key
        @param source: source language to translate from
        List of LibreTranslate endpoint can be found at :
        https://github.com/LibreTranslate/LibreTranslate#mirrors
        Some require an API key
        @param target: target language to translate to
        @param use_free_api: set True if you want to use the free api.
        This means a url that does not require and api key would be used
        @param custom_url: you can use a custom endpoint
        @raise ApiKeyException: if no api key is passed and none is found
        in the environment
        """
        # The default above is evaluated only once, when the module is
        # imported; look the variable up again so a key exported afterwards
        # is still picked up.
        if not api_key:
            api_key = os.getenv(LIBRE_ENV_VAR)
        if not api_key:
            raise ApiKeyException(env_var=LIBRE_ENV_VAR)

        self.api_key = api_key
        url = (
            BASE_URLS.get("LIBRE")
            if not use_free_api
            else BASE_URLS.get("LIBRE_FREE")
        )
        super().__init__(
            base_url=url if not custom_url else custom_url,
            source=source,
            target=target,
            languages=LIBRE_LANGUAGES_TO_CODES,
        )

    def translate(self, text: str, **kwargs) -> str:
        """
        function that uses the libre translator to translate a text
        @param text: desired text to translate
        @return: str: translated text
        @raise ServerException: if the server cannot be reached (503) or
        answers with a failure status code
        @raise AuthorizationException: if the server answers with 403
        @raise TranslationNotFound: if the response body is empty
        """
        if is_input_valid(text):
            if self._same_source_target() or is_empty(text):
                return text

            translate_endpoint = "translate"
            params = {
                "q": text,
                "source": self._source,
                "target": self._target,
                "format": "text",
            }
            # Add API Key if required
            if self.api_key:
                params["api_key"] = self.api_key

            # Do the request and check the connection.
            try:
                response = requests.post(
                    self._base_url + translate_endpoint, params=params
                )
            except (ConnectionError, requests.exceptions.ConnectionError):
                # requests raises its own ConnectionError, which is not a
                # subclass of the builtin one, so both have to be listed
                raise ServerException(503)

            # If the answer is not success, raise server exception.
            if response.status_code == 403:
                raise AuthorizationException(self.api_key)
            elif request_failed(status_code=response.status_code):
                raise ServerException(response.status_code)

            # Get the response and check is not empty.
            res = response.json()
            if not res:
                raise TranslationNotFound(text)

            # Process and return the response.
            return res["translatedText"]

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)

View file

@ -0,0 +1,115 @@
"""
linguee translator API
"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from typing import List, Optional, Union
import requests
from bs4 import BeautifulSoup
from requests.utils import requote_uri
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS, LINGUEE_LANGUAGES_TO_CODES
from deep_translator.exceptions import (
ElementNotFoundInGetRequest,
NotValidPayload,
RequestError,
TooManyRequests,
TranslationNotFound,
)
from deep_translator.validate import is_empty, is_input_valid, request_failed
class LingueeTranslator(BaseTranslator):
    """
    class that wraps functions, which use the linguee translator under the hood to translate word(s)
    """

    def __init__(
        self,
        source: str = "en",
        target: str = "de",
        proxies: Optional[dict] = None,
        **kwargs,
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param proxies: optional proxies mapping passed to requests
        @param kwargs: accepted but not forwarded to BaseTranslator
        """
        self.proxies = proxies
        super().__init__(
            base_url=BASE_URLS.get("LINGUEE"),
            source=source,
            target=target,
            languages=LINGUEE_LANGUAGES_TO_CODES,
            element_tag="a",
            element_query={"class": "dictLink featured"},
            payload_key=None,  # key of text in the url
        )

    def translate(
        self, word: str, return_all: bool = False, **kwargs
    ) -> Union[str, List[str]]:
        """
        function that uses linguee to translate a word
        @param word: word to translate
        @type word: str
        @param return_all: set to True to return all synonym of the translated word
        @type return_all: bool
        @return: str: translated word
        @raise TooManyRequests: if the server answers with status code 429
        @raise RequestError: if the request failed with another status code
        @raise ElementNotFoundInGetRequest: if the page has no result links
        """
        if self._same_source_target() or is_empty(word):
            return word

        if is_input_valid(word, max_chars=50):
            url = f"{self._base_url}{self._source}-{self._target}/search/?source={self._source}&query={word}"
            url = requote_uri(url)
            response = requests.get(url, proxies=self.proxies)

            if response.status_code == 429:
                raise TooManyRequests()

            if request_failed(status_code=response.status_code):
                raise RequestError()

            soup = BeautifulSoup(response.text, "html.parser")
            elements = soup.find_all(self._element_tag, self._element_query)
            response.close()

            if not elements:
                # report the word that was looked up (as PonsTranslator does)
                # rather than the empty result list
                raise ElementNotFoundInGetRequest(word)

            filtered_elements = []
            for el in elements:
                # the optional "placeholder" span is removed from the link text
                placeholder = el.find("span", {"class": "placeholder"})
                pronoun = (
                    placeholder.get_text(strip=True)
                    if placeholder is not None
                    else ""
                )
                filtered_elements.append(
                    el.get_text(strip=True).replace(pronoun, "")
                )

            if not filtered_elements:
                raise TranslationNotFound(word)

            return filtered_elements if return_all else filtered_elements[0]

    def translate_words(self, words: List[str], **kwargs) -> List[str]:
        """
        translate a batch of words together by providing them in a list
        @param words: list of words you want to translate
        @param kwargs: additional args
        @return: list of translated words
        @raise NotValidPayload: if words is empty
        """
        if not words:
            raise NotValidPayload(words)

        return [self.translate(word=word, **kwargs) for word in words]

View file

@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
import logging
import os
import sys
from typing import List, Optional
import requests
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS, MSFT_ENV_VAR
from deep_translator.exceptions import ApiKeyException, MicrosoftAPIerror
from deep_translator.validate import is_input_valid
class MicrosoftTranslator(BaseTranslator):
    """
    the class that wraps functions, which use the Microsoft translator under the hood to translate word(s)
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "en",
        api_key: Optional[str] = os.getenv(MSFT_ENV_VAR, None),
        region: Optional[str] = None,
        proxies: Optional[dict] = None,
        **kwargs,
    ):
        """
        @params api_key and target are the required params
        @param api_key: your Microsoft API key
        @param region: your Microsoft Location
        @param proxies: optional proxies mapping passed to requests
        @raise ApiKeyException: if no api key is passed and none is found
        in the environment
        """
        # The default above is evaluated only once, when the module is
        # imported; look the variable up again so a key exported afterwards
        # is still picked up.
        if not api_key:
            api_key = os.getenv(MSFT_ENV_VAR)
        if not api_key:
            raise ApiKeyException(env_var=MSFT_ENV_VAR)

        self.api_key = api_key
        self.proxies = proxies
        # always defined, so reading the attribute never fails
        self.region = region
        self.headers = {
            "Ocp-Apim-Subscription-Key": self.api_key,
            "Content-type": "application/json",
        }
        # parameter region is not required but very common and goes to headers if passed
        if region:
            self.headers["Ocp-Apim-Subscription-Region"] = region
        super().__init__(
            base_url=BASE_URLS.get("MICROSOFT_TRANSLATE"),
            source=source,
            target=target,
            languages=self._get_supported_languages(),
            **kwargs,
        )

    # this function get the actual supported languages of the msft translator and store them in a dict, where
    # the keys are the abbreviations and the values are the languages
    # a common variable used in the other translators would be: MICROSOFT_CODES_TO_LANGUAGES
    def _get_supported_languages(self):
        """
        fetch the languages offered by the Microsoft translator
        @return: dict mapping lowercased language names to lowercased codes
        """
        # no trailing blank after the scope value: it would become part of
        # the query string
        microsoft_languages_api_url = (
            "https://api.cognitive.microsofttranslator.com/languages"
            "?api-version=3.0&scope=translation"
        )
        microsoft_languages_response = requests.get(
            microsoft_languages_api_url,
            # same proxies as the translation requests
            proxies=getattr(self, "proxies", None),
        )
        translation_dict = microsoft_languages_response.json()["translation"]

        return {
            details["name"].lower(): code.lower()
            for code, details in translation_dict.items()
        }

    def translate(self, text: str, **kwargs) -> str:
        """
        function that uses microsoft translate to translate a text
        @param text: desired text to translate
        @return: str: translated text
        @raise requests.exceptions.RequestException: if the request itself
        fails (logged, then re-raised)
        @raise MicrosoftAPIerror: if the API answers with an error
        """
        # a body must be a list of dicts to process multiple texts;
        # I have not added multiple text processing here since it is covered by the translate_batch method
        if is_input_valid(text):
            self._url_params["from"] = self._source
            self._url_params["to"] = self._target

            valid_microsoft_json = [{"text": text}]
            try:
                response = requests.post(
                    self._base_url,
                    params=self._url_params,
                    headers=self.headers,
                    json=valid_microsoft_json,
                    proxies=self.proxies,
                )
            except requests.exceptions.RequestException as exc:
                logging.warning(f"Returned error: {type(exc).__name__}")
                # Without a response there is nothing to parse; carrying on
                # used to crash with an AttributeError on None.
                raise

            payload = response.json()

            # Where Microsoft API responds with an api error, it returns a dict in response.json()
            if isinstance(payload, dict):
                raise MicrosoftAPIerror(payload["error"])
            # Where it responds with a translation, its response.json() is a list
            # e.g. [{'translations': [{'text':'Hello world!', 'to': 'en'}]}]
            if isinstance(payload, list):
                return "\n".join(
                    item["text"] for item in payload[0]["translations"]
                )

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate from a file
        @param path: path to file
        @return: translated text
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a batch of texts
        @param batch: list of texts to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)

View file

@ -0,0 +1,113 @@
"""
mymemory translator API
"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from typing import List, Optional, Union
import requests
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS, MY_MEMORY_LANGUAGES_TO_CODES
from deep_translator.exceptions import (
RequestError,
TooManyRequests,
TranslationNotFound,
)
from deep_translator.validate import is_empty, is_input_valid, request_failed
class MyMemoryTranslator(BaseTranslator):
    """
    class that uses the mymemory translator to translate texts
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "en",
        proxies: Optional[dict] = None,
        **kwargs,
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param proxies: optional proxies mapping passed to requests
        @param kwargs: only the optional "email" entry is used; it is sent
        as the "de" parameter of every request
        """
        self.proxies = proxies
        self.email = kwargs.get("email", None)
        super().__init__(
            base_url=BASE_URLS.get("MYMEMORY"),
            source=source,
            target=target,
            payload_key="q",
            languages=MY_MEMORY_LANGUAGES_TO_CODES,
        )

    def translate(
        self, text: str, return_all: bool = False, **kwargs
    ) -> Union[str, List[str]]:
        """
        function that uses the mymemory translator to translate a text
        @param text: desired text to translate
        @type text: str
        @param return_all: set to True to return all synonym/similars of the translated text
        @return: str or list
        @raise TooManyRequests: if the server answers with status code 429
        @raise RequestError: if the request failed with another status code
        @raise TranslationNotFound: if the response holds neither a
        translation nor a match
        """
        if is_input_valid(text, max_chars=500):
            text = text.strip()
            if self._same_source_target() or is_empty(text):
                return text

            self._url_params["langpair"] = f"{self._source}|{self._target}"
            if self.payload_key:
                self._url_params[self.payload_key] = text
            if self.email:
                self._url_params["de"] = self.email

            response = requests.get(
                self._base_url, params=self._url_params, proxies=self.proxies
            )

            if response.status_code == 429:
                raise TooManyRequests()
            if request_failed(status_code=response.status_code):
                raise RequestError()

            data = response.json()
            response.close()
            if not data:
                # the exception used to be created without being raised
                raise TranslationNotFound(text)

            # tolerate a missing or null "responseData" / "matches" field
            translation = (data.get("responseData") or {}).get(
                "translatedText"
            )
            all_matches = data.get("matches") or []

            if translation:
                if not return_all:
                    return translation
                # append translation at the start of the matches list
                return [translation] + list(all_matches)

            # No main translation: fall back to the matches, and report a
            # missing translation instead of leaking a StopIteration when
            # there are none.
            if not all_matches:
                raise TranslationNotFound(text)
            if return_all:
                return list(all_matches)
            return all_matches[0]["translation"]

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)

View file

@ -0,0 +1,101 @@
"""
papago translator API
"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
import json
from typing import List, Optional
import requests
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS, PAPAGO_LANGUAGE_TO_CODE
from deep_translator.exceptions import TranslationNotFound
from deep_translator.validate import is_input_valid, request_failed
class PapagoTranslator(BaseTranslator):
    """
    class that wraps functions, which use the papago translator under the hood to translate text(s)
    """

    def __init__(
        self,
        client_id: Optional[str] = None,
        secret_key: Optional[str] = None,
        source: str = "auto",
        target: str = "en",
        **kwargs,
    ):
        """
        @param client_id: client id sent in the X-Naver-Client-Id header
        @param secret_key: secret sent in the X-Naver-Client-Secret header
        @param source: source language to translate from
        @param target: target language to translate to
        """
        credentials_complete = bool(client_id) and bool(secret_key)
        if not credentials_complete:
            raise Exception(
                "Please pass your client id and secret key! visit the papago website for more infos"
            )

        self.client_id = client_id
        self.secret_key = secret_key
        super().__init__(
            base_url=BASE_URLS.get("PAPAGO_API"),
            source=source,
            target=target,
            languages=PAPAGO_LANGUAGE_TO_CODE,
            **kwargs,
        )

    def translate(self, text: str, **kwargs) -> str:
        """
        function that uses the papago translator to translate a text
        @param text: desired text to translate
        @return: str: translated text
        """
        if not is_input_valid(text):
            return None

        request_headers = {
            "X-Naver-Client-Id": self.client_id,
            "X-Naver-Client-Secret": self.secret_key,
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        }
        form_fields = {
            "source": self._source,
            "target": self._target,
            "text": text,
        }
        response = requests.post(
            self._base_url, headers=request_headers, data=form_fields
        )
        if request_failed(status_code=response.status_code):
            raise Exception(
                f"Translation error! -> status code: {response.status_code}"
            )

        parsed = json.loads(response.text)
        if "message" not in parsed:
            raise TranslationNotFound(text)

        # the translation sits under message -> result -> translatedText
        outcome = parsed.get("message").get("result", None)
        if not outcome:
            raise TranslationNotFound(text)
        return outcome.get("translatedText")

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate the content of a file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate several texts at once
        @param batch: list of texts you want to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)

View file

@ -0,0 +1,118 @@
"""
pons translator API
"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from typing import List, Optional, Union
import requests
from bs4 import BeautifulSoup
from requests.utils import requote_uri
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS, PONS_CODES_TO_LANGUAGES
from deep_translator.exceptions import (
ElementNotFoundInGetRequest,
NotValidPayload,
RequestError,
TooManyRequests,
TranslationNotFound,
)
from deep_translator.validate import is_empty, is_input_valid, request_failed
class PonsTranslator(BaseTranslator):
    """
    class that uses PONS translator to translate words
    """

    def __init__(
        self,
        source: str,
        target: str = "en",
        proxies: Optional[dict] = None,
        **kwargs,
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param proxies: optional proxies mapping passed to requests
        @param kwargs: additional arguments forwarded to BaseTranslator
        """
        self.proxies = proxies
        super().__init__(
            base_url=BASE_URLS.get("PONS"),
            languages=PONS_CODES_TO_LANGUAGES,
            source=source,
            target=target,
            payload_key=None,
            element_tag="div",
            element_query={"class": "target"},
            **kwargs,
        )

    def translate(
        self, word: str, return_all: bool = False, **kwargs
    ) -> Union[str, List[str]]:
        """
        function that uses PONS to translate a word
        @param word: word to translate
        @type word: str
        @param return_all: set to True to return all synonym of the translated word
        @type return_all: bool
        @return: str: translated word
        @raise TooManyRequests: if the server answers with status code 429
        @raise RequestError: if the request failed with another status code
        @raise ElementNotFoundInGetRequest: if the page holds no result list
        or no target element
        @raise TranslationNotFound: if no usable translation is left
        """
        if is_input_valid(word, max_chars=50):
            if self._same_source_target() or is_empty(word):
                return word
            url = f"{self._base_url}{self._source}-{self._target}/{word}"
            url = requote_uri(url)
            response = requests.get(url, proxies=self.proxies)

            if response.status_code == 429:
                raise TooManyRequests()

            if request_failed(status_code=response.status_code):
                raise RequestError()

            soup = BeautifulSoup(response.text, "html.parser")
            response.close()

            # find() gives None when the page has no result list; calling a
            # method on it used to fail with an AttributeError
            result_list = soup.find("div", {"class": "result_list"})
            if result_list is None:
                raise ElementNotFoundInGetRequest(word)

            elements = result_list.find_all(
                self._element_tag, self._element_query
            )
            if not elements:
                raise ElementNotFoundInGetRequest(word)

            # one candidate per target element: the texts of its links
            filtered_elements = [
                " ".join(link.get_text() for link in el.find_all("a"))
                for el in elements
            ]

            # candidates that are empty or a single character are dropped
            word_list = [
                candidate
                for candidate in filtered_elements
                if candidate and len(candidate) > 1
            ]

            if not word_list:
                raise TranslationNotFound(word)

            return word_list if return_all else word_list[0]

    def translate_words(self, words: List[str], **kwargs) -> List[str]:
        """
        translate a batch of words together by providing them in a list
        @param words: list of words you want to translate
        @param kwargs: additional args
        @return: list of translated words
        @raise NotValidPayload: if words is empty
        """
        if not words:
            raise NotValidPayload(words)

        return [self.translate(word=word, **kwargs) for word in words]

View file

@ -0,0 +1,119 @@
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
import os
from typing import List, Optional
import requests
from deep_translator.base import BaseTranslator
from deep_translator.constants import (
BASE_URLS,
QCRI_ENV_VAR,
QCRI_LANGUAGE_TO_CODE,
)
from deep_translator.exceptions import (
ApiKeyException,
ServerException,
TranslationNotFound,
)
from deep_translator.validate import request_failed
class QcriTranslator(BaseTranslator):
    """
    class that wraps functions, which use the QRCI translator under the hood to translate word(s)
    """

    def __init__(
        self,
        source: str = "en",
        target: str = "en",
        api_key: Optional[str] = os.getenv(QCRI_ENV_VAR, None),
        **kwargs,
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param api_key: your qrci api key.
        Get one for free here https://mt.qcri.org/api/v1/ref
        @raise ApiKeyException: if no api key is passed and none is found
        in the environment
        """
        # The default above is evaluated only once, when the module is
        # imported; look the variable up again so a key exported afterwards
        # is still picked up.
        if not api_key:
            api_key = os.getenv(QCRI_ENV_VAR)
        if not api_key:
            raise ApiKeyException(QCRI_ENV_VAR)

        self.api_key = api_key
        self.api_endpoints = {
            "get_languages": "getLanguagePairs",
            "get_domains": "getDomains",
            "translate": "translate",
        }

        # query parameters used by _get when the caller passes none
        self.params = {"key": self.api_key}
        super().__init__(
            base_url=BASE_URLS.get("QCRI"),
            source=source,
            target=target,
            languages=QCRI_LANGUAGE_TO_CODE,
            **kwargs,
        )

    def _get(
        self,
        endpoint: str,
        params: Optional[dict] = None,
        return_text: bool = True,
    ):
        """
        send a GET request to one of the api endpoints
        @param endpoint: key of the endpoint in self.api_endpoints
        @param params: query parameters, self.params is used if empty
        @param return_text: return the response text if True, otherwise
        the response object
        """
        if not params:
            params = self.params
        res = requests.get(
            self._base_url.format(endpoint=self.api_endpoints[endpoint]),
            params=params,
        )
        return res.text if return_text else res

    @property
    def languages(self):
        return self.get_supported_languages()

    def get_domains(self):
        """
        @return: text of the response of the domains endpoint
        """
        return self._get("get_domains")

    @property
    def domains(self):
        return self.get_domains()

    def translate(self, text: str, **kwargs) -> str:
        """
        translate a text
        @param text: desired text to translate
        @param kwargs: must contain the "domain" to use for the translation
        @return: str: translated text
        @raise KeyError: if no "domain" keyword argument is given
        @raise ServerException: if the server cannot be reached (503) or
        answers with a failure status code
        @raise TranslationNotFound: if the response holds no translation
        """
        params = {
            "key": self.api_key,
            "langpair": f"{self._source}-{self._target}",
            "domain": kwargs["domain"],
            "text": text,
        }
        try:
            response = self._get("translate", params=params, return_text=False)
        except (ConnectionError, requests.exceptions.ConnectionError):
            # requests raises its own ConnectionError, which is not a
            # subclass of the builtin one, so both have to be listed
            raise ServerException(503)

        if request_failed(status_code=response.status_code):
            # the exception used to be created without being raised
            raise ServerException(response.status_code)

        res = response.json()
        translation = res.get("translatedText")
        if not translation:
            raise TranslationNotFound(text)
        return translation

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate from a file
        @param path: path to file
        @param kwargs: additional args
        @return: translated text
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a batch of texts
        @domain: domain
        @param batch: list of texts to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)

View file

@ -0,0 +1,43 @@
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from typing import Optional
from deep_translator.exceptions import NotValidLength, NotValidPayload
def is_empty(text: str) -> bool:
    """Tell whether the text is exactly the empty string."""
    nothing = ""
    return text == nothing
def request_failed(status_code: int) -> bool:
    """Tell whether a request has failed.

    Every status code outside of the 2** range counts as a failure.

    Args:
        status_code (int): status code of the request

    Returns:
        bool: True if the request failed, False otherwise
    """
    succeeded = 200 <= status_code <= 299
    return not succeeded
def is_input_valid(
    text: str, min_chars: int = 0, max_chars: Optional[int] = None
) -> bool:
    """
    validate the target text to translate
    @param min_chars: min characters
    @param max_chars: max characters; no upper limit is applied when it is
    None or 0
    @param text: text to translate
    @return: bool: True if the text is valid, otherwise an error is raised
    @raise NotValidPayload: if text is not a string
    @raise NotValidLength: if the length of text is outside of the limits
    """
    if not isinstance(text, str):
        raise NotValidPayload(text)

    length = len(text)
    # The lower limit is checked on its own: it used to be skipped whenever
    # no upper limit was given.
    too_short = length < min_chars
    # The upper limit stays exclusive, as before.
    too_long = bool(max_chars) and length >= max_chars
    if too_short or too_long:
        raise NotValidLength(text, min_chars, max_chars)

    return True

View file

@ -0,0 +1,158 @@
"""
Yandex translator API
"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
import os
from typing import List, Optional
import requests
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS, YANDEX_ENV_VAR
from deep_translator.exceptions import (
ApiKeyException,
RequestError,
ServerException,
TooManyRequests,
TranslationNotFound,
)
from deep_translator.validate import is_input_valid, request_failed
class YandexTranslator(BaseTranslator):
    """
    class that wraps functions, which use the yandex translator
    under the hood to translate word(s)
    """

    def __init__(
        self,
        source: str = "en",
        target: str = "de",
        api_key: Optional[str] = os.getenv(YANDEX_ENV_VAR, None),
        **kwargs
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param api_key: your yandex api key
        @raise ApiKeyException: if no api key is passed and none is found
        in the environment
        """
        # The default above is evaluated only once, when the module is
        # imported; look the variable up again so a key exported afterwards
        # is still picked up.
        if not api_key:
            api_key = os.getenv(YANDEX_ENV_VAR)
        if not api_key:
            raise ApiKeyException(YANDEX_ENV_VAR)
        self.api_key = api_key
        self.api_version = "v1.5"
        self.api_endpoints = {
            "langs": "getLangs",
            "detect": "detect",
            "translate": "translate",
        }
        super().__init__(
            base_url=BASE_URLS.get("YANDEX"),
            source=source,
            target=target,
            **kwargs
        )

    def _get_supported_languages(self):
        # the part before the "-" of every direction reported by the api
        return set(x.split("-")[0] for x in self.dirs)

    @property
    def languages(self):
        return self.get_supported_languages()

    @property
    def dirs(self, proxies: Optional[dict] = None):
        """
        translation directions reported by the getLangs endpoint
        @raise ServerException: if the server cannot be reached (503) or
        answers with a failure status code
        """
        try:
            url = self._base_url.format(
                version=self.api_version, endpoint="getLangs"
            )
            response = requests.get(
                url, params={"key": self.api_key}, proxies=proxies
            )
        except requests.exceptions.ConnectionError:
            raise ServerException(503)

        # check the status before parsing the body
        if request_failed(status_code=response.status_code):
            raise ServerException(response.status_code)
        return response.json().get("dirs")

    def detect(self, text: str, proxies: Optional[dict] = None):
        """
        detect the language of a text
        @param text: text whose language should be detected
        @param proxies: optional proxies mapping passed to requests
        @return: the "lang" value of the response
        @raise ServerException: if the server cannot be reached (503), the
        body is not valid json, or no language is reported (501)
        @raise RequestError: if the "code" of the response is not 200
        """
        params = {
            "text": text,
            "format": "plain",
            "key": self.api_key,
        }
        try:
            url = self._base_url.format(
                version=self.api_version, endpoint="detect"
            )
            response = requests.post(url, data=params, proxies=proxies)
        except (ConnectionError, requests.exceptions.ConnectionError):
            # requests raises its own ConnectionError, which is not a
            # subclass of the builtin one, so both have to be listed
            raise ServerException(503)

        try:
            body = response.json()
        except ValueError:
            # the body is not valid json
            raise ServerException(response.status_code)

        # check the status first: "lang" may be absent from the body
        if body.get("code") != 200:
            raise RequestError()
        language = body.get("lang")
        if not language:
            raise ServerException(501)
        return language

    def translate(
        self, text: str, proxies: Optional[dict] = None, **kwargs
    ) -> str:
        """
        translate a text
        @param text: desired text to translate
        @param proxies: optional proxies mapping passed to requests
        @return: the "text" value of the response
        @raise ServerException: if the server cannot be reached (503) or
        the "code" of the response is not 200
        @raise TooManyRequests: if the "code" of the response is 429
        @raise TranslationNotFound: if the response holds no translation
        """
        if is_input_valid(text):
            params = {
                "text": text,
                "format": "plain",
                # with an automatic source only the target is sent
                "lang": self._target
                if self._source == "auto"
                else "{}-{}".format(self._source, self._target),
                "key": self.api_key,
            }
            try:
                url = self._base_url.format(
                    version=self.api_version, endpoint="translate"
                )
                response = requests.post(url, data=params, proxies=proxies)
            except (ConnectionError, requests.exceptions.ConnectionError):
                raise ServerException(503)

            body = response.json()

            if body["code"] == 429:
                raise TooManyRequests()

            if body["code"] != 200:
                raise ServerException(body["code"])

            if not body["text"]:
                # TranslationNotFound requires the offending value; calling
                # it without arguments raised a TypeError instead
                raise TranslationNotFound(text)

            return body["text"]

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate from a file
        @param path: path to file
        @return: translated text
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a batch of texts
        @param batch: list of texts to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)