Custom MT Engines — Free · Beta
By Awesome Crowdin · Verified Author

Use custom code to connect to the machine translation service

Install

About

Copy link

The Custom MT app is a tool for developers and power users that lets you connect Crowdin to any MT/AI engine with minimal effort.

The app prompts you to provide two pieces of JavaScript code: the first transforms the Crowdin MT request into the request format the target AI/MT engine expects, and the second transforms the MT response into the format Crowdin understands.

Sample code

Copy link

Below is a sample integration with the NLLB-200 model from Facebook, hosted on Hugging Face.

NLLB-200 uses its own language codes, so we use a mapping to convert Crowdin language codes to NLLB-200 language codes. If you are translating into a target language that Crowdin doesn't natively support, or there is no mapping for your language, please create a custom language in Crowdin and add a new mapping entry with both the NLLB-200 language code and the code you set in Crowdin.

Request code

Copy link

The request code must assign a value to the global config variable. That value should define all the common network request parameters (URL, method, headers, and data).

Sample request:

// Body of the outgoing request: the text of every string to translate, plus
// the language pair exactly as provided in sourceLanguage / targetLanguage.
const requestBody = {
    texts: strings.map(({ text }) => text),
    parameters: {
        src_lang: sourceLanguage,
        tgt_lang: targetLanguage
    }
};

// Assign the finished request description to the global `config` variable.
config = {
    url: 'https://your-url-here.com',
    method: 'POST',
    headers: {
        Authorization: `Bearer any-headers-you-need`,
        'Content-Type': 'application/json'
    },
    data: requestBody
};

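The global scope of this script has three variables defined: strings (the strings to translate; the samples read each item's text property), sourceLanguage, and targetLanguage.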

Sample request you can use to integrate with NLLB:

// Mapping between Crowdin language codes and NLLB-200 language codes.
// Each row is [name, nllb, crowdin]; the rows are expanded into
// { name, nllb, crowdin } objects so lookups can match on `crowdin`.
const languages = [
    ['Afrikaans', 'afr_Latn', 'af'],
    ['Akan', 'aka_Latn', 'ak'],
    ['Amharic', 'amh_Ethi', 'am'],
    ['Assamese', 'asm_Beng', 'as'],
    ['Asturian', 'ast_Latn', 'ast'],
    ['Bashkir', 'bak_Cyrl', 'ba'],
    ['Bambara', 'bam_Latn', 'bm'],
    ['Balinese', 'ban_Latn', 'ban'],
    ['Belarusian', 'bel_Cyrl', 'be'],
    ['Bengali', 'ben_Beng', 'bn'],
    ['Bosnian', 'bos_Latn', 'bs'],
    ['Bulgarian', 'bul_Cyrl', 'bg'],
    ['Catalan', 'cat_Latn', 'ca'],
    ['Cebuano', 'ceb_Latn', 'ceb'],
    ['Czech', 'ces_Latn', 'cs'],
    ['Welsh', 'cym_Latn', 'cy'],
    ['Danish', 'dan_Latn', 'da'],
    ['German', 'deu_Latn', 'de'],
    ['Dzongkha', 'dzo_Tibt', 'dz'],
    ['Greek', 'ell_Grek', 'el'],
    ['English', 'eng_Latn', 'en'],
    ['Esperanto', 'epo_Latn', 'eo'],
    ['Estonian', 'est_Latn', 'et'],
    ['Basque', 'eus_Latn', 'eu'],
    ['Ewe', 'ewe_Latn', 'ee'],
    ['Faroese', 'fao_Latn', 'fo'],
    ['Fijian', 'fij_Latn', 'fj'],
    ['Finnish', 'fin_Latn', 'fi'],
    ['French', 'fra_Latn', 'fr'],
    ['Friulian', 'fur_Latn', 'fur-IT'],
    ['Scottish Gaelic', 'gla_Latn', 'gd'],
    ['Irish', 'gle_Latn', 'ga-IE'],
    ['Galician', 'glg_Latn', 'gl'],
    ['Guarani', 'grn_Latn', 'gn'],
    ['Gujarati', 'guj_Gujr', 'gu-IN'],
    ['Haitian Creole', 'hat_Latn', 'ht'],
    ['Hausa', 'hau_Latn', 'ha'],
    ['Hebrew', 'heb_Hebr', 'he'],
    ['Hindi', 'hin_Deva', 'hi'],
    ['Croatian', 'hrv_Latn', 'hr'],
    ['Hungarian', 'hun_Latn', 'hu'],
    ['Armenian', 'hye_Armn', 'hy-AM'],
    ['Igbo', 'ibo_Latn', 'ig'],
    ['Indonesian', 'ind_Latn', 'id'],
    ['Icelandic', 'isl_Latn', 'is'],
    ['Italian', 'ita_Latn', 'it'],
    ['Javanese', 'jav_Latn', 'jv'],
    ['Japanese', 'jpn_Jpan', 'ja'],
    ['Kabyle', 'kab_Latn', 'kab'],
    ['Kannada', 'kan_Knda', 'kn'],
    ['Georgian', 'kat_Geor', 'ka'],
    ['Kazakh', 'kaz_Cyrl', 'kk'],
    ['Khmer', 'khm_Khmr', 'km'],
    ['Kinyarwanda', 'kin_Latn', 'rw'],
    ['Kyrgyz', 'kir_Cyrl', 'ky'],
    ['Korean', 'kor_Hang', 'ko'],
    ['Lao', 'lao_Laoo', 'lo'],
    ['Ligurian', 'lij_Latn', 'lij'],
    ['Limburgish', 'lim_Latn', 'li'],
    ['Lingala', 'lin_Latn', 'ln'],
    ['Lithuanian', 'lit_Latn', 'lt'],
    ['Luxembourgish', 'ltz_Latn', 'lb'],
    ['Maithili', 'mai_Deva', 'mai'],
    ['Malayalam', 'mal_Mlym', 'ml-IN'],
    ['Marathi', 'mar_Deva', 'mr'],
    ['Macedonian', 'mkd_Cyrl', 'mk'],
    ['Maltese', 'mlt_Latn', 'mt'],
    ['Mossi', 'mos_Latn', 'mos'],
    ['Maori', 'mri_Latn', 'mi'],
    ['Burmese', 'mya_Mymr', 'my'],
    ['Dutch', 'nld_Latn', 'nl'],
    ['Norwegian Nynorsk', 'nno_Latn', 'nn-NO'],
    ['Nepali', 'npi_Deva', 'ne-NP'],
    ['Northern Sotho', 'nso_Latn', 'nso'],
    ['Occitan', 'oci_Latn', 'oc'],
    ['Odia', 'ory_Orya', 'or'],
    ['Papiamento', 'pap_Latn', 'pap'],
    ['Polish', 'pol_Latn', 'pl'],
    ['Portuguese', 'por_Latn', 'pt-PT'],
    ['Dari', 'prs_Arab', 'fa-AF'],
    ['Romanian', 'ron_Latn', 'ro'],
    ['Rundi', 'run_Latn', 'rn'],
    ['Russian', 'rus_Cyrl', 'ru'],
    ['Sango', 'sag_Latn', 'sg'],
    ['Sanskrit', 'san_Deva', 'sa'],
    ['Santali', 'sat_Olck', 'sat'],
    ['Sinhala', 'sin_Sinh', 'si-LK'],
    ['Slovak', 'slk_Latn', 'sk'],
    ['Slovenian', 'slv_Latn', 'sl'],
    ['Shona', 'sna_Latn', 'sn'],
    ['Sindhi', 'snd_Arab', 'sd'],
    ['Somali', 'som_Latn', 'so'],
    ['Southern Sotho', 'sot_Latn', 'st'],
    ['Spanish', 'spa_Latn', 'es-ES'],
    ['Sardinian', 'srd_Latn', 'sc'],
    ['Swati', 'ssw_Latn', 'ss'],
    ['Sundanese', 'sun_Latn', 'su'],
    ['Swedish', 'swe_Latn', 'sv-SE'],
    ['Swahili', 'swh_Latn', 'sw'],
    ['Tamil', 'tam_Taml', 'ta'],
    ['Tatar', 'tat_Cyrl', 'tt-RU'],
    ['Telugu', 'tel_Telu', 'te'],
    ['Tajik', 'tgk_Cyrl', 'tg'],
    ['Tagalog', 'tgl_Latn', 'tl'],
    ['Thai', 'tha_Thai', 'th'],
    ['Tigrinya', 'tir_Ethi', 'ti'],
    ['Tswana', 'tsn_Latn', 'tn'],
    ['Tsonga', 'tso_Latn', 'ts'],
    ['Turkmen', 'tuk_Latn', 'tk'],
    ['Turkish', 'tur_Latn', 'tr'],
    ['Uyghur', 'uig_Arab', 'ug'],
    ['Ukrainian', 'ukr_Cyrl', 'uk'],
    ['Venetian', 'vec_Latn', 'vec'],
    ['Vietnamese', 'vie_Latn', 'vi'],
    ['Wolof', 'wol_Latn', 'wo'],
    ['Xhosa', 'xho_Latn', 'xh'],
    ['Yoruba', 'yor_Latn', 'yo'],
    ['Zulu', 'zul_Latn', 'zu']
].map(([name, nllb, crowdin]) => ({ name, nllb, crowdin }));

/**
 * Translate a Crowdin language code into the NLLB-200 code from `languages`.
 *
 * @param {string} lang - Language code to look up among the `crowdin` values.
 * @returns {string} The `nllb` value of the first entry whose `crowdin` code
 *   equals `lang`; when no entry matches, `lang` itself is returned unchanged.
 */
function getNLLBCode(lang) {
    for (const entry of languages) {
        if (entry.crowdin === lang) {
            return entry.nllb;
        }
    }

    // No mapping: pass the code through as-is. This lets a custom Crowdin
    // language whose code is already an NLLB code work without a table entry.
    return lang;
}

// Text of every string to translate, in the order they were supplied.
const nllbInputs = strings.map(({ text }) => text);

// NLLB-200 uses its own language codes, so both ends of the language pair
// are converted through getNLLBCode before being sent.
const nllbParameters = {
    src_lang: getNLLBCode(sourceLanguage),
    tgt_lang: getNLLBCode(targetLanguage)
};

// Assign the finished request description to the global `config` variable.
config = {
    url: 'https://api-inference.huggingface.co/models/facebook/nllb-200-distilled-600m',
    method: 'POST',
    headers: {
        Authorization: `Bearer api_org_XXXXXXXXXXXX`, // <<< your API token here
        'Content-Type': 'application/json'
    },
    data: { // POST data
        inputs: nllbInputs,
        // NOTE(review): presumably asks the Inference API to wait for the model
        // to load instead of failing while it is cold; confirm against its docs.
        options: { wait_for_model: true },
        parameters: nllbParameters
    }
};

// Pass the final request description to log() so it can be inspected.
log(config);

Response

Copy link

The translations variable contains the raw response from the AI/MT server; the custom script is expected to transform it into an array of strings.

// Response script: turn the raw engine response into what Crowdin expects.
// On success, `translations` is replaced by an array holding each result's
// `translation_text`. Otherwise, when `error` is set, it is rewrapped as
// { error: { message } } with a readable message.
if (translations && translations.length > 0) {
    translations = translations.map((result) => result.translation_text);
} else if (error) {
    // Read the message from `error` itself. The previous `err?.message` named a
    // variable that does not exist, and optional chaining does not guard an
    // undeclared identifier, so this branch threw a ReferenceError.
    error = {
        error: {
            message: `An error occurred while translating the text: ${error.message || 'Unknown error'}`
        }
    };
}

Screenshot

Copy link

Crowdin Custom MT integrating NLLB-200 from Huggingface

Crowdin is a platform that helps you manage and translate content into different languages. Integrate Crowdin with your repo, CMS, or other systems. Source content is always up to date for your translators, and translated content is returned automatically.

Learn More
Categories
Machine Translation
Works with
  • Crowdin Enterprise
  • crowdin.com
Details

Released on Mar 11, 2024

Updated on Mar 21, 2024

Published by Awesome Crowdin

Identifier: custom-mt