69 lines
5.2 KiB
Python
69 lines
5.2 KiB
Python
|
from conversion.translation.translation_resources import ibm_watson, aws, azure, google_simple
|
||
|
from conversion.translation.detection import script_det
|
||
|
def test_run():
    """Record the script detected for a sample sentence in every language.

    For each entry of the language table below, the English sample sentence
    is passed through ``google_simple`` (English itself is used as-is), the
    resulting text is handed to ``script_det``, and the outcome is stored as
    ``{language name: str(detected script)}``. The ``str()`` form of that
    mapping is then written to a fixed report file.

    Returns:
        True once the report file has been written.

    Raises:
        Exception: whatever ``google_simple`` raises, or whatever
            ``script_det`` raises on its second consecutive failure for the
            same text.
    """
    # -> Text Used for Testing
    text = "My name is dharmesh and my age is 15. Can you help me with "

    # Display name -> language code. A list holds alternative codes for the
    # same language; only its first entry is used below. The loop relies on
    # dict insertion order, so this table is the single source of truth for
    # both the set of languages and the order they are processed in.
    language_code = {
        'Afrikaans': 'af',
        'Albanian': 'sq', 'Amharic': 'am', 'Arabic': 'ar', 'Armenian': 'hy', 'Assamese': 'as', 'Aymara': 'ay',
        'Azerbaijani': 'az', 'Bambara': 'bm', 'Basque': 'eu', 'Belarusian': 'be', 'Bengali': 'bn', 'Bhojpuri': 'bho',
        'Bosnian': 'bs', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Cebuano': 'ceb', 'Chichewa': 'ny',
        'Chinese (Simplified)': ['zh-CN', 'zh'], 'Chinese (Traditional)': 'zh-TW', 'Corsican': 'co', 'Croatian': 'hr',
        'Czech': 'cs', 'Danish': 'da', 'Divehi': 'dv', 'Dogri': 'doi', 'Dutch': 'nl', 'English': 'en',
        'Esperanto': 'eo', 'Estonian': 'et', 'Ewe': 'ee', 'Filipino': 'tl', 'Finnish': 'fi', 'French': 'fr',
        'Frisian': 'fy', 'Galician': 'gl', 'Ganda': 'lg', 'Georgian': 'ka', 'German': 'de', 'Greek': 'el',
        'Guarani': 'gn', 'Gujarati': 'gu', 'Haitian Creole': 'ht', 'Hausa': 'ha', 'Hawaiian': 'haw', 'Hebrew': ['he', 'iw'],
        'Hindi': 'hi', 'Hmong': 'hmn', 'Hungarian': 'hu', 'Icelandic': 'is', 'Igbo': 'ig', 'Iloko': 'ilo',
        'Indonesian': 'id', 'Irish Gaelic': 'ga', 'Italian': 'it', 'Japanese': 'ja', 'Javanese': ['jv', 'jw'], 'Kannada': 'kn',
        'Kazakh': 'kk', 'Khmer': 'km', 'Kinyarwanda': 'rw', 'Konkani': 'gom', 'Korean': 'ko', 'Krio': 'kri',
        'Kurdish (Kurmanji)': 'ku', 'Kurdish (Sorani)': 'ckb', 'Kyrgyz': 'ky', 'Lao': 'lo', 'Latin': 'la',
        'Latvian': 'lv', 'Lingala': 'ln', 'Lithuanian': 'lt', 'Luxembourgish': 'lb', 'Macedonian': 'mk',
        'Maithili': 'mai', 'Malagasy': 'mg', 'Malay': 'ms', 'Malayalam': 'ml', 'Maltese': 'mt', 'Maori': 'mi',
        'Marathi': 'mr', 'Meiteilon (Manipuri)': 'mni-Mtei', 'Mizo': 'lus', 'Mongolian': 'mn',
        'Myanmar (Burmese)': 'my', 'Nepali': 'ne', 'Northern Sotho': 'nso', 'Norwegian': 'no', 'Odia (Oriya)': 'or',
        'Oromo': 'om', 'Pashto': 'ps', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese': 'pt', 'Punjabi': 'pa',
        'Quechua': 'qu', 'Romanian': 'ro', 'Russian': 'ru', 'Samoan': 'sm', 'Sanskrit': 'sa', 'Scots Gaelic': 'gd',
        'Serbian': 'sr', 'Sesotho': 'st', 'Shona': 'sn', 'Sindhi': 'sd', 'Sinhala': 'si', 'Slovak': 'sk',
        'Slovenian': 'sl', 'Somali': 'so', 'Spanish': 'es', 'Sundanese': 'su', 'Swahili': 'sw', 'Swedish': 'sv',
        'Tajik': 'tg', 'Tamil': 'ta', 'Tatar': 'tt', 'Telugu': 'te', 'Thai': 'th', 'Tigrinya': 'ti', 'Tsonga': 'ts',
        'Turkish': 'tr', 'Turkmen': 'tk', 'Twi': 'ak', 'Ukrainian': 'uk', 'Urdu': 'ur', 'Uyghur': 'ug', 'Uzbek': 'uz',
        'Vietnamese': 'vi', 'Welsh': 'cy', 'Xhosa': 'xh', 'Yiddish': 'yi', 'Yoruba': 'yo', 'Zulu': 'zu',
    }

    language_script = {}

    for language, codes in language_code.items():
        # Languages with several accepted codes use the first one listed.
        code = codes[0] if isinstance(codes, list) else codes

        if code == "en":
            # The sample is already English; no translation call is needed.
            textuse = text
        else:
            # presumably (text, source code, target code) -- confirm against
            # google_simple's definition.
            textuse = google_simple(text, "en", code)

        try:
            detected = script_det(textuse)
        except Exception:
            # One retry of the same call; a second failure propagates to the
            # caller. Catching Exception rather than using a bare except
            # keeps KeyboardInterrupt / SystemExit able to stop the run.
            detected = script_det(textuse)
        language_script[str(language)] = str(detected)

    # Persist the mapping once every language has been processed. The
    # encoding is pinned so the file does not depend on the host locale.
    with open("/home/user/mnf/project/MNF/media/file95.txt", "w", encoding="utf-8") as f:
        f.write(str(language_script))

    return True
|