108 lines
3.7 KiB
Python
108 lines
3.7 KiB
Python
|
from indictrans import Transliterator
|
|||
|
from libindic.transliteration import getInstance
|
|||
|
# import final_transliteration_only  (import the module name, without the .py suffix)
|
|||
|
# libindic transliterator handle created at import time; in this file it is
# referenced only by the commented-out libindic experiment
# (`t.transliterate(text, code)`).
t = getInstance()
|
|||
|
from indic_transliteration import sanscript
|
|||
|
from indic_transliteration.sanscript import transliterate
|
|||
|
import requests, uuid, json
|
|||
|
from transliterate import translit, get_available_language_codes
|
|||
|
import polyglot
|
|||
|
from polyglot.text import Text
|
|||
|
# from polyglot.transliteration import Transliterator
|
|||
|
|
|||
|
|
|||
|
# translit = Transliterator(source_lang='en',target_lang='zh')
|
|||
|
# text = "hello, good morning"
|
|||
|
# text = Text(text)
|
|||
|
# new_text = " "
|
|||
|
# for x in text.transliterate("zh"):
|
|||
|
# new_text = new_text + str(x)
|
|||
|
# for word in text.split():
|
|||
|
# new_text = new_text + " " + translit.transliterate(word)
|
|||
|
# print(new_text)
|
|||
|
# from transliterate import translit
|
|||
|
# text = "Lorem ipsum dolor sit amet"
|
|||
|
# blob = """Hello good morning"""
|
|||
|
# text = Text(blob)
|
|||
|
# final = []
|
|||
|
# for x in text.transliterate("ar"):
|
|||
|
# final.append(x)
|
|||
|
# final_text = " ".join(final)
|
|||
|
# print(final_text)
|
|||
|
# transliterator = Transliterator(source_lang="en", target_lang="ru")
|
|||
|
# print(transliterator.transliterate(u"preprocessing"))
|
|||
|
|
|||
|
# indic_src_lang = "eng"
|
|||
|
# indic_dest_lang = "urd"
|
|||
|
|
|||
|
# # azure_src_script = ""
|
|||
|
|
|||
|
|
|||
|
# # #libindic-translation
|
|||
|
|
|||
|
# # code = libindic_dest_script+'_IN'
|
|||
|
# # lib_out = t.transliterate(text, code)
|
|||
|
# # # print("Libindic: ", lib_out)
|
|||
|
# # with open('readme.txt', 'w') as f:
|
|||
|
# # f.write('Libindic Transliterated' + "\n")
|
|||
|
# # f.write(lib_out + "\n")
|
|||
|
# # f.close()
|
|||
|
|
|||
|
# # #indic-trans-IAST
|
|||
|
|
|||
|
# # # ind_out = transliterate(text, sanscript.H, sanscript.)
|
|||
|
# # # # print("Indic-trans-IAST: ", ind_out)
|
|||
|
# # # with open('readme.txt', 'w') as f:
|
|||
|
# # # f.write('Indic-trans IAST Transliterated' + "\n")
|
|||
|
# # # f.write(lib_out + "\n")
|
|||
|
# # # f.close()
|
|||
|
|
|||
|
#azure-transliteration
|
|||
|
|
|||
|
# subscription_key = os.environ["AZURE_TRANSLATOR_KEY"]  # read the key from the environment (needs `import os`); never commit API keys to source
|
|||
|
# endpoint = "https://api.cognitive.microsofttranslator.com"
|
|||
|
# location = "eastus"
|
|||
|
# path = '/translate'
|
|||
|
# constructed_url = endpoint + path
|
|||
|
# headers = {
|
|||
|
# 'Ocp-Apim-Subscription-Key': subscription_key,
|
|||
|
# 'Ocp-Apim-Subscription-Region': location,
|
|||
|
# 'Content-type': 'application/json',
|
|||
|
# 'X-ClientTraceId': str(uuid.uuid4())
|
|||
|
# }
|
|||
|
# constructed_url1 = "https://api.cognitive.microsofttranslator.com/transliterate?api-version=3.0"
|
|||
|
# azure_src_script = "Hans"
|
|||
|
# azure_dest_script = "Latn"
|
|||
|
# lang = "zh-Hans"
|
|||
|
# text = "是的先生。"
|
|||
|
# params = {'api-version': '3.0', 'language':lang, 'fromScript': azure_src_script, 'toScript': azure_dest_script}
|
|||
|
# body = [{'text': text}]
|
|||
|
# request = requests.post(constructed_url1, params=params, headers=headers, json=body)
|
|||
|
# response = request.json()
|
|||
|
# azure_out = response
|
|||
|
# print(azure_out)
|
|||
|
|
|||
|
# # print(transliterate(azure_dest_script,azure_src_script,azure_src_lang,text))
|
|||
|
|
|||
|
|
|||
|
# # azure = azure_transliteration(text, azure_src_lang, azure_dest_script)
|
|||
|
# # with open('readme.txt', 'a') as f:
|
|||
|
# # f.write('Azure Transliterated' + "\n")
|
|||
|
# # f.write(azure_out + "\n")
|
|||
|
# # f.close()
|
|||
|
|
|||
|
# accented_string = '''wǒ de míng zì shì tài lóng。 wǒ shì yì
|
|||
|
# míng yuè nán shòu yī 。 qǐng nǐ néng
|
|||
|
# shǎo zhǎo diǎn líng qián 。 wǒ de yòu
|
|||
|
# yǎn bèi cì shāng , xiàn zài wǒ hé fǎ
|
|||
|
# shī míng。 wǒ de qī zi gǎn rǎn le ài
|
|||
|
# zī bìng dú , wǒ men xū yào qián wèi
|
|||
|
# tā mǎi yào。'''
|
|||
|
# # accented_string is of type 'unicode'
|
|||
|
# import unidecode
|
|||
|
# print(unidecode.unidecode(accented_string))
|
|||
|
from anyascii import anyascii
|
|||
|
|
|||
|
# Convert one Hebrew and one Sinhala sample to ASCII with anyascii.
# Both results stay available as module globals (`s`, `y`); only the
# Hebrew result is printed.
s, y = map(anyascii, ('נתראה בקרוב', 'මට යන්න බෑ '))
print(s)
|