Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def highlight_diacritics_html(self, text):
"""
Highlight diacritics in the text using HTML span tags.

Characters other than FATHA, DAMMA, KASRA and SUKUN are never wrapped.
@param text: the given text
@type text: unicode.
@return: the result as HTML.
@rtype: unicode.
"""
# Output accumulator, extended one character at a time.
hight_text = u""
# NOTE(review): the inner double quotes are not escaped, so this literal does
# not parse as written; presumably the original used single quotes or
# escapes around the class name -- confirm against the upstream file.
lefttag = u"<span class="tashkeel">"
righttag = u"</span>"
for i in range(len(text)):
# Only these four marks are candidates for highlighting.
if text[i] in (araby.FATHA, araby.DAMMA, araby.KASRA, araby.SUKUN):
# The visible part of the test requires a preceding character that is not
# one of the listed letters (nor SHADDA).
# NOTE(review): the condition is cut off after "(i+1" and the statement that
# followed it is lost (only the tail '%s"%text[i]' survives), most likely
# through HTML stripping. With indentation also lost, it cannot be told
# from here which branch each of the statements below belongs to.
if (i>0 and text[i-1] not in (araby.ALEF,
araby.ALEF_HAMZA_ABOVE, araby.WAW_HAMZA, araby.ALEF_MADDA,
araby.DAL, araby.THAL, araby.WAW, araby.REH, araby.ZAIN,
araby.SHADDA)) and (i+1%s"%text[i]
# Appends the opening tag, a space, the mark itself and the closing tag.
hight_text += u"".join([lefttag, " ", text[i], righttag])
else:
# Appends the character unchanged.
hight_text += text[i]
return hight_text
def highlight_diacritics_html(self, text):
"""
Highlight diacritics in the text using HTML span tags.

Characters other than FATHA, DAMMA, KASRA and SUKUN are never wrapped.
@param text: the given text
@type text: unicode.
@return: the result as HTML.
@rtype: unicode.
"""
# Output accumulator, extended one character at a time.
hight_text = u""
# NOTE(review): the inner double quotes are not escaped, so this literal does
# not parse as written; presumably the original used single quotes or
# escapes around the class name -- confirm against the upstream file.
lefttag = u"<span class="tashkeel">"
righttag = u"</span>"
for i in range(len(text)):
# Only these four marks are candidates for highlighting.
if text[i] in (araby.FATHA, araby.DAMMA, araby.KASRA, araby.SUKUN):
# The visible part of the test requires a preceding character that is not
# one of the listed letters (nor SHADDA).
# NOTE(review): the condition is cut off after "(i+1" and the statement that
# followed it is lost (only the tail '%s"%text[i]' survives), most likely
# through HTML stripping. With indentation also lost, it cannot be told
# from here which branch each of the statements below belongs to.
if (i>0 and text[i-1] not in (araby.ALEF,
araby.ALEF_HAMZA_ABOVE, araby.WAW_HAMZA, araby.ALEF_MADDA,
araby.DAL, araby.THAL, araby.WAW, araby.REH, araby.ZAIN,
araby.SHADDA)) and (i+1%s"%text[i]
# Appends the opening tag, a space, the mark itself and the closing tag.
hight_text += u"".join([lefttag, " ", text[i], righttag])
else:
# Appends the character unchanged.
hight_text += text[i]
return hight_text
TenseFuture:(u"ري", DAMMA+KASRA+FATHA),
TensePassiveFuture:(u"ري", DAMMA+FATHA+FATHA),
TenseImperative:(u"ءري", FATHA+KASRA+FATHA),
}
# If the verb conjugates on the pattern (عَلِمَ يَعْلَمُ), its waw is not
# dropped, e.g. وَجِلَ، يَوْجَلُ, except for three verbs: (وذر), (وسع) and (وطأ).
# Those three drop their waw, so we say: وَذِرَ، يَذَرُ,
# and وَسِعَ، يَسَعُ, and وَطِئَ، يَطَأُ.
#
# The verb وذر يذر
# Key marks: KASRA, FATHA
IRREGULAR_VERB_CONJUG[u"وذر" + KASRA + FATHA] = {
    CONJUG_BAB: (KASRA, FATHA),
    TenseFuture: (u"ذر", FATHA + FATHA + DAMMA),
    TensePassiveFuture: (u"ذر", DAMMA + FATHA + DAMMA),
    TenseImperative: (u"ذر", FATHA + SUKUN),
}
# The verb وَسِعَ يسع
# Key marks: KASRA, FATHA
IRREGULAR_VERB_CONJUG[u"وسع" + KASRA + FATHA] = {
    CONJUG_BAB: (KASRA, FATHA),
    TenseFuture: (u"سع", FATHA + FATHA + DAMMA),
    TensePassiveFuture: (u"سع", DAMMA + FATHA + DAMMA),
    TenseImperative: (u"سع", FATHA + SUKUN),
}
# The verb وطئ يطأ
# Key marks: KASRA, FATHA
# Each entry maps a tense constant to a (letters, marks) pair, in the same
# four-key layout as the neighbouring entries. The closing brace was missing
# in the original, which made the next assignment a syntax error.
IRREGULAR_VERB_CONJUG[u"وطء" + KASRA + FATHA] = {
    CONJUG_BAB: (KASRA, FATHA),
    TenseFuture: (u"طء", FATHA + FATHA + DAMMA),
    TensePassiveFuture: (u"وطء", DAMMA + SUKUN + FATHA + DAMMA),
    TenseImperative: (u"طء", FATHA + SUKUN),
}
# Verbs whose hamza is not obligatorily dropped, such as سأل and أمر,
# are not treated as irregular.
# The verb أكَل يأكُل، كُل
# Key marks: FATHA, DAMMA
IRREGULAR_VERB_CONJUG[u"ءكل" + FATHA + DAMMA] = {
    CONJUG_BAB: (FATHA, DAMMA),
    TenseFuture: (u"ءكل", FATHA + SUKUN + DAMMA + DAMMA),
    TensePassiveFuture: (u"ءكل", DAMMA + SUKUN + FATHA + FATHA),
    TenseImperative: (u"كل", DAMMA + SUKUN),
}
# The verb أخَذَ يأخُذُ، خُذ
# Key marks: FATHA, DAMMA
IRREGULAR_VERB_CONJUG[u"ءخذ" + FATHA + DAMMA] = {
    CONJUG_BAB: (FATHA, DAMMA),
    TenseFuture: (u"ءخذ", FATHA + SUKUN + DAMMA + DAMMA),
    TensePassiveFuture: (u"ءخذ", DAMMA + SUKUN + FATHA + FATHA),
    TenseImperative: (u"خذ", DAMMA + SUKUN),
}
# (c) If the verb conjugates on the pattern (مَنَعَ يَمْنَعُ), its waw is
# dropped, e.g.: وَضَعَ، يَضَعُ، وَجَأَ يَجَأُ، وَدَعَ يَدَعُ، وَزَعَ يَزَعُ،
# وَضَأَ يَضَأُ، وَطَأَ يَطَأُ، وَقَعَ يَقَعُ، وَلَغَ يَلَغُ، وَهَبَ يَهَبُ,
# except for five verbs:
# (وَبَأ), (وَبَهَ), (وَجَعَ), (وَسَعَ) and (وَهَلَ),
# which keep the waw, so we say: يَوْبَأُ، يَوْبَهُ، يَوْجَعُ، يَوْسَعُ، يَوْهَلُ.
#
# The verb وبَأ يوبأ
# Key marks: FATHA, FATHA
IRREGULAR_VERB_CONJUG[u"وبء" + FATHA + FATHA] = {
    CONJUG_BAB: (FATHA, FATHA),
    TenseFuture: (u"وبء", FATHA + SUKUN + FATHA + DAMMA),
    TensePassiveFuture: (u"وبء", DAMMA + SUKUN + FATHA + DAMMA),
    TenseImperative: (u"وبء", SUKUN + FATHA + SUKUN),
}
"""
Adjust the resulting text after vocalization to correct some cases,
such as the 'meeting of two quiescents = ألتقاء الساكنين'.
@param text: vocalized text
@type text: unicode
@return: adjusted text.
@rtype: unicode
"""
# min = > mina
text = re.sub(ur'\sمِنْ\s+ا', u' مِنَ ا', text)
# man = > mani
text = re.sub(ur'\sمَنْ\s+ا', u' مَنِ ا', text)
#An = > ani
text = re.sub(ur'\sعَنْ\s+ا', u' عَنِ ا', text)
#sukun + alef = > kasra +alef
text = re.sub(ur'\s%s\s+ا'%araby.SUKUN, u' %s ا' % araby.KASRA, text)
#~ text = re.sub(ur'\s%s\s+ا'%araby.SUKUN, u' %s ا' % araby.SUKUN, text)
# adjust punctuation
text = re.sub(ur" ([.?!, :)”—]($| ))", ur"\1", text)
#binu = > bin
# temporary, to be analysed by syntaxical analyzer
text = re.sub(ur'\sبْنُ\s', u' بْن ', text)
# # # اختصارات مثل حدثنا إلى ثنا وه تكثر في كتب التراث
# text = re.sub(ur'\seثِنَا\s', u' ثَنَا ', text)
return text
# The marks SUKUN, FATHA, DAMMA and KASRA as a tuple (SHADDA is not included).
HARAKAT = (SUKUN, FATHA, DAMMA, KASRA)
# One string: the three *_HARAKA markers followed by every mark in HARAKAT.
HARAKAT2 = u"".join((ALEF_HARAKA, WAW_HARAKA, YEH_HARAKA) + HARAKAT)

# The hamza forms, as a tuple and as a character class matching any of them.
HAMZAT = (ALEF_HAMZA_ABOVE, WAW_HAMZA, YEH_HAMZA, HAMZA, ALEF_HAMZA_BELOW)
HAMZAT_PATTERN = re.compile(u"[%s%s%s%s%s]" % HAMZAT, re.UNICODE)
# Matches any one of the lam-alef ligature code points U+FEF7, U+FEF9, U+FEF5.
LAM_ALEF_PAT = re.compile(u'[\ufef7\ufef9\ufef5]', re.UNICODE)

# Fixed mark sequences used to uniformize harakat (4, 5, 5 and 6 marks long).
UNIFORMATE_MARKS_4 = FATHA + SUKUN + FATHA + FATHA
UNIFORMATE_MARKS_5TEH = FATHA + FATHA + SUKUN + FATHA + FATHA
UNIFORMATE_MARKS_5 = KASRA + SUKUN + FATHA + FATHA + FATHA
UNIFORMATE_MARKS_6 = KASRA + SUKUN + FATHA + SUKUN + FATHA + FATHA

# Word-boundary marker characters.
BEGIN_WORD = u"^"
END_WORD = u"$"

LONG_HARAKAT = (
    ALEF_HARAKA,
    YEH_HARAKA,
    WAW_HARAKA,
    ALEF_YEH_HARAKA,
    ALEF_WAW_HARAKA,
)

# Short private aliases for the marks above.
_F, _D, _K, _S = FATHA, DAMMA, KASRA, SUKUN
_A, _W, _Y = ALEF_HARAKA, WAW_HARAKA, YEH_HARAKA
# _AH and _YH are bound to the same values as _A and _Y.
_AH, _YH = ALEF_HARAKA, YEH_HARAKA
self.conj_display.add(tense, vconst.PronounAntunna,
conj_ana+NOON+SHADDA+FATHA)
# indirect conjugation
# Ana pronoun like conjugation
elif pronoun in ( vconst.PronounHya, vconst.PronounHuma_f,
vconst.PronounHuma, vconst.PronounHum):
conj_huwa = self.conj_display.get_conj(tense,
vconst.PronounHuwa)
if conj_huwa == u"":
conj_huwa = self.conjugate_tense_pronoun(tense,
vconst.PronounHuwa)
self.conj_display.add(tense, vconst.PronounHuwa, conj_huwa)
# حالة الفعل مهموز الآخر
if conj_huwa.endswith(YEH+HAMZA+FATHA) :
self.conj_display.add(tense, vconst.PronounHya,
conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+SUKUN)
self.conj_display.add(tense, vconst.PronounHuma_f,
conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+FATHA+ALEF)
self.conj_display.add(tense, vconst.PronounHuma,
conj_huwa[:-2]+YEH_HAMZA+FATHA+ALEF)
self.conj_display.add(tense, vconst.PronounHum,
conj_huwa[:-2]+YEH_HAMZA+DAMMA+WAW+ALEF)
else :
self.conj_display.add(tense, vconst.PronounHya,
conj_huwa+TEH+SUKUN)
self.conj_display.add(tense, vconst.PronounHuma_f,
conj_huwa+TEH+FATHA+ALEF)
self.conj_display.add(tense, vconst.PronounHuma,
conj_huwa+ALEF)
if conj_huwa.endswith(KASRA+YEH+FATHA):
u'آيد':[u'ءايد'],
u'آيس':[u'أءيس'],
}
STANDARD_REPLACEMENT=[
#-تحويل همزة القطع على الألف بعدها فتحة
#وهمزة القطع على الألف بعدها سكون إلى ألف ممدودة
( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF]), ALEF_MADDA)
, ( u"".join([ALEF_MADDA, FATHA]), ALEF_MADDA)
, ( u"".join([ALEF_MADDA, ALEF]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, SUKUN]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, FATHA]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, DAMMA, WAW_HAMZA, SUKUN]), ALEF_HAMZA_ABOVE+DAMMA+WAW)
, ( u"".join([YEH, SHADDA, FATHA, ALEF_MAKSURA]), YEH+SHADDA+FATHA+ALEF)
# إدغام النون الساكنة
, ( u"".join([NOON, SUKUN, NOON]), NOON+SHADDA)
# إذا كان الحرف الأول ساكنا وبعده شدة، ثم أضيفت إليه الألف
, ( u"".join([SUKUN, SHADDA]), SHADDA)
## معالجة ألف التفريق
, ( ALEF_WASLA, ALEF)
## معالجة ألف التفريق
, ( ALEF_MAMDUDA, ALEF)
@param verb: verb found in dictionary.
@type verb: unicode.
@param proclitic: first level prefix.
@type proclitic: unicode.
@param enclitic: first level suffix.
@type enclitic: unicode.
@return: (vocalized word, semivocalized).
@rtype: (unicode, unicode).
"""
#~ print(verb.encode('utf8'))
# لمعالجة حالة ألف التفريق
if enclitic and verb.endswith(ar.WAW + ar.ALEF):
verb = verb[:-1]
if enclitic and verb.endswith(ar.ALEF_MAKSURA):
verb = verb[:-1] + ar.ALEF
if enclitic and verb.endswith(ar.TEH+ar.DAMMA + ar.MEEM+ ar.SUKUN):
verb = verb[:-1] + ar.DAMMA + ar.WAW
if enclitic and verb.endswith(ar.TEH+ar.DAMMA + ar.MEEM):
verb += ar.DAMMA + ar.WAW
word_tuple_list =[]
#~ enclitic_voc = SVC.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
#~ enclitic_voc = self.get_enclitic_variant(verb, enclitic_voc)
#~ proclitic_voc = SVC.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]
#suffix_voc = suffix #CONJ_SUFFIX_LIST_TAGS[suffix]["vocalized"][0]
for proclitic_voc in SVC.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"]:
for enclitic_voc in SVC.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"]:
enclitic_voc = self.get_enclitic_variant(verb, enclitic_voc)
vocalized = ''.join([proclitic_voc, verb, enclitic_voc])
semivocalized = ''.join(
[proclitic_voc, ar.strip_lastharaka(verb), enclitic_voc])
word_tuple_list.append((vocalized, semivocalized))
TenseFuture:(u"ري", DAMMA+KASRA+FATHA),
TensePassiveFuture:(u"ري", DAMMA+FATHA+FATHA),
TenseImperative:(u"ءري", FATHA+KASRA+FATHA),
}
# If the verb conjugates on the pattern (عَلِمَ يَعْلَمُ), its waw is not
# dropped, e.g. وَجِلَ، يَوْجَلُ, except for three verbs: (وذر), (وسع) and (وطأ).
# Those three drop their waw, so we say: وَذِرَ، يَذَرُ,
# and وَسِعَ، يَسَعُ, and وَطِئَ، يَطَأُ.
#
# The verb وذر يذر
# Key marks: KASRA, FATHA
IRREGULAR_VERB_CONJUG[u"وذر" + KASRA + FATHA] = {
    CONJUG_BAB: (KASRA, FATHA),
    TenseFuture: (u"ذر", FATHA + FATHA + DAMMA),
    TensePassiveFuture: (u"ذر", DAMMA + FATHA + DAMMA),
    TenseImperative: (u"ذر", FATHA + SUKUN),
}
# The verb وَسِعَ يسع
# Key marks: KASRA, FATHA
IRREGULAR_VERB_CONJUG[u"وسع" + KASRA + FATHA] = {
    CONJUG_BAB: (KASRA, FATHA),
    TenseFuture: (u"سع", FATHA + FATHA + DAMMA),
    TensePassiveFuture: (u"سع", DAMMA + FATHA + DAMMA),
    TenseImperative: (u"سع", FATHA + SUKUN),
}
# الفعل وطئ يطأ
# KASRA FATHA
IRREGULAR_VERB_CONJUG[u"وطء"+KASRA+FATHA] = {
CONJUG_BAB:(KASRA, FATHA),
TenseFuture:(u"طء", FATHA+FATHA+DAMMA),
TensePassiveFuture:(u"وطء", DAMMA+SUKUN+FATHA+DAMMA),
TenseImperative:(u"طء", FATHA+SUKUN),