Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""
ajust the Tanwin case, if the word is independent from the next one.
@return: Nothing.
@rtype:
"""
if self.is_noun() and not self.is_stopword() and not self.is_defined()\
and not self.has_encletic() and not self.is_mamnou3():
#self.vocalized += '4'
if self.vocalized.endswith(araby.DAMMA):
self.vocalized = self.vocalized[:-1]+araby.DAMMATAN
elif self.vocalized.endswith(araby.KASRA):
self.vocalized = self.vocalized[:-1]+araby.KASRATAN
elif self.vocalized.endswith(araby.TEH_MARBUTA+araby.FATHA):
self.vocalized = self.vocalized[:-1]+araby.FATHATAN
elif self.vocalized.endswith(araby.FATHA+araby.ALEF):
self.vocalized = self.vocalized[:-2]+araby.FATHATAN+araby.ALEF
# $Date: 2009/06/02 01:10:00 $
# $Author: Taha Zerrouki $
# $Revision: 0.7 $
# $Source: arabtechies.sourceforge.net
#
#***********************************************************************/
import sys
import re
import time
import pyarabic.araby as araby
# Treat the root: strip extra characters.
# stamp_pat is a single character class built from nine araby constants:
# ALEF, YEH, HAMZA, ALEF_HAMZA_ABOVE, WAW_HAMZA, YEH_HAMZA, WAW,
# ALEF_MAKSURA and SHADDA. It matches any one of those characters.
stamp_pat = re.compile(u"[%s%s%s%s%s%s%s%s%s]"% (araby.ALEF,
araby.YEH, araby.HAMZA, araby.ALEF_HAMZA_ABOVE, araby.WAW_HAMZA,
araby.YEH_HAMZA, araby.WAW, araby.ALEF_MAKSURA, araby.SHADDA),
re.UNICODE)
def word_stamp(word):
    """
    Generate a stamp for a word.

    The stamp is the word with every letter that can change form removed:
        - ALEF,
        - HAMZA (alone or carried by ALEF, WAW or YEH),
        - YEH,
        - WAW,
        - ALEF_MAKSURA,
        - SHADDA.
    @param word: the word to stamp.
    @type word: unicode
    @return: stamped word
    @rtype: unicode
    """
    # stamp_pat is the module-level character class listing exactly the
    # letters above; deleting every match leaves the stable letters only.
    return stamp_pat.sub(u"", word)
def ajust_vocalized_suggestion(self, _suggest_list):
    """
    Adjust the suggestions after vocalization to correct some cases
    like 'meeting of two quiescents = ألتقاء الساكنين'.

    When the chosen form of the following entry starts with ALEF, the
    final SUKUN of the current chosen form is replaced by a vowel.
    The list entries are modified in place.
    @param _suggest_list: the suggestions, one dict per word; the
    vocalized form is read from and written to the 'chosen' key.
    @type _suggest_list: list of dict of unicode
    @return: _suggest_list.
    @rtype: list of dict of unicode
    """
    # Particles with a fixed replacement; any other word ending with
    # SUKUN gets a KASRA instead (see the elif below).
    particle_forms = {
        u'مِنْ': u'مِنَ',
        u'عَنْ': u'عَنِ',
        u'مَنْ': u'مَنِ',
    }
    # The last entry has no following word, so it is never adjusted.
    for i in range(len(_suggest_list) - 1):
        following = _suggest_list[i + 1]
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if 'chosen' not in following \
           or not following['chosen'].startswith(araby.ALEF):
            continue
        current = _suggest_list[i]
        chosen = current.get('chosen')
        if chosen is None:
            # Entries may lack a chosen form (the following entry is
            # checked for the same reason); nothing to adjust then.
            continue
        if chosen in particle_forms:
            current['chosen'] = particle_forms[chosen]
        elif chosen.endswith(araby.SUKUN):
            current['chosen'] = chosen[:-1] + araby.KASRA
    return _suggest_list
passive=False;
imperative=False;
future_moode=False;
confirmed=False;
if field==u"يعملان":
all=True;
else:
if field.find(araby.YEH)>=0:
past=True;
if field.find(araby.AIN)>=0:
future=True;
if field.find(araby.MEEM)>=0:
imperative=True;
if field.find(araby.LAM)>=0:
passive=True;
if field.find(araby.ALEF)>=0:
future_moode=True;
if field.find(araby.NOON)>=0:
confirmed=True;
return (all, past, future, passive, imperative, future_moode, confirmed);
def highlight_diacritics_html(self, text):
"""
Highlight diacritics in the HTML text.

The text is scanned one character at a time and the result is
accumulated in a new string. Characters equal to FATHA, DAMMA, KASRA
or SUKUN are the ones considered for wrapping between the span tags.
@param text: the given text
@type text: unicode.
@return: the result as HTML.
@rtype: unicode.
"""
hight_text = u""
# NOTE(review): the double quotes around tashkeel are not escaped, so this
# literal does not parse as written; presumably the original used single
# quotes or backslash escapes -- confirm against the upstream file.
lefttag = u"<span class="tashkeel">"
righttag = u"</span>"
for i in range(len(text)):
if text[i] in (araby.FATHA, araby.DAMMA, araby.KASRA, araby.SUKUN):
# NOTE(review): the condition below is cut off after "(i+1"; the rest of
# the test and the statement it guarded appear to be missing from this
# copy. Restore them from the upstream source before relying on this
# method. What remains checks that the preceding character is not one of
# ALEF, ALEF_HAMZA_ABOVE, WAW_HAMZA, ALEF_MADDA, DAL, THAL, WAW, REH, ZAIN
# or SHADDA.
if (i>0 and text[i-1] not in (araby.ALEF,
araby.ALEF_HAMZA_ABOVE, araby.WAW_HAMZA, araby.ALEF_MADDA,
araby.DAL, araby.THAL, araby.WAW, araby.REH, araby.ZAIN,
araby.SHADDA)) and (i+1%s"%text[i]
# Emits: opening tag, a single space, the diacritic, closing tag.
hight_text += u"".join([lefttag, " ", text[i], righttag])
else:
# Copies the character to the output unchanged.
hight_text += text[i]
return hight_text
passive=False;
imperative=False;
future_moode=False;
confirmed=False;
if field==u"يعملان":
all=True;
else:
if field.find(araby.YEH)>=0:
past=True;
if field.find(araby.AIN)>=0:
future=True;
if field.find(araby.MEEM)>=0:
imperative=True;
if field.find(araby.LAM)>=0:
passive=True;
if field.find(araby.ALEF)>=0:
future_moode=True;
if field.find(araby.NOON)>=0:
confirmed=True;
return (all, past, future, passive, imperative, future_moode, confirmed);
first_passive_future_mark = DAMMA
future_marks = FATHA + SUKUN + FATHA + SUKUN + KASRA + DAMMA
passive_future_marks = FATHA + SUKUN + FATHA + SUKUN + FATHA + DAMMA
# معالجة الألفات في الفعل والحركات الطويلة
# إذا كان طول الحركات ألأصلية للفعل
# أقل من طول حركات الماضي المبني للمجهول
# هذا يعني وجود حركة طويلة
# نقوم بتحويل الحركة الطويلة إلى ما يوافقها
if len(marks) < len(future_marks):
future_marks = self._homogenize_harakat(marks, future_marks)
passive_future_marks = self._homogenize_harakat(marks,
passive_future_marks)
imp_marks = future_marks
imp_letters = future_letters
# حالة الأفعال التي تبدأ بألف وصل
if letters.startswith(ALEF) or self.hamza_zaida:
future_letters = letters[1:]
future_marks = future_marks[1:]
passive_future_marks = passive_future_marks[1:]
passive_letters = letters[1:]
# حالة الفعل المثال
elif self.vlength == 3 and self.word_letters.startswith(WAW) and \
(self.future_type == KASRA or (self.future_type==FATHA and \
self.word_marks==FATHA+FATHA+FATHA and \
not self.word_letters.endswith(SHADDA))):
future_letters = letters[1:]
future_marks = future_marks[1:]
## passive_future_marks=passive_future_marks[1:]
passive_letters = letters
else:
future_letters = letters
passive_letters = letters
def vocalize_foreign(word):
"""
vocalize a foreign names written in arabic
@param word: given word
@type word: unicode
@return: the vocalized word
@rtype: unicode
"""
marks =[]
previous = ""
for c in word:
if previous and not previous == araby.ALEF:
#--------- add Harakat before letter
if c in (araby.ALEF, araby.ALEF_MAKSURA, araby.TEH_MARBUTA,):
marks.pop()
marks.append(araby.FATHA)
elif c in (araby.WAW, araby.WAW_HAMZA):
marks.pop()
marks.append(araby.DAMMA)
elif c in( araby.YEH , araby.YEH_HAMZA ):
marks.pop()
marks.append(araby.KASRA)
#--------- add Harakat before letter
if c in (araby.ALEF_HAMZA_BELOW):
marks.append(araby.KASRA)
elif previous in (araby.ALEF_HAMZA_BELOW, araby.ALEF_HAMZA_ABOVE):
marks.append(araby.SUKUN)
elif previous in (araby.ALEF, araby.YEH, araby.WAW):
if c == araby.YEH_HAMZA :
marks.append(araby.KASRA)