Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@return: return True if the start Hamza is not original
@rtype: boolean;
"""
# if the length of the verb is exactly 4 letters and it starts with hamza
# and it is in the AF3Al wazn and not FA33al or FAA3la
# ألوزن المعني هو أفعل
# الأوزان غير المعنية هي فاعل وفعّل
# الأوزان المشتقة هي أفعّ من أفعل
# الخلاصة أن يكون الفعل رباعيا، حرفه الأول همزة
# ولا يكون حرفه الثاني ألف، لمنع الوزن فاعل
# ولا يكون حرفه الثالث شدة، لمنع الوزن فعّل
verb = verb_normalized_unvocalized
if len(verb) != 4 or not verb.startswith(HAMZA):
return False
elif len(verb) == 4 and verb.startswith(HAMZA) and \
verb[1]!=ALEF and verb[2]!=SHADDA:
return True
else :
return False
# $Revision: 0.7 $
# $Source: arabtechies.sourceforge.net
#
#***********************************************************************/
import sys
import re
import time
import pyarabic.araby as araby
# Character class matching every letter that may change form inside a word
# (weak letters, hamza carriers and shadda); used to strip them from a root.
stamp_pat = re.compile(
    u"[" + u"".join([
        araby.ALEF,
        araby.YEH,
        araby.HAMZA,
        araby.ALEF_HAMZA_ABOVE,
        araby.WAW_HAMZA,
        araby.YEH_HAMZA,
        araby.WAW,
        araby.ALEF_MAKSURA,
        araby.SHADDA,
    ]) + u"]",
    re.UNICODE,
)
def word_stamp(word):
"""
generate a stamp for a word,
remove all letters which can change form in the word :
- ALEF,
- HAMZA,
- YEH,
- WAW,
- ALEF_MAKSURA
- SHADDA
@return: stamped word
"""
# strip the last letter if is doubled
if word[-1:] == word[-2:-1]:
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
import re
import time
import pyarabic.araby as araby
# Character class matching the letters that may change form inside a word:
# alef variants, hamza carriers, waw, yeh, alef maksura and shadda.
STAMP_PAT = re.compile(
    u"[" + u"".join([
        araby.ALEF,
        araby.YEH,
        araby.HAMZA,
        araby.ALEF_HAMZA_ABOVE,
        araby.WAW_HAMZA,
        araby.YEH_HAMZA,
        araby.WAW,
        araby.ALEF_MAKSURA,
        araby.ALEF_MADDA,
        araby.SHADDA,
    ]) + u"]",
    re.UNICODE,
)
def decode_tenses(field):
"""
Decode tenses field
"""
all=False;
past=False;
future=False;
passive=False;
imperative=False;
future_moode=False;
confirmed=False;
if field==u"يعملان":
all=True;
else:
if field.find(araby.YEH)>=0:
"""
Highlight diacritics in the HTML text.
@param text: the given text
@type text: unicode.
@return: the result as HTML.
@rtype: unicode.
"""
hight_text = u""
lefttag = u"<span class="tashkeel">"
righttag = u"</span>"
for i in range(len(text)):
if text[i] in (araby.FATHA, araby.DAMMA, araby.KASRA, araby.SUKUN):
if (i>0 and text[i-1] not in (araby.ALEF,
araby.ALEF_HAMZA_ABOVE, araby.WAW_HAMZA, araby.ALEF_MADDA,
araby.DAL, araby.THAL, araby.WAW, araby.REH, araby.ZAIN,
araby.SHADDA)) and (i+1%s"%text[i]
hight_text += u"".join([lefttag, " ", text[i], righttag])
else:
hight_text += text[i]
return hight_text
# get verb subclass
verb_nm = araby.strip_tashkeel(verb_tuple['vocalized'])
verb_class = ""
if verb_nm.startswith(araby.WAW):
verb_class= "W1W" #"Mithal_W"
elif verb_nm[-2:-1] ==araby.ALEF: # before last char
if verb_tuple['future_type'] in (araby.DAMMA, u"ضمة"):
verb_class= "W2W" #"Adjwaf_W"
elif verb_tuple['future_type'] in (araby.KASRA, u"كسرة"):
verb_class= "W2Y" #"Adjwaf_Y"
elif verb_nm[-1:] in (araby.YEH, araby.ALEF_MAKSURA):
verb_class= "W3Y" #"Naqis_Y"
elif verb_nm[-1:] == araby.ALEF:
verb_class= "W3W" #"Naqis_W"
elif araby.SHADDA in (verb_tuple['vocalized']):
verb_class= "Dbl" # doubled
else:
verb_class = "-"
# the passive tenses don't take an object suffix, except with double transitive verbs
tags = "V."+verb_class+"."
if verb_tuple['transitive']:
tags +="T"
else:
tags +="I"
if verb_tuple['double_trans']:
tags +="D"
elif verb_tuple['think_trans']:
tags += "T"
elif verb_tuple['reflexive_trans']:
conj_ana = self.conjugate_tense_pronoun(tense,
vconst.PronounAna)
self.conj_display.add(tense, vconst.PronounAna, conj_ana)
conj_ana_without_last_mark = conj_ana[:-1]
self.conj_display.add(tense, vconst.PronounAnta,
conj_ana_without_last_mark+FATHA)
self.conj_display.add(tense, vconst.PronounAnti,
conj_ana_without_last_mark+KASRA)
self.conj_display.add(tense, vconst.PronounAntuma,
conj_ana+MEEM+FATHA+ALEF)
self.conj_display.add(tense, vconst.PronounAntuma_f,
conj_ana+MEEM+FATHA+ALEF)
self.conj_display.add(tense, vconst.PronounAntum,
conj_ana+MEEM)
self.conj_display.add(tense, vconst.PronounAntunna,
conj_ana+NOON+SHADDA+FATHA)
self.conj_display.add(tense, vconst.PronounAna, conj_ana)
conj_nahnu = self.conjugate_tense_pronoun(tense,
vconst.PronounNahnu)
self.conj_display.add(tense, vconst.PronounNahnu, conj_nahnu)
conj_hunna = self.conjugate_tense_pronoun(tense,
vconst.PronounHunna)
self.conj_display.add(tense, vconst.PronounHunna, conj_hunna)
conj_huma = self.conjugate_tense_pronoun(tense,
vconst.PronounHuma)
self.conj_display.add(tense, vconst.PronounHuma, conj_huma)
conj_hum = self.conjugate_tense_pronoun(tense,
vconst.PronounHum)
, u"هي" : [u"ت", FATHA+NOON+SHADDA+FATHA]
}
# Imperative tense affixes, keyed by pronoun.
# Each value is a two-item list: [prefix, suffix].
imperative = {
    u"أنت": [u"", u"ْ"],
    u"أنتِ": [u"", u"ِي"],
    u"أنتم": [u"", DAMMA + WAW + ALEF_WASLA],
    u"أنتما": [u"", u"َا"],
    u"أنتما مؤ": [u"", u"َا"],
    u"أنتن": [u"", u"ْنَ"],
}
# Confirmed (emphatic) imperative affixes, keyed by pronoun.
# Each value is a two-item list: [prefix, suffix].
imperative_confirmed = {
    u"أنت": [u"", FATHA + NOON + SHADDA + FATHA],
    u"أنتِ": [u"", KASRA + NOON + SHADDA + FATHA],
    u"أنتم": [u"", DAMMA + NOON + SHADDA + FATHA],
    u"أنتما": [u"", FATHA + ALEF + NOON + SHADDA + KASRA],
    u"أنتما مؤ": [u"", FATHA + ALEF + NOON + SHADDA + KASRA],
    u"أنتن": [u"", SUKUN + NOON + FATHA + ALEF + NOON + SHADDA + KASRA],
}
# Map each tense to its pronoun affix table.
# The passive tenses reuse the affix tables of their active counterparts.
TableTensePronoun = {
    TensePast: past,
    TenseFuture: future,
    TenseImperative: imperative,
    TenseJussiveFuture: future_majzoom,
    TenseSubjunctiveFuture: future_mansoub,
    TenseConfirmedFuture: future_confirmed,
    TenseConfirmedImperative: imperative_confirmed,
    TensePassivePast: past,
    TensePassiveFuture: future,
    TensePassiveJussiveFuture: future_majzoom,
    TensePassiveSubjunctiveFuture: future_mansoub,
}
u'آوب':[u'ءاوب'],
u'آوى':[u'أءوى'],
u'آيد':[u'ءايد'],
u'آيس':[u'أءيس'],
}
STANDARD_REPLACEMENT=[
#-تحويل همزة القطع على الألف بعدها فتحة
#وهمزة القطع على الألف بعدها سكون إلى ألف ممدودة
( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF]), ALEF_MADDA)
, ( u"".join([ALEF_MADDA, FATHA]), ALEF_MADDA)
, ( u"".join([ALEF_MADDA, ALEF]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, SUKUN]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, FATHA]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, DAMMA, WAW_HAMZA, SUKUN]), ALEF_HAMZA_ABOVE+DAMMA+WAW)
, ( u"".join([YEH, SHADDA, FATHA, ALEF_MAKSURA]), YEH+SHADDA+FATHA+ALEF)
# إدغام النون الساكنة
, ( u"".join([NOON, SUKUN, NOON]), NOON+SHADDA)
# إذا كان الحرف الأول ساكنا وبعده شدة، ثم أضيفت إليه الألف
, ( u"".join([SUKUN, SHADDA]), SHADDA)
## معالجة ألف التفريق
, ( ALEF_WASLA, ALEF)
## معالجة ألف التفريق
, ( ALEF_MAMDUDA, ALEF)
"""
Highlight diacritics in the HTML text.
@param text: the given text
@type text: unicode.
@return: the result as HTML.
@rtype: unicode.
"""
hight_text = u""
lefttag = u"<span class="tashkeel">"
righttag = u"</span>"
for i in range(len(text)):
if text[i] in (araby.FATHA, araby.DAMMA, araby.KASRA, araby.SUKUN):
if (i>0 and text[i-1] not in (araby.ALEF,
araby.ALEF_HAMZA_ABOVE, araby.WAW_HAMZA, araby.ALEF_MADDA,
araby.DAL, araby.THAL, araby.WAW, araby.REH, araby.ZAIN,
araby.SHADDA)) and (i+1%s"%text[i]
hight_text += u"".join([lefttag, " ", text[i], righttag])
else:
hight_text += text[i]
return hight_text
future_marks = self._homogenize_harakat(marks, future_marks)
passive_future_marks = self._homogenize_harakat(marks,
passive_future_marks)
imp_marks = future_marks
imp_letters = future_letters
# حالة الأفعال التي تبدأ بألف وصل
if letters.startswith(ALEF) or self.hamza_zaida:
future_letters = letters[1:]
future_marks = future_marks[1:]
passive_future_marks = passive_future_marks[1:]
passive_letters = letters[1:]
# حالة الفعل المثال
elif self.vlength == 3 and self.word_letters.startswith(WAW) and \
(self.future_type == KASRA or (self.future_type==FATHA and \
self.word_marks==FATHA+FATHA+FATHA and \
not self.word_letters.endswith(SHADDA))):
future_letters = letters[1:]
future_marks = future_marks[1:]
## passive_future_marks=passive_future_marks[1:]
passive_letters = letters
else:
future_letters = letters
passive_letters = letters
new_marks = first_future_mark + future_marks
passive_marks = first_passive_future_mark + passive_future_marks
# حالة الأفعال التي تبدأ بألف وصل
if imp_letters.startswith(ALEF):
imp_letters = letters[1:]
imp_marks = imp_marks[1:]
elif self.vlength == 3 and self.word_letters.startswith(WAW) and \
(self.future_type == KASRA or (self.future_type==FATHA and \