Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@staticmethod
def get_enclitic_variant(word, enclitic):
    """
    Choose the form of the enclitic that fits the end of the word.

    Only the enclitic HEH+DAMMA is ever altered: when the word ends with
    a KASRA or with a YEH it becomes HEH+KASRA. Every other enclitic is
    returned as received.
    Example: word = أرجِهِ , enclitic = هُ gives HEH+KASRA
    (as in the phrase أرجه وأخاه).

    @param word: word found in dictionary.
    @type word: unicode.
    @param enclitic: first level suffix vocalized.
    @type enclitic: unicode.
    @return: variant of enclitic.
    @rtype: unicode.
    """
    # Anything other than HEH+DAMMA has no variant.
    if enclitic != ar.HEH + ar.DAMMA:
        return enclitic
    # A final KASRA or YEH on the word turns the DAMMA into a KASRA.
    if word.endswith((ar.KASRA, ar.YEH)):
        return ar.HEH + ar.KASRA
    return enclitic
, u"أنتما مؤ" : [u"ت", u"َا"]
, u"أنتن" : [u"ت", u"ْنَ"]
, u"نحن" : [u"ن", u"َ"]
##, u"هم" : [u"ي", DAMMA+WAW+ALEF]
, u"هم" : [u"ي", DAMMA+WAW+ALEF_WASLA]
, u"هما" : [u"ي", u"َا"]
, u"هما مؤ" : [u"ت", u"َا"]
, u"هن" : [u"ي", u"ْنَ"]
, u"هو" : [u"ي", u"َ"]
, u"هي" : [u"ت", u"َ"]
}
# Table keyed by pronoun string. Each value is a two-item list: a one-letter
# string followed by a sequence of marks/letters built from the araby-style
# constants (presumably the prefix and suffix added to the verb stem for this
# tense -- TODO confirm against the conjugation code that reads this table).
future_confirmed = {
    u"أنا": [u"أ", FATHA + NOON + SHADDA + FATHA],
    u"أنت": [u"ت", FATHA + NOON + SHADDA + FATHA],
    u"أنتِ": [u"ت", KASRA + NOON + SHADDA + FATHA],
    u"أنتما": [u"ت", FATHA + ALEF + NOON + SHADDA + KASRA],
    u"أنتما مؤ": [u"ت", FATHA + ALEF + NOON + SHADDA + KASRA],
    u"أنتم": [u"ت", DAMMA + NOON + SHADDA + FATHA],
    u"أنتن": [u"ت", SUKUN + NOON + FATHA + ALEF + NOON + SHADDA + KASRA],
    u"نحن": [u"ن", FATHA + NOON + SHADDA + FATHA],
    u"هم": [u"ي", DAMMA + NOON + SHADDA + FATHA],
    u"هما": [u"ي", FATHA + ALEF + NOON + SHADDA + KASRA],
    u"هما مؤ": [u"ت", FATHA + ALEF + NOON + SHADDA + KASRA],
    u"هن": [u"ي", SUKUN + NOON + FATHA + ALEF + NOON + SHADDA + KASRA],
    u"هو": [u"ي", FATHA + NOON + SHADDA + FATHA],
    u"هي": [u"ت", FATHA + NOON + SHADDA + FATHA],
}
imperative = {
u"أنت" : [u"", u"ْ"]
, u"أنتِ" : [u"", u"ِي"]
, u"أنتم" : [u"", DAMMA+WAW+ALEF_WASLA]
# Character class of three code points: U+FEF7, U+FEF9 and U+FEF5
# (the isolated lam-alef ligatures carrying hamza above, hamza below and
# madda in Arabic Presentation Forms-B).
LAM_ALEF_PAT = re.compile(u'[\ufef7\ufef9\ufef5]', re.UNICODE)

# Uniform haraka sequences (presumably selected by word length, as the
# numeric suffixes suggest -- TODO confirm with the code that uses them).
UNIFORMATE_MARKS_4 = FATHA + SUKUN + FATHA + FATHA
UNIFORMATE_MARKS_5TEH = FATHA + FATHA + SUKUN + FATHA + FATHA
UNIFORMATE_MARKS_5 = KASRA + SUKUN + FATHA + FATHA + FATHA
UNIFORMATE_MARKS_6 = KASRA + SUKUN + FATHA + SUKUN + FATHA + FATHA

# Word-boundary marker strings.
BEGIN_WORD, END_WORD = u"^", u"$"

# The five long-haraka constants gathered in one tuple.
LONG_HARAKAT = (
    ALEF_HARAKA,
    YEH_HARAKA,
    WAW_HARAKA,
    ALEF_YEH_HARAKA,
    ALEF_WAW_HARAKA,
)

# Short private aliases for the short marks.
_F, _D, _K, _S = FATHA, DAMMA, KASRA, SUKUN

# Short private aliases for the long harakat; note that _A/_AH, _W/_WH and
# _Y/_YH are pairs of names bound to the same constant.
_A, _W, _Y = ALEF_HARAKA, WAW_HARAKA, YEH_HARAKA
_AH, _YH, _WH = ALEF_HARAKA, YEH_HARAKA, WAW_HARAKA
_AYH, _AWH = ALEF_YEH_HARAKA, ALEF_WAW_HARAKA
_YHALT = ALTERNATIVE_YEH_HARAKA

# Short private aliases for the hamza-related letters.
_AHA, _AHB = ALEF_HAMZA_ABOVE, ALEF_HAMZA_BELOW
_AM, _YHA = ALEF_MADDA, YEH_HAMZA
CONJUG_BAB:(KASRA, FATHA),
TenseFuture:(u"ذر", FATHA+FATHA+DAMMA),
TensePassiveFuture:(u"ذر", DAMMA+FATHA+DAMMA),
TenseImperative:(u"ذر", FATHA+SUKUN),
}
# Irregular-conjugation entry for the verb وَسِعَ / يسع.
# Key: the letters "وسع" followed by the mark pair KASRA, FATHA
# (the same pair that is stored under CONJUG_BAB below).
IRREGULAR_VERB_CONJUG[u"وسع"+KASRA+FATHA] = {
CONJUG_BAB:(KASRA, FATHA),
# Each tense maps to a (letters, marks) pair; the letters here are "سع",
# i.e. the key's letters without the leading waw.
TenseFuture:(u"سع", FATHA+FATHA+DAMMA),
# NOTE(review): the sibling entry for "وطء" keeps the waw in its passive
# future ("وطء" with DAMMA+SUKUN+FATHA+DAMMA) while this one drops it --
# confirm which form is intended.
TensePassiveFuture:(u"سع", DAMMA+FATHA+DAMMA),
TenseImperative:(u"سع", FATHA+SUKUN),
}
# Irregular-conjugation entry for the verb وطئ / يطأ.
# Key: the letters "وطء" followed by the mark pair KASRA, FATHA
# (the same pair that is stored under CONJUG_BAB below).
IRREGULAR_VERB_CONJUG[u"وطء"+KASRA+FATHA] = {
CONJUG_BAB:(KASRA, FATHA),
# Each tense maps to a (letters, marks) pair. The active future and the
# imperative use "طء" (the key's letters without the leading waw).
TenseFuture:(u"طء", FATHA+FATHA+DAMMA),
# The passive future keeps all three letters and therefore carries four marks.
TensePassiveFuture:(u"وطء", DAMMA+SUKUN+FATHA+DAMMA),
TenseImperative:(u"طء", FATHA+SUKUN),
}
# Verbs whose imperative obligatorily drops the hamza, such as أكل and أخذ.
# Verbs whose hamza is not obligatorily dropped, such as سأل and أمر, are not treated as irregular.
# The verb أكَل / يأكُل, imperative كُل
# FATHA, DAMMA
IRREGULAR_VERB_CONJUG[u"ءكل"+FATHA+DAMMA] = {
CONJUG_BAB:(FATHA, DAMMA),
TenseFuture:(u"ءكل", FATHA+SUKUN+DAMMA+DAMMA),
def ajust_tanwin(self):
    """
    Adjust the tanwin on the vocalized form of the word.

    The vocalized form is only touched when the word is a noun and is
    neither a stop word, nor defined, nor carrying an enclitic, nor
    mamnou3. In that case its ending is rewritten by the first rule
    that matches:
      - final DAMMA            -> DAMMATAN
      - final KASRA            -> KASRATAN
      - final TEH_MARBUTA+FATHA -> the FATHA becomes FATHATAN
      - final FATHA+ALEF       -> FATHATAN+ALEF
    @return: Nothing.
    @rtype:
    """
    takes_tanwin = (
        self.is_noun()
        and not self.is_stopword()
        and not self.is_defined()
        and not self.has_encletic()
        and not self.is_mamnou3()
    )
    if not takes_tanwin:
        return

    # (ending to look for, number of trailing characters to drop, replacement)
    rules = (
        (araby.DAMMA, 1, araby.DAMMATAN),
        (araby.KASRA, 1, araby.KASRATAN),
        (araby.TEH_MARBUTA + araby.FATHA, 1, araby.FATHATAN),
        (araby.FATHA + araby.ALEF, 2, araby.FATHATAN + araby.ALEF),
    )
    for ending, dropped, replacement in rules:
        if self.vocalized.endswith(ending):
            self.vocalized = self.vocalized[:-dropped] + replacement
            # Only the first matching rule applies.
            break
, u"هما" : [u"", u"َا"]
, u"هما مؤ" : [u"", u"َتَا"]
, u"نحن" : [u"", u"ْنَا"]
, u"أنتم" : [u"", u"ْتُم"]
, u"أنتن" : [u"", u"ْتُنَّ"]
##, u"هم" : [u"", u"ُوا"]
, u"هم" : [u"", DAMMA + WAW + ALEF_WASLA]
, u"هن" : [u"", u"ْنَ"]
}
# Table keyed by pronoun string. Each value is a two-item list: a one-letter
# string followed by a string of marks/letters (presumably the prefix and
# suffix attached to the verb stem for this tense -- TODO confirm against the
# conjugation code that reads this table).
future = {
    u"أنا": [u"أ", u"ُ"],
    u"أنت": [u"ت", u"ُ"],
    u"أنتِ": [u"ت", u"ِينَ"],
    u"أنتم": [u"ت", u"ُونَ"],
    u"أنتما": [u"ت", FATHA + ALEF + NOON + KASRA],
    u"أنتما مؤ": [u"ت", FATHA + ALEF + NOON + KASRA],
    u"أنتن": [u"ت", SUKUN + NOON + FATHA],
    u"نحن": [u"ن", u"ُ"],
    u"هم": [u"ي", u"ُونَ"],
    u"هما": [u"ي", u"َانِ"],
    u"هما مؤ": [u"ت", u"َانِ"],
    u"هن": [u"ي", u"ْنَ"],
    u"هو": [u"ي", u"ُ"],
    u"هي": [u"ت", u"ُ"],
}
future_majzoom = {
u"أنا" : [u"أ", u"ْ"]
, u"أنت" : [u"ت", u"ْ"]
, u"أنتِ" : [u"ت", u"ِي"]
, u"أنتم" : [u"ت", DAMMA + WAW + ALEF_WASLA]
##, u"أنتم" : [u"ت", DAMMA+WAW+ALEF]
, u"أنتما" : [u"ت", u"َا"]
elif pronoun in (vconst.PronounAnta, vconst.PronounAnti,
vconst.PronounAntuma, vconst.PronounAntuma_f, vconst.PronounAntum,
vconst.PronounAntunna):
conj_ana = self.conj_display.get_conj(tense, vconst.PronounAna)
if conj_ana == u"":
conj_ana = self.conjugate_tense_pronoun(tense,
vconst.PronounAna)
self.conj_display.add(tense, vconst.PronounAna,
conj_ana)
conj_ana_without_last_mark = conj_ana[:-1]
if pronoun == vconst.PronounAnta:
self.conj_display.add(tense, vconst.PronounAnta,
conj_ana_without_last_mark+FATHA)
elif pronoun == vconst.PronounAnti:
self.conj_display.add(tense, vconst.PronounAnti,
conj_ana_without_last_mark+KASRA)
elif pronoun == vconst.PronounAntuma:
self.conj_display.add(tense, vconst.PronounAntuma,
conj_ana+MEEM+FATHA+ALEF)
elif pronoun == vconst.PronounAntuma_f:
self.conj_display.add(tense, vconst.PronounAntuma_f,
conj_ana+MEEM+FATHA+ALEF)
elif pronoun == vconst.PronounAntum:
self.conj_display.add(tense, vconst.PronounAntum,
conj_ana+MEEM)
elif pronoun == vconst.PronounAntunna:
self.conj_display.add(tense, vconst.PronounAntunna,
conj_ana+NOON+SHADDA+FATHA)
# indirect conjugation
# Ana pronoun like conjugation
elif pronoun in ( vconst.PronounHya, vconst.PronounHuma_f,
vconst.PronounHuma, vconst.PronounHum):
previous = ""
for c in word:
if previous and not previous == araby.ALEF:
#--------- add Harakat before letter
if c in (araby.ALEF, araby.ALEF_MAKSURA, araby.TEH_MARBUTA,):
marks.pop()
marks.append(araby.FATHA)
elif c in (araby.WAW, araby.WAW_HAMZA):
marks.pop()
marks.append(araby.DAMMA)
elif c in( araby.YEH , araby.YEH_HAMZA ):
marks.pop()
marks.append(araby.KASRA)
#--------- add Harakat before letter
if c in (araby.ALEF_HAMZA_BELOW):
marks.append(araby.KASRA)
elif previous in (araby.ALEF_HAMZA_BELOW, araby.ALEF_HAMZA_ABOVE):
marks.append(araby.SUKUN)
elif previous in (araby.ALEF, araby.YEH, araby.WAW):
if c == araby.YEH_HAMZA :
marks.append(araby.KASRA)
else:
marks.append(araby.NOT_DEF_HARAKA)
previous = c
#print len(word) ,len(marks)
#print marks
return araby.joint(word, u"".join(marks))
"""
Get the enclitic variant to be joined to the word.
For example: word = أرجِهِ , enclitic = هُ.
The enclitic is convert to HEH+ KAsra.
اعبارة في مثل أرجه وأخاه إلى يم الزينة
@param word: word found in dictionary.
@type word: unicode.
@param enclitic: first level suffix vocalized.
@type enclitic: unicode.
@return: variant of enclitic.
@rtype: unicode.
"""
#if the word ends by a haraka
if enclitic == ar.HEH+ar.DAMMA and (word.endswith(ar.KASRA)\
or word.endswith(ar.YEH)):
enclitic = ar.HEH + ar.KASRA
return enclitic