How to use the pyarabic.araby.SUKUN function in PyArabic

To help you get started, we’ve selected a few PyArabic examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github linuxscout / mishkal / mishkal / lib / qalsadi / libqutrub / conjugatedisplay.py View on Github external
def highlight_diacritics_html(self, text):
        """
        Highlight diacritics in the HTML text.

        The text is scanned one character at a time. Characters other than
        FATHA, DAMMA, KASRA and SUKUN are copied to the result unchanged;
        in the branch that is visible here, a mark is emitted between
        lefttag and righttag, preceded by a space.
        @param text: the given text
        @type text: unicode.
        @return: the result as HTML.
        @rtype: unicode.
        """        
        hight_text = u""
        lefttag = u"<span class="tashkeel">"
        righttag = u"</span>"
        for i in range(len(text)):
            if text[i] in (araby.FATHA, araby.DAMMA, araby.KASRA, araby.SUKUN):
                # NOTE(review): the condition below looks truncated -- the
                # text between a '<' and a later '>' appears to have been
                # lost when this snippet was extracted from HTML (the '>'
                # is also still an HTML entity). Restore it from the
                # upstream file before relying on this code.
                if (i&gt;0 and text[i-1] not in (araby.ALEF, 
                araby.ALEF_HAMZA_ABOVE, araby.WAW_HAMZA, araby.ALEF_MADDA,
                 araby.DAL, araby.THAL, araby.WAW, araby.REH, araby.ZAIN,
            araby.SHADDA)) and (i+1%s"%text[i]
                    hight_text += u"".join([lefttag, " ", text[i], righttag])
            else:
                # Not one of the four marks: keep the character as is.
                hight_text += text[i]
        return hight_text
github linuxscout / mishkal / support / libqutrub / conjugatedisplay.py View on Github external
def highlight_diacritics_html(self, text):
        """
        Highlight diacritics in the HTML text.

        The text is scanned one character at a time. Characters other than
        FATHA, DAMMA, KASRA and SUKUN are copied to the result unchanged;
        in the branch that is visible here, a mark is emitted between
        lefttag and righttag, preceded by a space.
        @param text: the given text
        @type text: unicode.
        @return: the result as HTML.
        @rtype: unicode.
        """        
        hight_text = u""
        lefttag = u"<span class="tashkeel">"
        righttag = u"</span>"
        for i in range(len(text)):
            if text[i] in (araby.FATHA, araby.DAMMA, araby.KASRA, araby.SUKUN):
                # NOTE(review): the condition below looks truncated -- the
                # text between a '<' and a later '>' appears to have been
                # lost when this snippet was extracted from HTML (the '>'
                # is also still an HTML entity). Restore it from the
                # upstream file before relying on this code.
                if (i&gt;0 and text[i-1] not in (araby.ALEF, 
                araby.ALEF_HAMZA_ABOVE, araby.WAW_HAMZA, araby.ALEF_MADDA,
                 araby.DAL, araby.THAL, araby.WAW, araby.REH, araby.ZAIN,
            araby.SHADDA)) and (i+1%s"%text[i]
                    hight_text += u"".join([lefttag, " ", text[i], righttag])
            else:
                # Not one of the four marks: keep the character as is.
                hight_text += text[i]
        return hight_text
github linuxscout / mishkal / support / libqutrub / verb_const.py View on Github external
TenseFuture:(u"ري", DAMMA+KASRA+FATHA), 
TensePassiveFuture:(u"ري", DAMMA+FATHA+FATHA), 
TenseImperative:(u"ءري", FATHA+KASRA+FATHA), 
}
#~ If the verb conjugates on the pattern ʿalima / yaʿlamu,
#~ its waw is not dropped, e.g. wajila / yawjalu,
#~ except for three verbs: wadhira, wasiʿa and waṭiʾa,
#~ whose waw is dropped; so we say: wadhira / yadharu,
# and we say: wasiʿa / yasaʿu, and: waṭiʾa / yaṭaʾu.
# The verb wadhira / yadharu
# (the original comment on this line was partly garbled).
# KASRA FATHA
# Each tense key maps to a 2-tuple: a letter string and a concatenation of
# mark constants (presumably the stem letters and their vocalization --
# confirm against the code that reads IRREGULAR_VERB_CONJUG).
IRREGULAR_VERB_CONJUG[u"وذر"+KASRA+FATHA] = {
    CONJUG_BAB:(KASRA, FATHA), 
    TenseFuture:(u"ذر", FATHA+FATHA+DAMMA), 
    TensePassiveFuture:(u"ذر", DAMMA+FATHA+DAMMA), 
    TenseImperative:(u"ذر", FATHA+SUKUN), 
}
# The verb wasiʿa / yasaʿu.
# KASRA FATHA
# Each tense key maps to a 2-tuple: a letter string and a concatenation of
# mark constants (presumably the stem letters and their vocalization --
# confirm against the code that reads IRREGULAR_VERB_CONJUG).
IRREGULAR_VERB_CONJUG[u"وسع"+KASRA+FATHA] = {
    CONJUG_BAB:(KASRA, FATHA), 
    TenseFuture:(u"سع", FATHA+FATHA+DAMMA), 
    TensePassiveFuture:(u"سع", DAMMA+FATHA+DAMMA), 
    TenseImperative:(u"سع", FATHA+SUKUN), 
}
# الفعل وطئ يطأ
# KASRA FATHA
IRREGULAR_VERB_CONJUG[u"وطء"+KASRA+FATHA] = {
    CONJUG_BAB:(KASRA, FATHA), 
    TenseFuture:(u"طء", FATHA+FATHA+DAMMA), 
    TensePassiveFuture:(u"وطء", DAMMA+SUKUN+FATHA+DAMMA), 
    TenseImperative:(u"طء", FATHA+SUKUN),
github linuxscout / mishkal / mishkal / lib / qalsadi / libqutrub / verb_const.py View on Github external
# Verbs whose hamza is not obligatorily dropped, such as saʾala and amara,
# are not treated as irregular.

# The verb akala / yaʾkulu, imperative kul.
#FATHA, DAMMA
# Each tense key maps to a 2-tuple: a letter string and a concatenation of
# mark constants. The imperative entry uses the two-letter string without
# the leading hamza.
# NOTE(review): the passive-future marks end in FATHA here, while the
# future marks of this entry end in DAMMA -- confirm this is intended.
IRREGULAR_VERB_CONJUG[u"ءكل"+FATHA+DAMMA] = {
    CONJUG_BAB:(FATHA, DAMMA), 
    TenseFuture:(u"ءكل", FATHA+SUKUN+DAMMA+DAMMA), 
    TensePassiveFuture:(u"ءكل", DAMMA+SUKUN+FATHA+FATHA), 
    TenseImperative:(u"كل", DAMMA+SUKUN), 
}
# The verb akhadha / yaʾkhudhu, imperative khudh.
#FATHA, DAMMA
# Each tense key maps to a 2-tuple: a letter string and a concatenation of
# mark constants. The imperative entry uses the two-letter string without
# the leading hamza.
# NOTE(review): the passive-future marks end in FATHA here, while the
# future marks of this entry end in DAMMA -- confirm this is intended.
IRREGULAR_VERB_CONJUG[u"ءخذ"+FATHA+DAMMA] = {
    CONJUG_BAB:(FATHA, DAMMA), 
    TenseFuture:(u"ءخذ", FATHA+SUKUN+DAMMA+DAMMA), 
    TensePassiveFuture:(u"ءخذ", DAMMA+SUKUN+FATHA+FATHA), 
    TenseImperative:(u"خذ", DAMMA+SUKUN), 
}
# (c) If the verb conjugates on the pattern manaʿa / yamnaʿu,
#~ its waw is dropped, e.g. waḍaʿa / yaḍaʿu, wajaʾa / yajaʾu,
#~ wadaʿa / yadaʿu, wazaʿa / yazaʿu, waḍaʾa / yaḍaʾu, waṭaʾa / yaṭaʾu,
#~ waqaʿa / yaqaʿu, walagha / yalaghu, wahaba / yahabu,
#~ except for five verbs:
#~ wabaʾa, wabaha, wajaʿa, wasaʿa and wahala,
#~ which keep the waw; so we say: yawbaʾu, yawbahu, yawjaʿu, yawsaʿu,
#~ yawhalu.
# The verbs wabaʾa, wabaha, wajaʿa, wasaʿa and wahala.
# The verb wabaʾa / yawbaʾu.
#FATHA FATHA
# Each tense key maps to a 2-tuple: a letter string and a concatenation of
# mark constants. All three tenses keep the full three-letter string here.
IRREGULAR_VERB_CONJUG[u"وبء"+FATHA+FATHA] = {
    CONJUG_BAB:(FATHA, FATHA), 
    TenseFuture:(u"وبء", FATHA+SUKUN+FATHA+DAMMA), 
    TensePassiveFuture:(u"وبء", DAMMA+SUKUN+FATHA+DAMMA), 
    TenseImperative:(u"وبء", SUKUN+FATHA+SUKUN), 
}
github linuxscout / mishkal / mishkal / tashkeel / tashkeel.py View on Github external
"""
        Ajust the resulted text after vocalization to correct some case 
        like 'meeting of two queiscents = ألتقاء الساكنين'
        @param text: vocalized text
        @type text: unicode
        @return: ajusted text.
        @rtype: unicode
        """
        # min = > mina
        text = re.sub(ur'\sمِنْ\s+ا', u' مِنَ ا', text)
        # man = > mani
        text = re.sub(ur'\sمَنْ\s+ا', u' مَنِ ا', text)
        #An = > ani
        text = re.sub(ur'\sعَنْ\s+ا', u' عَنِ ا', text)
        #sukun + alef = > kasra +alef
        text = re.sub(ur'\s%s\s+ا'%araby.SUKUN, u' %s ا' % araby.KASRA, text)
        #~ text = re.sub(ur'\s%s\s+ا'%araby.SUKUN, u' %s ا' % araby.SUKUN, text)
        #ajust pounctuation
        text = re.sub(ur" ([.?!, :)”—]($| ))", ur"\1", text)
        #binu = > bin 
        # temporary, to be analysed by syntaxical analyzer
        text = re.sub(ur'\sبْنُ\s', u' بْن ', text)        
        # # # اختصارات مثل حدثنا إلى ثنا وه تكثر في كتب التراث
        # text = re.sub(ur'\seثِنَا\s', u' ثَنَا ', text)        
        return text
github linuxscout / mishkal / mishkal / lib / qalsadi / libqutrub / verb_const.py View on Github external
# SUKUN and the three short vowel marks, as a tuple (SHADDA is not included).
HARAKAT = (SUKUN, FATHA, DAMMA, KASRA)
# ALEF_HARAKA, WAW_HARAKA and YEH_HARAKA followed by the HARAKAT marks,
# joined into a single string.
HARAKAT2 = u"".join((ALEF_HARAKA, WAW_HARAKA, YEH_HARAKA) + HARAKAT)
# The five hamza forms, and a character-class pattern over the same forms.
HAMZAT = (ALEF_HAMZA_ABOVE, WAW_HAMZA, YEH_HAMZA, HAMZA, ALEF_HAMZA_BELOW)
HAMZAT_PATTERN = re.compile(u"[%s%s%s%s%s]" % HAMZAT, re.UNICODE)


# Matches any one of the code points U+FEF7, U+FEF9, U+FEF5.
LAM_ALEF_PAT = re.compile(u'[\ufef7\ufef9\ufef5]', re.UNICODE)

# Uniform mark sequences; the longer ones are spelled in terms of the
# 4-mark sequence where they share its tail.
UNIFORMATE_MARKS_4 = FATHA + SUKUN + FATHA * 2
UNIFORMATE_MARKS_5TEH = FATHA + UNIFORMATE_MARKS_4
UNIFORMATE_MARKS_5 = KASRA + SUKUN + FATHA * 3
UNIFORMATE_MARKS_6 = KASRA + SUKUN + UNIFORMATE_MARKS_4

# Anchor characters for word start and word end.
BEGIN_WORD, END_WORD = u"^", u"$"

LONG_HARAKAT = (
    ALEF_HARAKA,
    YEH_HARAKA,
    WAW_HARAKA,
    ALEF_YEH_HARAKA,
    ALEF_WAW_HARAKA,
)

# Short private aliases for the marks used above.
_F, _D, _K, _S = FATHA, DAMMA, KASRA, SUKUN
_A, _W, _Y = ALEF_HARAKA, WAW_HARAKA, YEH_HARAKA

_AH, _YH = ALEF_HARAKA, YEH_HARAKA
github linuxscout / mishkal / support / qalsadi / libqutrub / classverb.py View on Github external
self.conj_display.add(tense, vconst.PronounAntunna, 
                    conj_ana+NOON+SHADDA+FATHA)
            # indirect conjugation
            # Ana pronoun like conjugation
            elif pronoun in ( vconst.PronounHya, vconst.PronounHuma_f, 
            vconst.PronounHuma, vconst.PronounHum):
                conj_huwa = self.conj_display.get_conj(tense, 
                vconst.PronounHuwa)
                if conj_huwa == u"":
                    conj_huwa = self.conjugate_tense_pronoun(tense, 
                    vconst.PronounHuwa)
                    self.conj_display.add(tense, vconst.PronounHuwa, conj_huwa)
# حالة الفعل مهموز الآخر
                if conj_huwa.endswith(YEH+HAMZA+FATHA) :
                    self.conj_display.add(tense, vconst.PronounHya, 
                    conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+SUKUN)
                    self.conj_display.add(tense, vconst.PronounHuma_f, 
                    conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+FATHA+ALEF)
                    self.conj_display.add(tense, vconst.PronounHuma, 
                    conj_huwa[:-2]+YEH_HAMZA+FATHA+ALEF)

                    self.conj_display.add(tense, vconst.PronounHum, 
                    conj_huwa[:-2]+YEH_HAMZA+DAMMA+WAW+ALEF)

                else :
                    self.conj_display.add(tense, vconst.PronounHya, 
                    conj_huwa+TEH+SUKUN)
                    self.conj_display.add(tense, vconst.PronounHuma_f, 
                    conj_huwa+TEH+FATHA+ALEF)
                    self.conj_display.add(tense, vconst.PronounHuma, 
                    conj_huwa+ALEF)
                    if conj_huwa.endswith(KASRA+YEH+FATHA):
github linuxscout / mishkal / support / libqutrub / verb_const.py View on Github external
u'آيد':[u'ءايد'], 
u'آيس':[u'أءيس'], 
}

STANDARD_REPLACEMENT=[
    #-تحويل همزة القطع على الألف بعدها فتحة 
#وهمزة القطع على الألف بعدها سكون إلى ألف ممدودة
( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF]), ALEF_MADDA)
, ( u"".join([ALEF_MADDA, FATHA]), ALEF_MADDA)
, ( u"".join([ALEF_MADDA, ALEF]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, SUKUN]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, FATHA]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, DAMMA, WAW_HAMZA, SUKUN]), ALEF_HAMZA_ABOVE+DAMMA+WAW)
, ( u"".join([YEH, SHADDA, FATHA, ALEF_MAKSURA]), YEH+SHADDA+FATHA+ALEF)
# إدغام النون الساكنة
, ( u"".join([NOON, SUKUN, NOON]), NOON+SHADDA)
# إذا كان الحرف الأول ساكنا وبعده شدة، ثم أضيفت إليه الألف
, ( u"".join([SUKUN, SHADDA]), SHADDA)
##  معالجة ألف التفريق
, ( ALEF_WASLA, ALEF)
##  معالجة ألف التفريق
, ( ALEF_MAMDUDA, ALEF)
github linuxscout / alyahmor / alyahmor / verb_affixer.py View on Github external
        @param verb: verb found in dictionary.
        @type verb: unicode.
        @param proclitic: first level prefix.
        @type proclitic: unicode.
        @param enclitic: first level suffix.
        @type enclitic: unicode.
        @return: (vocalized word, semivocalized).
        @rtype: (unicode, unicode).
        """
        #~ print(verb.encode('utf8'))
        # لمعالجة حالة ألف التفريق
        if enclitic and verb.endswith(ar.WAW + ar.ALEF):
            verb = verb[:-1]
        if enclitic and verb.endswith(ar.ALEF_MAKSURA):
            verb = verb[:-1] + ar.ALEF
        if enclitic and verb.endswith(ar.TEH+ar.DAMMA + ar.MEEM+ ar.SUKUN):
            verb  = verb[:-1] + ar.DAMMA + ar.WAW
        if enclitic and verb.endswith(ar.TEH+ar.DAMMA + ar.MEEM):
            verb += ar.DAMMA + ar.WAW
        word_tuple_list =[]
        #~ enclitic_voc = SVC.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
        #~ enclitic_voc = self.get_enclitic_variant(verb, enclitic_voc)
        #~ proclitic_voc = SVC.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]
        #suffix_voc = suffix #CONJ_SUFFIX_LIST_TAGS[suffix]["vocalized"][0]
            
        for proclitic_voc in SVC.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"]:
            for enclitic_voc in SVC.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"]:
                enclitic_voc = self.get_enclitic_variant(verb, enclitic_voc)
                vocalized = ''.join([proclitic_voc, verb, enclitic_voc])
                semivocalized = ''.join(
            [proclitic_voc, ar.strip_lastharaka(verb), enclitic_voc])
                word_tuple_list.append((vocalized, semivocalized))
github linuxscout / mishkal / mishkal / lib / qalsadi / libqutrub / verb_const.py View on Github external
TenseFuture:(u"ري", DAMMA+KASRA+FATHA), 
TensePassiveFuture:(u"ري", DAMMA+FATHA+FATHA), 
TenseImperative:(u"ءري", FATHA+KASRA+FATHA), 
}
#~ If the verb conjugates on the pattern ʿalima / yaʿlamu,
#~ its waw is not dropped, e.g. wajila / yawjalu,
#~ except for three verbs: wadhira, wasiʿa and waṭiʾa,
#~ whose waw is dropped; so we say: wadhira / yadharu,
# and we say: wasiʿa / yasaʿu, and: waṭiʾa / yaṭaʾu.
# The verb wadhira / yadharu
# (the original comment on this line was partly garbled).
# KASRA FATHA
# Each tense key maps to a 2-tuple: a letter string and a concatenation of
# mark constants (presumably the stem letters and their vocalization --
# confirm against the code that reads IRREGULAR_VERB_CONJUG).
IRREGULAR_VERB_CONJUG[u"وذر"+KASRA+FATHA] = {
    CONJUG_BAB:(KASRA, FATHA), 
    TenseFuture:(u"ذر", FATHA+FATHA+DAMMA), 
    TensePassiveFuture:(u"ذر", DAMMA+FATHA+DAMMA), 
    TenseImperative:(u"ذر", FATHA+SUKUN), 
}
# The verb wasiʿa / yasaʿu.
# KASRA FATHA
# Each tense key maps to a 2-tuple: a letter string and a concatenation of
# mark constants (presumably the stem letters and their vocalization --
# confirm against the code that reads IRREGULAR_VERB_CONJUG).
IRREGULAR_VERB_CONJUG[u"وسع"+KASRA+FATHA] = {
    CONJUG_BAB:(KASRA, FATHA), 
    TenseFuture:(u"سع", FATHA+FATHA+DAMMA), 
    TensePassiveFuture:(u"سع", DAMMA+FATHA+DAMMA), 
    TenseImperative:(u"سع", FATHA+SUKUN), 
}
# الفعل وطئ يطأ
# KASRA FATHA
IRREGULAR_VERB_CONJUG[u"وطء"+KASRA+FATHA] = {
    CONJUG_BAB:(KASRA, FATHA), 
    TenseFuture:(u"طء", FATHA+FATHA+DAMMA), 
    TensePassiveFuture:(u"وطء", DAMMA+SUKUN+FATHA+DAMMA), 
    TenseImperative:(u"طء", FATHA+SUKUN),