How to use the pyarabic.araby.WAW_HAMZA function in PyArabic

To help you get started, we’ve selected a few PyArabic examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github linuxscout / arramooz / scripts / verbs / verbdict_functions.py View on Github external
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#  
#  

import re
import time
import pyarabic.araby as araby

# Character-class pattern matching any single one of the ten araby letters
# and marks listed below.
STAMP_PAT = re.compile(
    u"[%s%s%s%s%s%s%s%s%s%s]" % (
        araby.ALEF,
        araby.YEH,
        araby.HAMZA,
        araby.ALEF_HAMZA_ABOVE,
        araby.WAW_HAMZA,
        araby.YEH_HAMZA,
        araby.WAW,
        araby.ALEF_MAKSURA,
        araby.ALEF_MADDA,
        araby.SHADDA,
    ),
    re.UNICODE,
)
def decode_tenses(field):
    """
    Decode tenses field
    """
    all=False;
    past=False;
    future=False;
    passive=False;
    imperative=False;
    future_moode=False;
    confirmed=False;
    if field==u"يعملان":
        all=True;
    else:
github linuxscout / mishkal / mishkal / lib / qalsadi / libqutrub / verb_const.py View on Github external
# Conversion table used when two sukun-bearing letters meet
# (Arabic: "iltiqa' al-sakinayn"); each *_HARAKA key maps to KASRA or DAMMA.
CONVERSION_TABLE = {
    ALEF_YEH_HARAKA: KASRA,
    ALEF_WAW_HARAKA: DAMMA,
    WAW_HARAKA: DAMMA,
    YEH_HARAKA: KASRA,
    ALTERNATIVE_YEH_HARAKA: DAMMA,
}
##WAW_MAKSURA = WAW

#HARAKAT = u"%s%s%s%s%s"%(SUKUN, FATHA, DAMMA, KASRA, SHADDA)
# The four basic marks, as a tuple.
HARAKAT = (SUKUN, FATHA, DAMMA, KASRA)
# ALEF/WAW/YEH haraka markers followed by the four basic marks, joined into
# a single string.
HARAKAT2 = u"".join((
    ALEF_HARAKA, WAW_HARAKA, YEH_HARAKA,
    SUKUN, FATHA, DAMMA, KASRA,
))
# The five hamza letter forms, plus a character-class pattern that matches
# any one of them (built from the same tuple so the two cannot drift apart).
HAMZAT = (ALEF_HAMZA_ABOVE, WAW_HAMZA, YEH_HAMZA, HAMZA, ALEF_HAMZA_BELOW)
HAMZAT_PATTERN = re.compile(u"[%s%s%s%s%s]" % HAMZAT, re.UNICODE)


# Matches any of the lam-alef ligature presentation forms U+FEF7, U+FEF9
# and U+FEF5.
LAM_ALEF_PAT = re.compile(u'[\ufef7\ufef9\ufef5]', re.UNICODE)

# Fixed mark sequences; the number in each name equals the number of marks
# in the sequence.
UNIFORMATE_MARKS_4 = u"".join((FATHA, SUKUN, FATHA, FATHA))
UNIFORMATE_MARKS_5TEH = u"".join((FATHA, FATHA, SUKUN, FATHA, FATHA))
UNIFORMATE_MARKS_5 = u"".join((KASRA, SUKUN, FATHA, FATHA, FATHA))
UNIFORMATE_MARKS_6 = u"".join((KASRA, SUKUN, FATHA, SUKUN, FATHA, FATHA))

# Marker strings for the beginning and the end of a word.
BEGIN_WORD = u"^"
END_WORD = u"$"

LONG_HARAKAT = (ALEF_HARAKA, YEH_HARAKA, WAW_HARAKA, ALEF_YEH_HARAKA,
github linuxscout / mishkal / support / libqutrub / verb_const.py View on Github external
# Maps each of the *_HARAKA constants listed below to either KASRA or DAMMA.
CONVERSION_TABLE = {
    ALEF_YEH_HARAKA: KASRA,
    ALEF_WAW_HARAKA: DAMMA,
    WAW_HARAKA: DAMMA,
    YEH_HARAKA: KASRA,
    ALTERNATIVE_YEH_HARAKA: DAMMA,
}
##WAW_MAKSURA = WAW

#HARAKAT = u"%s%s%s%s%s"%(SUKUN, FATHA, DAMMA, KASRA, SHADDA)
# The four basic marks, as a tuple.
HARAKAT = (SUKUN, FATHA, DAMMA, KASRA)
# ALEF/WAW/YEH haraka markers followed by the four basic marks, joined into
# a single string.
HARAKAT2 = u"".join((
    ALEF_HARAKA, WAW_HARAKA, YEH_HARAKA,
    SUKUN, FATHA, DAMMA, KASRA,
))
# The five hamza letter forms, plus a character-class pattern that matches
# any one of them (built from the same tuple so the two cannot drift apart).
HAMZAT = (ALEF_HAMZA_ABOVE, WAW_HAMZA, YEH_HAMZA, HAMZA, ALEF_HAMZA_BELOW)
HAMZAT_PATTERN = re.compile(u"[%s%s%s%s%s]" % HAMZAT, re.UNICODE)


# Matches any of the lam-alef ligature presentation forms U+FEF7, U+FEF9
# and U+FEF5.
LAM_ALEF_PAT = re.compile(u'[\ufef7\ufef9\ufef5]', re.UNICODE)

# Fixed mark sequences; the number in each name equals the number of marks
# in the sequence.
UNIFORMATE_MARKS_4 = u"".join((FATHA, SUKUN, FATHA, FATHA))
UNIFORMATE_MARKS_5TEH = u"".join((FATHA, FATHA, SUKUN, FATHA, FATHA))
UNIFORMATE_MARKS_5 = u"".join((KASRA, SUKUN, FATHA, FATHA, FATHA))
UNIFORMATE_MARKS_6 = u"".join((KASRA, SUKUN, FATHA, SUKUN, FATHA, FATHA))

# Marker strings for the beginning and the end of a word.
BEGIN_WORD = u"^"
END_WORD = u"$"

# The five *_HARAKA markers grouped under the name LONG_HARAKAT.
LONG_HARAKAT = (
    ALEF_HARAKA,
    YEH_HARAKA,
    WAW_HARAKA,
    ALEF_YEH_HARAKA,
    ALEF_WAW_HARAKA,
)
# Short alias for FATHA.
_F = FATHA
github linuxscout / mishkal / support / libqutrub / conjugatedisplay.py View on Github external
def highlight_diacritics_html(self, text):
        """
        Highlight diacritics in the HTML text.

        Walks the text one character at a time. Characters that are not
        FATHA, DAMMA, KASRA or SUKUN are copied to the result unchanged;
        those four marks are emitted between the tashkeel span tags.
        @param text: the given text
        @type text: unicode.
        @return: the result as HTML.
        @rtype: unicode.
        """        
        hight_text = u""
        # NOTE(review): the inner double quotes end the string literal early,
        # so this line does not parse as written. The attribute quoting was
        # presumably altered when the excerpt was extracted -- confirm
        # against the upstream file.
        lefttag = u"<span class="tashkeel">"
        righttag = u"</span>"
        for i in range(len(text)):
            if text[i] in (araby.FATHA, araby.DAMMA, araby.KASRA, araby.SUKUN):
                # NOTE(review): "&gt;" is an HTML entity where ">" was
                # presumably intended, and the tail of this condition
                # ('(i+1%s"%text[i]') is truncated: the rest of the test and
                # at least one branch body appear to be missing from this
                # excerpt. Restore from the upstream file before use.
                if (i&gt;0 and text[i-1] not in (araby.ALEF, 
                araby.ALEF_HAMZA_ABOVE, araby.WAW_HAMZA, araby.ALEF_MADDA,
                 araby.DAL, araby.THAL, araby.WAW, araby.REH, araby.ZAIN,
            araby.SHADDA)) and (i+1%s"%text[i]
                    # Wrap the mark, preceded by a space, in the span tags.
                    hight_text += u"".join([lefttag, " ", text[i], righttag])
            else:
                # Not one of the four marks: copy the character as is.
                hight_text += text[i]
        return hight_text
github linuxscout / mishkal / mishkal / tashkeel / unknown_tashkeel.py View on Github external
"""
    Vocalize foreign names written in Arabic.
    @param word: given word
    @type  word:  unicode
    @return: the vocalized word
    @rtype: unicode
    """
    marks =[]
    previous = ""
    for c in word:
        if previous and not previous == araby.ALEF:
            #--------- add Harakat before letter
            if  c in (araby.ALEF, araby.ALEF_MAKSURA, araby.TEH_MARBUTA,):
                marks.pop()
                marks.append(araby.FATHA)
            elif c in (araby.WAW, araby.WAW_HAMZA):
                marks.pop()
                marks.append(araby.DAMMA)
            elif  c in( araby.YEH , araby.YEH_HAMZA ):
                marks.pop()
                marks.append(araby.KASRA)
        #--------- add Harakat before letter
        if c in (araby.ALEF_HAMZA_BELOW):
                marks.append(araby.KASRA)
        elif previous in (araby.ALEF_HAMZA_BELOW, araby.ALEF_HAMZA_ABOVE):
                marks.append(araby.SUKUN)
        elif previous in (araby.ALEF, araby.YEH, araby.WAW):
                if c == araby.YEH_HAMZA : 
                    marks.append(araby.KASRA)
        else:
                marks.append(araby.NOT_DEF_HARAKA)
        previous = c
github linuxscout / arramooz / scripts / nouns / noundict_functions.py View on Github external
#  $Author: Taha Zerrouki $
#  $Revision: 0.7 $
#  $Source: arabtechies.sourceforge.net
#
#***********************************************************************/




import sys
import re
import time
import pyarabic.araby as araby
# treat the root, strip extra characters
# Character-class pattern matching any single one of the nine araby letters
# and marks listed below.
stamp_pat = re.compile(
    u"[%s%s%s%s%s%s%s%s%s]" % (
        araby.ALEF,
        araby.YEH,
        araby.HAMZA,
        araby.ALEF_HAMZA_ABOVE,
        araby.WAW_HAMZA,
        araby.YEH_HAMZA,
        araby.WAW,
        araby.ALEF_MAKSURA,
        araby.SHADDA,
    ),
    re.UNICODE,
)

def word_stamp(word):
    """
    generate a stamp for a word, 
    remove all letters which can change form in the word :
        - ALEF, 
        - HAMZA, 
        - YEH, 
        - WAW, 
        - ALEF_MAKSURA
        - SHADDA
    @return: stamped word
    """
    # strip the last letter if is doubled
github linuxscout / mishkal / support / libqutrub / verb_const.py View on Github external
u'آهل':[u'أءهل'], 
u'آوب':[u'ءاوب'], 
u'آوى':[u'أءوى'], 
u'آيد':[u'ءايد'], 
u'آيس':[u'أءيس'], 
}

STANDARD_REPLACEMENT=[
    #-تحويل همزة القطع على الألف بعدها فتحة 
#وهمزة القطع على الألف بعدها سكون إلى ألف ممدودة
( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF]), ALEF_MADDA)
, ( u"".join([ALEF_MADDA, FATHA]), ALEF_MADDA)
, ( u"".join([ALEF_MADDA, ALEF]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, SUKUN]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, FATHA]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, DAMMA, WAW_HAMZA, SUKUN]), ALEF_HAMZA_ABOVE+DAMMA+WAW)
, ( u"".join([YEH, SHADDA, FATHA, ALEF_MAKSURA]), YEH+SHADDA+FATHA+ALEF)
# إدغام النون الساكنة
, ( u"".join([NOON, SUKUN, NOON]), NOON+SHADDA)
# إذا كان الحرف الأول ساكنا وبعده شدة، ثم أضيفت إليه الألف
, ( u"".join([SUKUN, SHADDA]), SHADDA)
##  معالجة ألف التفريق
, ( ALEF_WASLA, ALEF)
##  معالجة ألف التفريق
, ( ALEF_MAMDUDA, ALEF)
github linuxscout / mishkal / support / libqutrub / verb_const.py View on Github external
# Short aliases for the haraka markers; _A/_AH, _W/_WH and _Y/_YH are
# two names for the same constant.
_A = _AH = ALEF_HARAKA
_W = _WH = WAW_HARAKA
_Y = _YH = YEH_HARAKA
_AYH, _AWH = ALEF_YEH_HARAKA, ALEF_WAW_HARAKA
_YHALT = ALTERNATIVE_YEH_HARAKA

# Short aliases for the hamza letter forms.
_AHA, _AHB = ALEF_HAMZA_ABOVE, ALEF_HAMZA_BELOW
_AM = ALEF_MADDA
_YHA, _WHA = YEH_HAMZA, WAW_HAMZA
_HZ = HAMZA


# Flat lookup from a mark alias (_S, _F, _D, _K, _AH, _WH, _YH, _YHALT) to a
# hamza-form alias (_HZ, _AHA, _AHB, _AM). _S, _F, _D and _K are defined
# outside this excerpt.
# NOTE(review): presumably selects the hamza spelling at the start of a word
# from the mark it carries -- confirm against the code that reads this table.
INITIAL_TAHMEEZ_TABLE = {_S:_HZ, _F:_AHA, _D:_AHA, _K:_AHB, _AH:_AM , 
            _WH:_AHA, _YH:_AHB, _YHALT:_AHB}


# Two-level lookup: the outer key and the inner key are both mark aliases
# (_S, _F, _D, _K, _AH, _WH, _YH); the value is a hamza-form alias
# (_HZ, _AHA, _WHA, _YHA).
# NOTE(review): presumably the outer key is the mark before a mid-word hamza
# and the inner key the mark on/after it -- confirm against the caller before
# relying on the order.
MIDDLE_TAHMEEZ_TABLE = {
_S: {_S:_HZ, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA }, 
_F: {_S:_AHA, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA }, 
_D: {_S:_WHA, _F:_WHA, _D:_WHA, _K:_YHA, _AH:_WHA, _WH:_WHA, _YH:_YHA }, 
_K: {_S:_YHA, _F:_YHA, _D:_YHA, _K:_YHA, _AH:_YHA, _WH:_YHA, _YH:_YHA }, 
_AH: {_S:_HZ, _F:_HZ, _D:_WHA, _K:_YHA, _AH:_HZ, _WH:_WHA, _YH:_YHA }, 
_WH: {_S:_HZ, _F:_HZ, _D:_WHA, _K:_YHA, _AH:_HZ, _WH:_WHA, _YH:_YHA }, 
_YH: {_S:_YHA, _F:_YHA, _D:_YHA, _K:_YHA, _AH:_YHA, _WH:_YHA, _YH:_YHA }, 
}
github linuxscout / mishkal / mishkal / lib / qalsadi / libqutrub / verb_const.py View on Github external
u'آهل':[u'أءهل'], 
u'آوب':[u'ءاوب'], 
u'آوى':[u'أءوى'], 
u'آيد':[u'ءايد'], 
u'آيس':[u'أءيس'], 
}

STANDARD_REPLACEMENT=[
    #-تحويل همزة القطع على الألف بعدها فتحة 
#وهمزة القطع على الألف بعدها سكون إلى ألف ممدودة
( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF]), ALEF_MADDA)
, ( u"".join([ALEF_MADDA, FATHA]), ALEF_MADDA)
, ( u"".join([ALEF_MADDA, ALEF]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, SUKUN]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, FATHA]), ALEF_MADDA)
, ( u"".join([ALEF_HAMZA_ABOVE, DAMMA, WAW_HAMZA, SUKUN]), ALEF_HAMZA_ABOVE+DAMMA+WAW)
, ( u"".join([YEH, SHADDA, FATHA, ALEF_MAKSURA]), YEH+SHADDA+FATHA+ALEF)
# إدغام النون الساكنة
, ( u"".join([NOON, SUKUN, NOON]), NOON+SHADDA)
# إذا كان الحرف الأول ساكنا وبعده شدة، ثم أضيفت إليه الألف
, ( u"".join([SUKUN, SHADDA]), SHADDA)
##  معالجة ألف التفريق
, ( ALEF_WASLA, ALEF)
##  معالجة ألف التفريق
, ( ALEF_MAMDUDA, ALEF)
github linuxscout / mishkal / mishkal / lib / qalsadi / libqutrub / verb_const.py View on Github external
# Short aliases for the haraka markers; _A/_AH, _W/_WH and _Y/_YH are
# two names for the same constant.
_A = _AH = ALEF_HARAKA
_W = _WH = WAW_HARAKA
_Y = _YH = YEH_HARAKA
_AYH, _AWH = ALEF_YEH_HARAKA, ALEF_WAW_HARAKA
_YHALT = ALTERNATIVE_YEH_HARAKA

# Short aliases for the hamza letter forms.
_AHA, _AHB = ALEF_HAMZA_ABOVE, ALEF_HAMZA_BELOW
_AM = ALEF_MADDA
_YHA, _WHA = YEH_HAMZA, WAW_HAMZA
_HZ = HAMZA


# Flat lookup from a mark alias (_S, _F, _D, _K, _AH, _WH, _YH, _YHALT) to a
# hamza-form alias (_HZ, _AHA, _AHB, _AM). _S, _F, _D and _K are defined
# outside this excerpt.
# NOTE(review): presumably selects the hamza spelling at the start of a word
# from the mark it carries -- confirm against the code that reads this table.
INITIAL_TAHMEEZ_TABLE = {_S:_HZ, _F:_AHA, _D:_AHA, _K:_AHB, _AH:_AM , 
            _WH:_AHA, _YH:_AHB, _YHALT:_AHB}


# Two-level lookup: the outer key and the inner key are both mark aliases
# (_S, _F, _D, _K, _AH, _WH, _YH); the value is a hamza-form alias
# (_HZ, _AHA, _WHA, _YHA).
# NOTE(review): presumably the outer key is the mark before a mid-word hamza
# and the inner key the mark on/after it -- confirm against the caller before
# relying on the order.
MIDDLE_TAHMEEZ_TABLE = {
_S: {_S:_HZ, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA }, 
_F: {_S:_AHA, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA }, 
_D: {_S:_WHA, _F:_WHA, _D:_WHA, _K:_YHA, _AH:_WHA, _WH:_WHA, _YH:_YHA }, 
_K: {_S:_YHA, _F:_YHA, _D:_YHA, _K:_YHA, _AH:_YHA, _WH:_YHA, _YH:_YHA }, 
_AH: {_S:_HZ, _F:_HZ, _D:_WHA, _K:_YHA, _AH:_HZ, _WH:_WHA, _YH:_YHA }, 
_WH: {_S:_HZ, _F:_HZ, _D:_WHA, _K:_YHA, _AH:_HZ, _WH:_WHA, _YH:_YHA }, 
_YH: {_S:_YHA, _F:_YHA, _D:_YHA, _K:_YHA, _AH:_YHA, _WH:_YHA, _YH:_YHA }, 
}