Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""If the word1 is like a wazn (pattern),
the letters must be equal,
the wazn has FEH, AIN, LAM letters.
this are as generic letters.
The two words can be full vocalized, or partial vocalized
@param word1: input word
@type word1: unicode
@param wazn: given word template وزن
@type wazn: unicode
@return: if two words have similar vocalization
@rtype: Boolean
"""
stack1 = stack.Stack(word1)
stack2 = stack.Stack(wazn)
root = stack.Stack()
last1 = stack1.pop()
last2 = stack2.pop()
vowels = HARAKAT
while last1 != None and last2 != None:
if last1 == last2 and last2 not in (FEH, AIN, LAM):
last1 = stack1.pop()
last2 = stack2.pop()
elif last1 not in vowels and last2 in (FEH, AIN, LAM):
root.push(last1)
# ~ print "t"
last1 = stack1.pop()
last2 = stack2.pop()
elif last1 in vowels and last2 not in vowels:
last1 = stack1.pop()
elif last1 not in vowels and last2 in vowels:
last2 = stack2.pop()
@type partial: unicode
@param fully: the fully vocalized word
@type fully: unicode
@return: if contains shadda
@rtype: Boolean
"""
# المدخل ليس به شدة، لا داعي للبحث
if not has_shadda(partial):
return True
# المدخل به شدة، والنتيجة ليس بها شدة، خاطئ
elif not has_shadda(fully) and has_shadda(partial):
return False
# المدخل والمخرج بهما شدة، نتأكد من موقعهما
partial = strip_harakat(partial)
fully = strip_harakat(fully)
pstack = stack.Stack(partial)
vstack = stack.Stack(fully)
plast = pstack.pop()
vlast = vstack.pop()
# if debug: print "+0", Pstack, Vstack
while plast != None and vlast != None:
if plast == vlast:
plast = pstack.pop()
vlast = vstack.pop()
elif plast == SHADDA and vlast != SHADDA:
# if debug: print "+2", Pstack.items, Plast, Vstack.items, Vlast
break
elif plast != SHADDA and vlast == SHADDA:
# if debug: print "+2", Pstack.items, Plast, Vstack.items, Vlast
vlast = vstack.pop()
else:
# if debug: print "+2", Pstack.items, Plast, Vstack.items, Vlast
def vocalized_similarity(word1, word2):
"""
Compare two words character by character, tolerating missing harakat.

Both words are loaded into stacks and consumed one character at a time.
Equal characters are accepted; when only one side currently holds a haraka,
that haraka is skipped, so a fully vocalized word can be matched against
a partially vocalized one.

NOTE(review): this excerpt ends inside the mismatch branch, so the error
counting and the return statement are not visible here. The return
description below is carried over from the original documentation.
@param word1: first word
@type word1: unicode
@param word2: second word
@type word2: unicode
@return: True if the words are similar, otherwise a negative number of errors
@rtype: Boolean / int
"""
stack1 = stack.Stack(word1)
stack2 = stack.Stack(word2)
# presumably pop() hands back one character per call and None once the
# stack is empty -- the loop condition below relies on that; confirm
# against stack.Stack
last1 = stack1.pop()
last2 = stack2.pop()
# mismatch counter; its updates are outside this excerpt
err_count = 0
vowels = HARAKAT
# keep going until either word is exhausted
while last1 != None and last2 != None:
if last1 == last2:
# same character on both sides: advance both words
last1 = stack1.pop()
last2 = stack2.pop()
elif last1 in vowels and last2 not in vowels:
# word1 carries a haraka that word2 lacks: skip it in word1 only
last1 = stack1.pop()
elif last1 not in vowels and last2 in vowels:
# word2 carries a haraka that word1 lacks: skip it in word2 only
last2 = stack2.pop()
else:
# remaining case: the two characters differ and neither of the
# one-sided-haraka rules above applies
# break
if last1 == SHADDA:
"""
separate the letters from the vowels, in arabic word,
if a letter hasn't a haraka, the not definited haraka is attributed.
return ( letters, vowels)
@param word: the input word
@type word: unicode
@param extract_shadda: extract shadda as seperate text
@type extract_shadda: Boolean
@return: ( letters, vowels)
@rtype:couple of unicode
"""
stack1 = stack.Stack(word)
# the word is inversed in the stack
stack1.items.reverse()
letters = stack.Stack()
marks = stack.Stack()
vowels = HARAKAT
last1 = stack1.pop()
# if the last element must be a letter,
# the arabic word can't starts with a haraka
# in th stack the word is inversed
while last1 in vowels:
last1 = stack1.pop()
while last1 != None:
if last1 in vowels:
# we can't have two harakats beside.
# the shadda is considered as a letter
marks.pop()
marks.push(last1)
elif last1 == SHADDA:
# is the element is a Shadda,
# the previous letter must have a sukun as mark,
def vocalized_similarity(word1, word2):
"""
Compare two words character by character, tolerating missing harakat.

Both words are loaded into stacks and consumed one character at a time.
Equal characters are accepted; when only one side currently holds a haraka,
that haraka is skipped, so a fully vocalized word can be matched against
a partially vocalized one.

NOTE(review): this excerpt ends inside the mismatch branch, so the error
counting and the return statement are not visible here. The return
description below is carried over from the original documentation.
@param word1: first word
@type word1: unicode
@param word2: second word
@type word2: unicode
@return: True if the words are similar, otherwise a negative number of errors
@rtype: Boolean / int
"""
stack1 = stack.Stack(word1)
stack2 = stack.Stack(word2)
# presumably pop() hands back one character per call and None once the
# stack is empty -- the loop condition below relies on that; confirm
# against stack.Stack
last1 = stack1.pop()
last2 = stack2.pop()
# mismatch counter; its updates are outside this excerpt
err_count = 0
vowels = HARAKAT
# keep going until either word is exhausted
while last1 != None and last2 != None:
if last1 == last2:
# same character on both sides: advance both words
last1 = stack1.pop()
last2 = stack2.pop()
elif last1 in vowels and last2 not in vowels:
# word1 carries a haraka that word2 lacks: skip it in word1 only
last1 = stack1.pop()
elif last1 not in vowels and last2 in vowels:
# word2 carries a haraka that word1 lacks: skip it in word2 only
last2 = stack2.pop()
else:
# remaining case: the two characters differ and neither of the
# one-sided-haraka rules above applies
# break
if last1 == SHADDA:
# a shadda on word1's side is consumed so the comparison can move on
last1 = stack1.pop()
def joint(letters, marks):
""" Join the letters with their marks to rebuild a word.

The lengths of letters and marks must be equal; when they differ an
empty string is returned.

NOTE(review): this excerpt is cut off inside the main loop and its
indentation was lost, so the handling of ordinary letters, the loop
advance and the final assembly of the word are not visible here.
@param letters: the word letters
@type letters: unicode
@param marks: the word marks
@type marks: unicode
@return: word
@rtype: unicode
"""
# The lengths of letters and marks must be equal
if len(letters) != len(marks):
return ""
# both sequences are reversed before popping; presumably pop() takes from
# the end of items, so the pairs are then read in their original order --
# confirm against stack.Stack
stack_letter = stack.Stack(letters)
stack_letter.items.reverse()
stack_mark = stack.Stack(marks)
stack_mark.items.reverse()
# output stack that collects the characters of the rebuilt word
word_stack = stack.Stack()
last_letter = stack_letter.pop()
last_mark = stack_mark.pop()
vowels = HARAKAT
# process letter/mark pairs until either input is exhausted
while last_letter != None and last_mark != None:
if last_letter == SHADDA:
# look at the character written last: it is put back only when it is
# not a haraka, i.e. a haraka sitting directly before the shadda is dropped
top = word_stack.pop()
if top not in vowels:
word_stack.push(top)
word_stack.push(last_letter)
# the NOT_DEF_HARAKA placeholder is never written to the output
if last_mark != NOT_DEF_HARAKA:
word_stack.push(last_mark)
def waznlike(word1, wazn):
"""Check whether word1 follows a given wazn (morphological pattern).

The letters must be equal, except that the wazn contains the letters
FEH, AIN and LAM, which act as generic placeholders: each one accepts any
non-haraka character of word1, and the accepted characters are collected
on a separate stack.
The two words can be fully vocalized or partially vocalized.

NOTE(review): this excerpt is cut off inside the comparison loop, so the
remaining branches and the return statement are not visible here. The
return description below is carried over from the original documentation.
@param word1: input word
@type word1: unicode
@param wazn: given word template (wazn, i.e. pattern)
@type wazn: unicode
@return: if two words have similar vocalization
@rtype: Boolean
"""
stack1 = stack.Stack(word1)
stack2 = stack.Stack(wazn)
# characters of word1 that were matched against a FEH/AIN/LAM placeholder
root = stack.Stack()
# presumably pop() hands back one character per call and None once the
# stack is empty -- the loop condition below relies on that; confirm
# against stack.Stack
last1 = stack1.pop()
last2 = stack2.pop()
vowels = HARAKAT
# keep going until either the word or the pattern is exhausted
while last1 != None and last2 != None:
if last1 == last2 and last2 not in (FEH, AIN, LAM):
# same character and the pattern character is not a placeholder:
# advance both
last1 = stack1.pop()
last2 = stack2.pop()
elif last1 not in vowels and last2 in (FEH, AIN, LAM):
# placeholder in the pattern: accept the word's character and record it
root.push(last1)
# ~ print "t"
last1 = stack1.pop()
last2 = stack2.pop()
elif last1 in vowels and last2 not in vowels:
# the word carries a haraka the pattern lacks: skip it in the word only
last1 = stack1.pop()
elif last1 not in vowels and last2 in vowels:
def separate(word, extract_shadda=False):
"""
Separate the letters of an Arabic word from its vowels (harakat).

According to the original documentation, a letter that has no haraka is
given the "not defined" haraka, and the result is the pair
(letters, vowels).

NOTE(review): this excerpt is cut off inside the main loop, so the
handling of shadda and of plain letters, the use of extract_shadda and
the return statement are not visible here.
@param word: the input word
@type word: unicode
@param extract_shadda: extract shadda as separate text
@type extract_shadda: Boolean
@return: ( letters, vowels)
@rtype: couple of unicode
"""
stack1 = stack.Stack(word)
# the word is reversed inside the stack; presumably pop() takes from the end
# of items, so the characters are then read in their original order --
# confirm against stack.Stack
stack1.items.reverse()
letters = stack.Stack()
marks = stack.Stack()
vowels = HARAKAT
last1 = stack1.pop()
# the first element must be a letter:
# an Arabic word can't start with a haraka,
# so any harakat met before the first letter are discarded
while last1 in vowels:
last1 = stack1.pop()
# consume the remaining characters until the stack is empty
while last1 != None:
if last1 in vowels:
# two harakat can't sit side by side
# (the shadda is considered a letter),
# so the mark stored last is removed first
marks.pop()