| /* |
| * Authors: |
| * - Assem Chelli, < assem [dot] ch [at] gmail > |
| * - Abdelkrim Aries <ab [underscore] aries [at] esi [dot] dz> |
| * |
| */ |
| |
| stringescapes { } |
| |
| // Short names for the Arabic code points used by the rules below. |
| // Each group is listed in code-point order; hex values are written with four digits. |
| |
| // Hamza and its carriers |
| stringdef o hex '0621' // Hamza |
| stringdef a~ hex '0622' // Alef madda |
| stringdef ao hex '0623' // Hamza above Alef |
| stringdef wo hex '0624' // Hamza above waw |
| stringdef ao_ hex '0625' // Hamza below Alef |
| stringdef yo hex '0626' // Hamza above yeh |
| |
| // Letters |
| stringdef a hex '0627' // Alef |
| stringdef b hex '0628' // Beh |
| stringdef t_ hex '0629' // Teh_Marbuta |
| stringdef t hex '062A' // Teh |
| stringdef th hex '062B' // Theh |
| stringdef j hex '062C' // Jeem |
| stringdef h hex '062D' // Hah |
| stringdef x hex '062E' // Khah |
| stringdef d hex '062F' // Dal |
| stringdef dz hex '0630' // Thal |
| stringdef r hex '0631' // Reh |
| stringdef z hex '0632' // Zain |
| stringdef s hex '0633' // Seen |
| stringdef sh hex '0634' // Sheen |
| stringdef c hex '0635' // Sad |
| stringdef dh hex '0636' // Dad |
| stringdef tt hex '0637' // Tah |
| stringdef zh hex '0638' // Zah |
| stringdef i hex '0639' // Ain |
| stringdef gh hex '063A' // Ghain |
| stringdef f hex '0641' // Feh |
| stringdef q hex '0642' // Qaf |
| stringdef k hex '0643' // Kaf |
| stringdef l hex '0644' // Lam |
| stringdef m hex '0645' // Meem |
| stringdef n hex '0646' // Noon |
| stringdef e hex '0647' // Heh |
| stringdef w hex '0648' // Waw |
| stringdef a_ hex '0649' // Alef Maksura |
| stringdef y hex '064A' // Yeh |
| |
| // Diacritics (all of these are stripped by Normalize_pre) |
| stringdef aan hex '064B' // FatHatan |
| stringdef uun hex '064C' // Dammatan |
| stringdef iin hex '064D' // Kasratan |
| stringdef aa hex '064E' // FatHa |
| stringdef uu hex '064F' // Damma |
| stringdef ii hex '0650' // Kasra |
| stringdef ~ hex '0651' // Shadda |
| stringdef oo hex '0652' // Sukun |
| |
| // Arabic-Indic digits and number signs (U+0660 .. U+066C) |
| stringdef 0 hex '0660' // digit zero |
| stringdef 1 hex '0661' // digit one |
| stringdef 2 hex '0662' // digit two |
| stringdef 3 hex '0663' // digit three |
| stringdef 4 hex '0664' // digit four |
| stringdef 5 hex '0665' // digit five |
| stringdef 6 hex '0666' // digit six |
| stringdef 7 hex '0667' // digit seven |
| stringdef 8 hex '0668' // digit eight |
| stringdef 9 hex '0669' // digit nine |
| stringdef % hex '066A' // PERCENT |
| stringdef . hex '066B' // DECIMAL |
| stringdef ' hex '066C' // THOUSANDS |
| |
| // Kasheeda |
| stringdef _ hex '0640' // Kasheeda, Tatweel |
| |
| // Punctuation marks |
| stringdef , hex '060C' // COMMA |
| stringdef ; hex '061B' // SEMICOLON |
| stringdef ? hex '061F' // QUESTION |
| |
| // Shaped forms |
| // Code points from the Unicode "Arabic Presentation Forms-B" range (U+FE80 ..). |
| // Normalize_pre folds every name defined here back to its base letter. |
| // The numeric suffix only distinguishes the variants of one letter; it does not |
| // consistently denote the same positional form across letters, so the form is |
| // spelled out in each comment. |
| stringdef o1 hex 'fe80' // HAMZA (isolated) |
| stringdef ao1 hex 'fe83' // ALEF_HAMZA_ABOVE (isolated) |
| stringdef ao2 hex 'fe84' // ALEF_HAMZA_ABOVE (final) |
| stringdef ao_1 hex 'fe87' // ALEF_HAMZA_BELOW (isolated) |
| stringdef ao_2 hex 'fe88' // ALEF_HAMZA_BELOW (final) |
| stringdef yo1 hex 'fe8b' // YEH_HAMZA (initial) |
| stringdef yo2 hex 'fe8c' // YEH_HAMZA (medial) |
| stringdef yo3 hex 'fe89' // YEH_HAMZA (isolated) |
| stringdef yo4 hex 'fe8a' // YEH_HAMZA (final) |
| stringdef a~1 hex 'fe81' // ALEF_MADDA (isolated) |
| stringdef a~2 hex 'fe82' // ALEF_MADDA (final) |
| stringdef wo1 hex 'fe85' // WAW_HAMZA (isolated) |
| stringdef wo2 hex 'fe86' // WAW_HAMZA (final) |
| stringdef a1 hex 'fe8d' // ALEF (isolated) |
| stringdef a2 hex 'fe8e' // ALEF (final) |
| stringdef b1 hex 'fe8f' // BEH (isolated) |
| stringdef b2 hex 'fe90' // BEH (final) |
| stringdef b3 hex 'fe91' // BEH (initial) |
| stringdef b4 hex 'fe92' // BEH (medial) |
| stringdef t_1 hex 'fe93' // TEH_MARBUTA (isolated) |
| stringdef t_2 hex 'fe94' // TEH_MARBUTA (final) |
| stringdef t1 hex 'fe97' // TEH (initial) |
| stringdef t2 hex 'fe98' // TEH (medial) |
| stringdef t3 hex 'fe95' // TEH (isolated) |
| stringdef t4 hex 'fe96' // TEH (final) |
| stringdef th1 hex 'fe9b' // THEH (initial) |
| stringdef th2 hex 'fe9c' // THEH (medial) |
| stringdef th3 hex 'fe9a' // THEH (final) |
| stringdef th4 hex 'fe99' // THEH (isolated) |
| stringdef j1 hex 'fe9f' // JEEM (initial) |
| stringdef j2 hex 'fea0' // JEEM (medial) |
| stringdef j3 hex 'fe9d' // JEEM (isolated) |
| stringdef j4 hex 'fe9e' // JEEM (final) |
| stringdef h1 hex 'fea3' // HAH (initial) |
| stringdef h2 hex 'fea4' // HAH (medial) |
| stringdef h3 hex 'fea1' // HAH (isolated) |
| stringdef h4 hex 'fea2' // HAH (final) |
| stringdef x1 hex 'fea7' // KHAH (initial) |
| stringdef x2 hex 'fea8' // KHAH (medial) |
| stringdef x3 hex 'fea5' // KHAH (isolated) |
| stringdef x4 hex 'fea6' // KHAH (final) |
| stringdef d1 hex 'fea9' // DAL (isolated) |
| stringdef d2 hex 'feaa' // DAL (final) |
| stringdef dz1 hex 'feab' // THAL (isolated) |
| stringdef dz2 hex 'feac' // THAL (final) |
| stringdef r1 hex 'fead' // REH (isolated) |
| stringdef r2 hex 'feae' // REH (final) |
| stringdef z1 hex 'feaf' // ZAIN (isolated) |
| stringdef z2 hex 'feb0' // ZAIN (final) |
| stringdef s1 hex 'feb3' // SEEN (initial) |
| stringdef s2 hex 'feb4' // SEEN (medial) |
| stringdef s3 hex 'feb1' // SEEN (isolated) |
| stringdef s4 hex 'feb2' // SEEN (final) |
| stringdef sh1 hex 'feb7' // SHEEN (initial) |
| stringdef sh2 hex 'feb8' // SHEEN (medial) |
| stringdef sh3 hex 'feb5' // SHEEN (isolated) |
| stringdef sh4 hex 'feb6' // SHEEN (final) |
| stringdef c1 hex 'febb' // SAD (initial) |
| stringdef c2 hex 'febc' // SAD (medial) |
| stringdef c3 hex 'feb9' // SAD (isolated) |
| stringdef c4 hex 'feba' // SAD (final) |
| stringdef dh1 hex 'febf' // DAD (initial) |
| stringdef dh2 hex 'fec0' // DAD (medial) |
| stringdef dh3 hex 'febd' // DAD (isolated) |
| stringdef dh4 hex 'febe' // DAD (final) |
| stringdef tt1 hex 'fec3' // TAH (initial) |
| stringdef tt2 hex 'fec4' // TAH (medial) |
| stringdef tt3 hex 'fec1' // TAH (isolated) |
| stringdef tt4 hex 'fec2' // TAH (final) |
| stringdef zh1 hex 'fec7' // ZAH (initial) |
| stringdef zh2 hex 'fec8' // ZAH (medial) |
| stringdef zh3 hex 'fec5' // ZAH (isolated) |
| stringdef zh4 hex 'fec6' // ZAH (final) |
| stringdef i1 hex 'fecb' // AIN (initial) |
| stringdef i2 hex 'fecc' // AIN (medial) |
| stringdef i3 hex 'fec9' // AIN (isolated) |
| stringdef i4 hex 'feca' // AIN (final) |
| stringdef gh1 hex 'fecf' // GHAIN (initial) |
| stringdef gh2 hex 'fed0' // GHAIN (medial) |
| stringdef gh3 hex 'fecd' // GHAIN (isolated) |
| stringdef gh4 hex 'fece' // GHAIN (final) |
| stringdef f1 hex 'fed3' // FEH (initial) |
| stringdef f2 hex 'fed4' // FEH (medial) |
| stringdef f3 hex 'fed1' // FEH (isolated) |
| stringdef f4 hex 'fed2' // FEH (final) |
| stringdef q1 hex 'fed7' // QAF (initial) |
| stringdef q2 hex 'fed8' // QAF (medial) |
| stringdef q3 hex 'fed5' // QAF (isolated) |
| stringdef q4 hex 'fed6' // QAF (final) |
| stringdef k1 hex 'fedb' // KAF (initial) |
| stringdef k2 hex 'fedc' // KAF (medial) |
| stringdef k3 hex 'fed9' // KAF (isolated) |
| stringdef k4 hex 'feda' // KAF (final) |
| stringdef l1 hex 'fedf' // LAM (initial) |
| stringdef l2 hex 'fee0' // LAM (medial) |
| stringdef l3 hex 'fedd' // LAM (isolated) |
| stringdef l4 hex 'fede' // LAM (final) |
| stringdef m1 hex 'fee3' // MEEM (initial) |
| stringdef m2 hex 'fee4' // MEEM (medial) |
| stringdef m3 hex 'fee1' // MEEM (isolated) |
| stringdef m4 hex 'fee2' // MEEM (final) |
| stringdef n1 hex 'fee7' // NOON (initial) |
| stringdef n2 hex 'fee8' // NOON (medial) |
| stringdef n3 hex 'fee5' // NOON (isolated) |
| stringdef n4 hex 'fee6' // NOON (final) |
| stringdef e1 hex 'feeb' // HEH (initial) |
| stringdef e2 hex 'feec' // HEH (medial) |
| stringdef e3 hex 'fee9' // HEH (isolated) |
| stringdef e4 hex 'feea' // HEH (final) |
| stringdef w1 hex 'feed' // WAW (isolated) |
| stringdef w2 hex 'feee' // WAW (final) |
| stringdef a_1 hex 'feef' // ALEF_MAKSURA (isolated) |
| stringdef a_2 hex 'fef0' // ALEF_MAKSURA (final) |
| stringdef y1 hex 'fef3' // YEH (initial) |
| stringdef y2 hex 'fef4' // YEH (medial) |
| stringdef y3 hex 'fef1' // YEH (isolated) |
| stringdef y4 hex 'fef2' // YEH (final) |
| |
| // Lam-Alef ligatures, in code-point order; Normalize_pre expands each one |
| // into Lam followed by the matching Alef variant. |
| stringdef la~ hex 'FEF5' // LAM_ALEF_MADDA_ABOVE (isolated) |
| stringdef la~2 hex 'FEF6' // LAM_ALEF_MADDA_ABOVE (final) |
| stringdef lao hex 'FEF7' // LAM_ALEF_HAMZA_ABOVE (isolated) |
| stringdef lao2 hex 'FEF8' // LAM_ALEF_HAMZA_ABOVE (final) |
| stringdef lao_ hex 'FEF9' // LAM_ALEF_HAMZA_BELOW (isolated) |
| stringdef lao_2 hex 'FEFA' // LAM_ALEF_HAMZA_BELOW (final) |
| stringdef la hex 'FEFB' // LAM_ALEF (isolated) |
| stringdef la2 hex 'FEFC' // LAM_ALEF (final) |
| |
| |
| |
| // Scratch integer: the check, prefix and suffix routines copy `len` into it |
| // before testing their minimum-length conditions. |
| integers ( word_len ) |
| |
| // Word-class flags: initialised in `stem`, updated by Checks1 and Prefix_Step4_Verb. |
| booleans ( is_noun is_verb is_defined ) |
| |
| routines ( |
| // word-class guess |
| Checks1 |
| |
| // normalization before / after stemming |
| Normalize_pre |
| Normalize_post |
| |
| // prefix removal (forward mode) |
| Prefix_Step1 |
| Prefix_Step2 |
| Prefix_Step3a_Noun |
| Prefix_Step3b_Noun |
| Prefix_Step3_Verb |
| Prefix_Step4_Verb |
| |
| // suffix removal (backward mode) |
| Suffix_Noun_Step1a |
| Suffix_Noun_Step1b |
| Suffix_Noun_Step2a |
| Suffix_Noun_Step2b |
| Suffix_Noun_Step2c1 |
| Suffix_Noun_Step2c2 |
| Suffix_Noun_Step3 |
| Suffix_Verb_Step1 |
| Suffix_Verb_Step2a |
| Suffix_Verb_Step2b |
| Suffix_Verb_Step2c |
| Suffix_All_alef_maqsura |
| ) |
| |
| // the only routine visible to callers |
| externals ( stem ) |
| |
| // no character groupings are needed |
| groupings ( ) |
| |
| |
| // Normalizations |
| define Normalize_pre as ( |
| // Forward scan, at most `len` steps. Each step either rewrites or deletes |
| // the recognised character at the cursor, or moves on by one character. |
| loop len ( |
| ( |
| [substring] among ( |
| // Kasheeda (tatweel): removed |
| '{_}' ( delete ) |
| |
| // Vocalization marks: removed |
| '{aa}' '{uu}' '{ii}' '{aan}' '{uun}' '{iin}' '{oo}' '{~}' ( delete ) |
| |
| // Punctuation marks: removed |
| '{,}' '{;}' '{?}' ( delete ) // Arabic-specific |
| '.' ',' ';' ':' '?' '!' '/' '*' '%' '\' '"' ( delete ) // General |
| |
| // Arabic-Indic percent, decimal and thousands signs: removed |
| '{%}' '{.}' '{'}' ( delete ) |
| |
| // Arabic-Indic digits: replaced by the ASCII digit |
| '{0}' ( <- '0' ) |
| '{1}' ( <- '1' ) |
| '{2}' ( <- '2' ) |
| '{3}' ( <- '3' ) |
| '{4}' ( <- '4' ) |
| '{5}' ( <- '5' ) |
| '{6}' ( <- '6' ) |
| '{7}' ( <- '7' ) |
| '{8}' ( <- '8' ) |
| '{9}' ( <- '9' ) |
| |
| // Lam-Alef ligatures: expanded to Lam + Alef variant |
| '{la}' '{la2}' ( <- '{l}{a}' ) |
| '{lao}' '{lao2}' ( <- '{l}{ao}' ) |
| '{lao_}' '{lao_2}' ( <- '{l}{ao_}' ) |
| '{la~}' '{la~2}' ( <- '{l}{a~}' ) |
| |
| // Shaped (presentation) forms: folded to the base letter |
| '{o1}' ( <- '{o}' ) // Hamza |
| '{ao1}' '{ao2}' ( <- '{ao}' ) // Hamza above Alef |
| '{ao_1}' '{ao_2}' ( <- '{ao_}' ) // Hamza below Alef |
| '{yo1}' '{yo2}' '{yo3}' '{yo4}' ( <- '{yo}' ) // Hamza above Yeh |
| '{a~1}' '{a~2}' ( <- '{a~}' ) // Alef madda |
| '{wo1}' '{wo2}' ( <- '{wo}' ) // Hamza above Waw |
| '{a1}' '{a2}' ( <- '{a}' ) // Alef |
| '{b1}' '{b2}' '{b3}' '{b4}' ( <- '{b}' ) // Beh |
| '{t_1}' '{t_2}' ( <- '{t_}' ) // Teh Marbuta |
| '{t1}' '{t2}' '{t3}' '{t4}' ( <- '{t}' ) // Teh |
| '{th1}' '{th2}' '{th3}' '{th4}' ( <- '{th}' ) // Theh |
| '{j1}' '{j2}' '{j3}' '{j4}' ( <- '{j}' ) // Jeem |
| '{h1}' '{h2}' '{h3}' '{h4}' ( <- '{h}' ) // Hah |
| '{x1}' '{x2}' '{x3}' '{x4}' ( <- '{x}' ) // Khah |
| '{d1}' '{d2}' ( <- '{d}' ) // Dal |
| '{dz1}' '{dz2}' ( <- '{dz}' ) // Thal |
| '{r1}' '{r2}' ( <- '{r}' ) // Reh |
| '{z1}' '{z2}' ( <- '{z}' ) // Zain |
| '{s1}' '{s2}' '{s3}' '{s4}' ( <- '{s}' ) // Seen |
| '{sh1}' '{sh2}' '{sh3}' '{sh4}' ( <- '{sh}' ) // Sheen |
| '{c1}' '{c2}' '{c3}' '{c4}' ( <- '{c}' ) // Sad |
| '{dh1}' '{dh2}' '{dh3}' '{dh4}' ( <- '{dh}' ) // Dad |
| '{tt1}' '{tt2}' '{tt3}' '{tt4}' ( <- '{tt}' ) // Tah |
| '{zh1}' '{zh2}' '{zh3}' '{zh4}' ( <- '{zh}' ) // Zah |
| '{i1}' '{i2}' '{i3}' '{i4}' ( <- '{i}' ) // Ain |
| '{gh1}' '{gh2}' '{gh3}' '{gh4}' ( <- '{gh}' ) // Ghain |
| '{f1}' '{f2}' '{f3}' '{f4}' ( <- '{f}' ) // Feh |
| '{q1}' '{q2}' '{q3}' '{q4}' ( <- '{q}' ) // Qaf |
| '{k1}' '{k2}' '{k3}' '{k4}' ( <- '{k}' ) // Kaf |
| '{l1}' '{l2}' '{l3}' '{l4}' ( <- '{l}' ) // Lam |
| '{m1}' '{m2}' '{m3}' '{m4}' ( <- '{m}' ) // Meem |
| '{n1}' '{n2}' '{n3}' '{n4}' ( <- '{n}' ) // Noon |
| '{e1}' '{e2}' '{e3}' '{e4}' ( <- '{e}' ) // Heh |
| '{w1}' '{w2}' ( <- '{w}' ) // Waw |
| '{a_1}' '{a_2}' ( <- '{a_}' ) // Alef Maksura |
| '{y1}' '{y2}' '{y3}' '{y4}' ( <- '{y}' ) // Yeh |
| ) |
| ) |
| or |
| next // nothing to normalize here: step over one character |
| ) |
| ) |
| |
| define Normalize_post as ( |
| // Post-stemming hamza normalization, in two independent passes. |
| // Each pass is wrapped in `do`, so the routine signals success either way. |
| |
| // Pass 1: a hamza carrier in word-final position becomes a bare Hamza. |
| do ( |
| backwards ( |
| [substring] among ( |
| '{ao}' '{ao_}' '{a~}' '{wo}' '{yo}' ( <- '{o}' ) |
| ) |
| ) |
| ) |
| |
| // Pass 2: every remaining hamza carrier is reduced to its plain carrier |
| // letter (Alef, Waw or Yeh). `repeat` walks the whole word and stops when |
| // `next` can no longer advance, so the scan does not depend on whatever |
| // value an earlier routine happened to leave in word_len. |
| do repeat ( |
| ( |
| [substring] among ( |
| '{ao}' '{ao_}' '{a~}' ( <- '{a}' ) |
| '{wo}' ( <- '{w}' ) |
| '{yo}' ( <- '{y}' ) |
| ) |
| ) |
| or |
| next |
| ) |
| ) |
| |
| // Checks |
| define Checks1 as ( |
| // Looks at the start of the word: if it begins with one of the listed |
| // prefixes and is long enough, flag it as a defined noun (and not a verb). |
| // Fails, leaving the flags untouched, otherwise. |
| $word_len = len |
| [substring] among ( |
| // two-letter prefixes (Alef-Lam, Lam-Lam): word longer than 3 |
| '{a}{l}' '{l}{l}' ( |
| $word_len >= 4 |
| set is_defined set is_noun unset is_verb |
| ) |
| // three-letter prefixes (Beh/Kaf + Alef-Lam): word longer than 4 |
| '{k}{a}{l}' '{b}{a}{l}' ( |
| $word_len >= 5 |
| set is_defined set is_noun unset is_verb |
| ) |
| ) |
| ) |
| |
| |
| //prefixes |
| define Prefix_Step1 as ( |
| // Collapses a word-initial pair that starts with Hamza-above-Alef into a |
| // single letter. Only applies to words longer than 3 characters. |
| $word_len = len |
| [substring] among ( |
| // pairs that collapse to Hamza-above-Alef |
| '{ao}{ao}' '{ao}{wo}' ( $word_len >= 4 <- '{ao}' ) |
| // pairs that keep only their second letter |
| '{ao}{a}' ( $word_len >= 4 <- '{a}' ) |
| '{ao}{a~}' ( $word_len >= 4 <- '{a~}' ) |
| '{ao}{ao_}' ( $word_len >= 4 <- '{ao_}' ) |
| // disabled (rare case): '{ao}' ($word_len > 3 delete) |
| ) |
| ) |
| |
| define Prefix_Step2 as ( |
| // Removes a leading Feh or Waw from words longer than 3 characters, |
| // unless the letter right after it is Alef. |
| $word_len = len |
| [substring] among ( |
| '{f}' '{w}' ( |
| $word_len >= 4 |
| not '{a}' // cursor sits just after the matched letter |
| delete |
| ) |
| ) |
| ) |
| |
| define Prefix_Step3a_Noun as ( // it is noun and defined |
| // Deletes a leading Alef-Lam / Lam-Lam (word longer than 4) or |
| // Beh/Kaf + Alef-Lam (word longer than 5). |
| $word_len = len |
| [substring] among ( |
| '{a}{l}' '{l}{l}' ( $word_len >= 5 delete ) |
| '{k}{a}{l}' '{b}{a}{l}' ( $word_len >= 6 delete ) |
| ) |
| ) |
| |
| define Prefix_Step3b_Noun as ( // probably noun and defined |
| // Words longer than 3 characters only: |
| // - a doubled leading Beh or Kaf is reduced to a single letter; |
| // - a single leading Beh is deleted, except when Alef follows it. |
| $word_len = len |
| [substring] among ( |
| '{k}{k}' ( $word_len >= 4 <- '{k}' ) |
| '{b}{b}' ( $word_len >= 4 <- '{b}' ) |
| '{b}' ( |
| $word_len >= 4 |
| not '{a}' // exception: Beh followed by Alef is left alone |
| delete |
| ) |
| // disabled (causes confusion): '{k}' '{l}' ($word_len > 3 delete) |
| ) |
| |
| ) |
| |
| define Prefix_Step3_Verb as ( |
| // Drops a leading Seen when it is followed by Yeh, Teh, Noon or |
| // Hamza-above-Alef; the following letter is kept. Word must be longer than 4. |
| $word_len = len |
| [substring] among ( |
| '{s}{ao}' ( $word_len >= 5 <- '{ao}' ) |
| '{s}{n}' ( $word_len >= 5 <- '{n}' ) |
| '{s}{t}' ( $word_len >= 5 <- '{t}' ) |
| '{s}{y}' ( $word_len >= 5 <- '{y}' ) |
| // disabled (causes confusion): '{s}' ($word_len > 4 delete) |
| ) |
| ) |
| |
| define Prefix_Step4_Verb as ( |
| // A leading Yeh/Noon/Teh + Seen-Teh is rewritten to Alef + Seen-Teh and the |
| // word is flagged as a verb (not a noun). Word must be longer than 4. |
| $word_len = len |
| [substring] among ( |
| '{t}{s}{t}' '{n}{s}{t}' '{y}{s}{t}' ( |
| $word_len >= 5 |
| unset is_noun set is_verb |
| <- '{a}{s}{t}' |
| ) |
| ) |
| ) |
| |
| // suffixes |
| backwardmode ( |
| |
| define Suffix_Noun_Step1a as ( |
| // Deletes one of the listed endings; the longer the ending, the longer |
| // the word has to be. |
| $word_len = len |
| [substring] among ( |
| // three letters: word of 6 or more |
| '{e}{m}{a}' '{k}{m}{a}' ( $word_len > 5 delete ) |
| // two letters: word of 5 or more |
| '{e}{m}' '{e}{n}' '{e}{a}' '{k}{m}' '{n}{a}' ( $word_len > 4 delete ) |
| // one letter: word of 4 or more |
| '{e}' '{k}' '{y}' ( $word_len > 3 delete ) |
| ) |
| ) |
| define Suffix_Noun_Step1b as ( |
| // Deletes a final Noon from words of 6 or more characters. |
| $word_len = len |
| [substring] among ( |
| '{n}' ( $word_len >= 6 delete ) |
| ) |
| ) |
| |
| define Suffix_Noun_Step2a as ( |
| // Deletes a final Alef, Waw or Yeh from words of 5 or more characters. |
| $word_len = len |
| [substring] among ( |
| '{w}' '{y}' '{a}' ( $word_len >= 5 delete ) |
| ) |
| ) |
| |
| define Suffix_Noun_Step2b as ( |
| // Deletes a final Alef-Teh from words longer than 4 characters. |
| $word_len = len |
| [substring] among ( |
| '{a}{t}' ( $word_len > 4 delete ) |
| ) |
| ) |
| |
| define Suffix_Noun_Step2c1 as ( |
| // Deletes a final Teh from words longer than 3 characters. |
| $word_len = len |
| [substring] among ( |
| '{t}' ( $word_len > 3 delete ) |
| ) |
| ) |
| define Suffix_Noun_Step2c2 as ( // feminine t_ |
| // Deletes a final Teh Marbuta from words longer than 3 characters. |
| $word_len = len |
| [substring] among ( |
| '{t_}' ( $word_len > 3 delete ) |
| ) |
| ) |
| define Suffix_Noun_Step3 as ( // ya' nisbiya |
| // Deletes a final Yeh from words longer than 2 characters. |
| $word_len = len |
| [substring] among ( |
| '{y}' ( $word_len > 2 delete ) |
| ) |
| ) |
| |
| define Suffix_Verb_Step1 as ( |
| // Deletes one of the listed endings; the minimum word length grows |
| // with the length of the ending. |
| $word_len = len |
| [substring] among ( |
| // three letters: word of 6 or more |
| '{k}{m}{w}' '{k}{m}{a}' '{e}{m}{a}' ( $word_len > 5 delete ) |
| // two letters: word of 5 or more |
| '{k}{n}' '{k}{m}' '{e}{n}' '{e}{m}' '{e}{a}' '{n}{a}' '{n}{y}' ( $word_len > 4 delete ) |
| // one letter: word of 4 or more |
| '{k}' '{e}' ( $word_len > 3 delete ) |
| ) |
| ) |
| define Suffix_Verb_Step2a as ( |
| // Deletes one of the listed verb endings, subject to a minimum word length. |
| $word_len = len |
| [substring] among ( |
| // single letter: word of 4 or more |
| '{t}' '{a}' '{n}' '{y}' ( $word_len > 3 delete ) |
| // past: word of 5 or more |
| '{n}{a}' '{t}{a}' '{t}{n}' ( $word_len > 4 delete ) |
| // present: word of 6 or more |
| '{a}{n}' '{w}{n}' '{y}{n}' ( $word_len >= 6 delete ) |
| // three letters: word of 6 or more |
| '{t}{m}{a}' ( $word_len > 5 delete ) |
| ) |
| ) |
| |
| define Suffix_Verb_Step2b as ( |
| // Deletes a final Waw-Alef or Teh-Meem from words longer than 4 characters. |
| $word_len = len |
| [substring] among ( |
| '{t}{m}' '{w}{a}' ( $word_len > 4 delete ) |
| ) |
| ) |
| |
| |
| define Suffix_Verb_Step2c as ( |
| // Deletes a final Teh-Meem-Waw (word of 6 or more) or a final Waw |
| // (word of 4 or more). |
| $word_len = len |
| [substring] among ( |
| '{t}{m}{w}' ( $word_len > 5 delete ) |
| '{w}' ( $word_len > 3 delete ) |
| ) |
| ) |
| |
| define Suffix_All_alef_maqsura as ( |
| // Replaces a final Alef Maksura with Yeh, whatever the word length. |
| // No length test is made here, so word_len is not touched: every routine |
| // that tests word_len assigns it from `len` itself first. |
| [substring] among ( |
| '{a_}' ( <- '{y}' ) // spell error |
| // alternatives that were considered and left disabled: |
| // '{a_}' ( delete ) // if noun > 3 |
| // '{a_}' ( <- '{a}') // if verb |
| ) |
| ) |
| ) |
| |
| define stem as ( |
| // Entry point. Order of work: reset flags, guess the word class, normalize |
| // the input, strip suffixes (backward mode), strip prefixes, normalize hamzas. |
| // Every stage is wrapped in `do`, so a stage that fails does not abort the rest. |
| |
| // set initial values |
| // (a word starts out as possibly-noun and possibly-verb, not defined) |
| set is_noun |
| set is_verb |
| unset is_defined |
| |
| // guess type and properties |
| // NOTE(review): Checks1 runs before Normalize_pre, so it sees the raw input |
| // (diacritics and shaped forms still present) - confirm this is intended. |
| do Checks1 |
| |
| // normalization pre-stemming |
| do Normalize_pre |
| |
| |
| backwards ( |
| |
| // Exactly one of the three alternatives below takes effect: verb suffixes, |
| // else noun suffixes, else the Alef Maksura fix. |
| do ( |
| //Suffixes for verbs |
| ( |
| is_verb |
| ( |
| ( |
| // one or more Step1 endings, optionally followed by a Step2a/2c ending; |
| // `or next` lets the group succeed when neither applies |
| (atleast 1 Suffix_Verb_Step1) |
| ( Suffix_Verb_Step2a or Suffix_Verb_Step2c or next) |
| ) |
| or Suffix_Verb_Step2b |
| or Suffix_Verb_Step2a |
| ) |
| ) |
| //Suffixes for nouns |
| or ( |
| is_noun |
| ( |
| |
| // the first alternative that succeeds wins; `try` makes the whole group optional |
| try ( |
| Suffix_Noun_Step2c2 |
| // Step1a is only attempted when the word was not flagged as defined |
| or (not is_defined Suffix_Noun_Step1a ( |
| Suffix_Noun_Step2a |
| or Suffix_Noun_Step2b |
| or Suffix_Noun_Step2c1 |
| or next)) |
| // unlike the Step1a branch there is no `or next` here: |
| // Step1b must be followed by one of the Step2 endings |
| or (Suffix_Noun_Step1b ( |
| Suffix_Noun_Step2a |
| or Suffix_Noun_Step2b |
| or Suffix_Noun_Step2c1)) |
| or (not is_defined Suffix_Noun_Step2a) |
| or (Suffix_Noun_Step2b) |
| ) |
| // Step3 is required: if it fails, the noun alternative as a whole fails |
| Suffix_Noun_Step3 |
| ) |
| |
| ) |
| |
| // Suffixes for alef maqsura |
| or Suffix_All_alef_maqsura |
| ) |
| ) |
| |
| //Prefixes |
| do ( |
| // Step1 and Step2 are optional; after them one of the class-specific |
| // prefix steps is tried, in this order |
| try Prefix_Step1 |
| try Prefix_Step2 |
| ( Prefix_Step3a_Noun |
| or (is_noun Prefix_Step3b_Noun) |
| // Step3_Verb is optional, Step4_Verb decides success of this branch |
| or (is_verb try Prefix_Step3_Verb Prefix_Step4_Verb) |
| ) |
| ) |
| |
| // normalization post-stemming |
| do Normalize_post |
| |
| ) |