// blob: c812e055c3cd804c05214f93ed425ce8749947fc [file] [log] [blame] [edit]  (repository web-viewer header, not part of the Snowball program)
/*
Hungarian Stemmer
Removes noun inflections
*/
routines (
mark_regions
R1
v_ending
case
case_special
case_other
plural
owned
sing_owner
plur_owner
instrum
factive
undouble
double
)
externals ( stem )
integers ( p1 )
groupings ( v )
stringescapes {}
/* special characters (in Unicode) */
// Named escapes for the accented vowels; each value is a single code point given in hex.
stringdef a' hex 'E1' // a-acute (U+00E1)
stringdef e' hex 'E9' // e-acute (U+00E9)
stringdef i' hex 'ED' // i-acute (U+00ED)
stringdef o' hex 'F3' // o-acute (U+00F3)
stringdef o" hex 'F6' // o-umlaut (U+00F6)
stringdef oq hex '151' // o-double acute (U+0151)
stringdef u' hex 'FA' // u-acute (U+00FA)
stringdef u" hex 'FC' // u-umlaut (U+00FC)
stringdef uq hex '171' // u-double acute (U+0171)
// Vowel grouping: the five plain vowels plus every accented vowel defined above.
define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'
/*
mark_regions: sets p1, the position that R1 compares the cursor against.
p1 starts out as limit and is only moved if one of the two alternatives
below succeeds:
  1. the text at the cursor starts with a vowel: p1 is placed after the
     first non-vowel that follows, where a listed multi-letter consonant
     ('cs', 'gy', ..., 'dzs') counts as one unit;
  2. otherwise: p1 is placed just after the first vowel.
*/
define mark_regions as (
$p1 = limit
// Alternative 1: consume one vowel, then advance until a non-vowel is at the cursor.
(v goto non-v
// Step over a listed digraph/trigraph if one is present, else over one character.
among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next
setmark p1)
or
// Alternative 2: consume one non-vowel, then move to just past the next vowel.
(non-v gopast v setmark p1)
)
backwardmode (
// R1 succeeds when the cursor is at or beyond p1, the mark set by mark_regions.
define R1 as ( $p1 <= cursor )
// v_ending: if the text ends (inside R1) in an accented a or e,
// replace it with the unaccented letter. Used by `case` after its delete.
define v_ending as (
    [substring] R1
    among(
        '{e'}' (<- 'e')
        '{a'}' (<- 'a')
    )
)
// double: succeeds, without moving the cursor (test), when the text before
// the cursor ends in one of the doubled consonants listed below.
define double as (
    test among(
        // plain doubled letters
        'bb' 'cc' 'dd' 'ff' 'gg' 'jj' 'kk' 'll' 'mm'
        'nn' 'pp' 'rr' 'ss' 'tt' 'vv' 'zz'
        // doubled forms of two-letter consonants
        'ccs' 'ggy' 'lly' 'nny' 'ssz' 'tty' 'zzs'
    )
)
// undouble: removes one character from a doubled consonant.
// Runs in backward mode: `next` steps back over the last character, then the
// single character before it is bracketed by `[hop 1]` and deleted
// (e.g. 'bb' -> 'b', 'ccs' -> 'cs').
define undouble as (
next [hop 1] delete
)
// instrum: removes a final 'al' / 'el' (inside R1) when it is preceded by a
// doubled consonant, then reduces that doubled consonant with undouble.
define instrum as (
    [substring] R1
    among(
        'al' 'el' (double)
    )
    delete
    undouble
)
// case: deletes the longest matching ending from the list below (inside R1),
// then applies v_ending to what remains. The list is laid out by length for
// readability only; `among` always picks the longest match regardless of order.
define case as (
    [substring] R1
    among(
        // six letters
        'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt' 'k{e'}ppen'

        // four letters
        'k{e'}nt' 'k{e'}pp'

        // three letters
        'ban' 'ben' 'nak' 'nek' 'val' 'vel'
        't{o'}l' 't{oq}l' 'r{o'}l' 'r{oq}l' 'b{o'}l' 'b{oq}l'
        'hoz' 'hez' 'h{o"}z'
        'n{a'}l' 'n{e'}l'
        '{e'}rt' 'kor'

        // two letters
        'ba' 'be' 'ra' 're' 'ig'
        'at' 'et' 'ot' '{o"}t'
        'ul' '{u"}l'
        'v{a'}' 'v{e'}'
        'en' 'on' 'an' '{o"}n'

        // one letter
        'n' 't'
    )
    delete
    v_ending
)
// case_special: endings that begin with an accented vowel; the whole ending
// (inside R1) is replaced by the corresponding unaccented vowel.
define case_special as (
    [substring] R1
    among(
        '{a'}n' '{a'}nk{e'}nt' (<- 'a')
        '{e'}n'                (<- 'e')
    )
)
// case_other: handles the '-stul' / '-stül' family of endings (inside R1).
// Forms starting with an accented vowel keep that vowel, unaccented;
// all other forms are removed outright.
define case_other as (
    [substring] R1
    among(
        'astul' 'est{u"}l' 'stul' 'st{u"}l' (delete)
        '{a'}stul'                          (<- 'a')
        '{e'}st{u"}l'                       (<- 'e')
    )
)
// factive: removes a final accented a / e (inside R1) when it is preceded by
// a doubled consonant, then reduces that doubled consonant with undouble.
define factive as (
    [substring] R1
    among(
        '{a'}' '{e'}' (double)
    )
    delete
    undouble
)
// plural: strips endings in 'k' (inside R1). An accented a/e before the 'k'
// is kept as its unaccented form; every other listed ending is deleted.
define plural as (
    [substring] R1
    among(
        'k' 'ak' 'ek' 'ok' '{o"}k' (delete)
        '{a'}k'                    (<- 'a')
        '{e'}k'                    (<- 'e')
    )
)
// owned: strips the endings listed below (inside R1). Endings that begin with
// an accented a/e are replaced by the unaccented vowel; the rest are deleted.
define owned as (
    [substring] R1
    among(
        // removed entirely
        'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}' 'k{e'}'
        '{e'}i' '{e'}'
            (delete)

        // leading accented e is kept as plain e
        '{e'}k{e'}' '{e'}{e'}i' '{e'}{e'}'
            (<- 'e')

        // leading accented a is kept as plain a
        '{a'}k{e'}' '{a'}{e'}i'
            (<- 'a')
    )
)
// sing_owner: strips the endings listed below (inside R1). Endings that begin
// with an accented a/e are replaced by the unaccented vowel; the rest are deleted.
define sing_owner as (
    [substring] R1
    among(
        // removed entirely
        '{u"}nk' 'unk' 'nk'
        'juk' 'j{u"}k' 'uk' '{u"}k'
        'em' 'om' 'am' 'm'
        'od' 'ed' 'ad' '{o"}d' 'd'
        'ja' 'je'
        'a' 'e' 'o'
            (delete)

        // leading accented a is kept as plain a
        '{a'}nk' '{a'}juk' '{a'}m' '{a'}d' '{a'}'
            (<- 'a')

        // leading accented e is kept as plain e
        '{e'}nk' '{e'}j{u"}k' '{e'}m' '{e'}d' '{e'}'
            (<- 'e')
    )
)
// plur_owner: strips the endings listed below (inside R1). Endings that begin
// with an accented a/e are replaced by the unaccented vowel; the rest are deleted.
// NOTE(review): the list contains 'itek' but no 'itok' counterpart -- confirm
// whether that is intentional before changing it, since it affects stemmer output.
define plur_owner as (
    [substring] R1
    among(
        // removed entirely
        'jaim' 'jeim' 'aim' 'eim' 'im'
        'jaid' 'jeid' 'aid' 'eid' 'id'
        'jai' 'jei' 'ai' 'ei' 'i'
        'jaink' 'jeink' 'eink' 'aink' 'ink'
        'jaitok' 'jeitek' 'aitok' 'eitek' 'itek'
        'jeik' 'jaik' 'aik' 'eik' 'ik'
            (delete)

        // leading accented a is kept as plain a
        '{a'}im' '{a'}id' '{a'}i' '{a'}ink' '{a'}itok' '{a'}ik'
            (<- 'a')

        // leading accented e is kept as plain e
        '{e'}im' '{e'}id' '{e'}i' '{e'}ink' '{e'}itek' '{e'}ik'
            (<- 'e')
    )
)
)
/*
stem: the external entry point.
First computes p1 (forward mode), then runs the suffix routines over the word
from its end (backwards). Each step is wrapped in `do`, so a step that does
not apply leaves the word unchanged and the next step still runs.
The order of the steps is part of the algorithm.
*/
define stem as (
// Set p1 for the R1 test used by every suffix routine.
do mark_regions
backwards (
// Endings that follow a doubled consonant ('al' / 'el').
do instrum
// Case endings, in three passes.
do case
do case_special
do case_other
// Final accented a/e after a doubled consonant.
do factive
// Owner/owned endings.
do owned
do sing_owner
do plur_owner
// Endings in 'k'.
do plural
)
)