// blob: b836b1ac7710426995335f7e68351578012ca827 [file] [log] [blame]
// Inside string literals, {name} expands to the stringdef called name.
stringescapes {}

/* The 32 Cyrillic letters handled by this stemmer, given as Unicode code
   points U+0430 .. U+044F and named by a Latin transliteration. */
stringdef a     hex '430'
stringdef b     hex '431'
stringdef v     hex '432'
stringdef g     hex '433'
stringdef d     hex '434'
stringdef e     hex '435'
stringdef zh    hex '436'
stringdef z     hex '437'
stringdef i     hex '438'
stringdef i`    hex '439'
stringdef k     hex '43A'
stringdef l     hex '43B'
stringdef m     hex '43C'
stringdef n     hex '43D'
stringdef o     hex '43E'
stringdef p     hex '43F'

stringdef r     hex '440'
stringdef s     hex '441'
stringdef t     hex '442'
stringdef u     hex '443'
stringdef f     hex '444'
stringdef kh    hex '445'
stringdef ts    hex '446'
stringdef ch    hex '447'
stringdef sh    hex '448'
stringdef shch  hex '449'
stringdef "     hex '44A'
stringdef y     hex '44B'
stringdef '     hex '44C'
stringdef e`    hex '44D'
stringdef iu    hex '44E'
stringdef ia    hex '44F'
// Routines used internally by the stemmer.
routines (
    mark_regions R2
    perfective_gerund
    adjective adjectival
    reflexive
    verb noun
    derivational
    tidy_up
)

// The one routine exported to callers.
externals ( stem )

// Cursor positions recorded by mark_regions.
integers ( pV p2 )

// v is the grouping of vowel letters tested by mark_regions.
groupings ( v )

define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'
/* Record the two region marks used by the suffix routines.

   pV is set just after the first vowel of the word.
   p2 is set after the second vowel/non-vowel pair that follows the first
   vowel-to-non-vowel transition.

   Both marks start at the end of the word (limit); the scan is wrapped in
   `do`, so if it runs out of input a mark not yet reached keeps that value. */
define mark_regions as (
    $pV = limit
    $p2 = limit
    do (
        gopast v
        setmark pV
        gopast non-v

        gopast v
        gopast non-v
        setmark p2
    )
)
backwardmode (
// Succeeds when the cursor lies at or beyond the p2 mark set by mark_regions.
define R2 as ( $p2 <= cursor )
/* Remove a perfective gerund ending.
   The longest listed suffix at the cursor is bracketed as the slice; the
   action attached to its group then decides whether the slice is deleted. */
define perfective_gerund as (
    [substring] among (
        // Group 1: deleted only when immediately preceded by {a} or {ia}
        // (the preceding letter is tested, not removed).
        '{v}'  '{v}{sh}{i}'  '{v}{sh}{i}{s}{'}'
            ( '{a}' or '{ia}'  delete )

        // Group 2: deleted unconditionally.
        '{i}{v}'  '{i}{v}{sh}{i}'  '{i}{v}{sh}{i}{s}{'}'
        '{y}{v}'  '{y}{v}{sh}{i}'  '{y}{v}{sh}{i}{s}{'}'
            ( delete )
    )
)
/* Remove an adjective ending: the longest listed suffix at the cursor is
   bracketed and deleted. */
define adjective as (
    [substring] among (
        '{e}{e}'    '{i}{e}'    '{y}{e}'    '{o}{e}'
        '{i}{m}{i}' '{y}{m}{i}'
        '{e}{i`}'   '{i}{i`}'   '{y}{i`}'   '{o}{i`}'
        '{e}{m}'    '{i}{m}'    '{y}{m}'    '{o}{m}'
        '{e}{g}{o}' '{o}{g}{o}'
        '{e}{m}{u}' '{o}{m}{u}'
        '{i}{kh}'   '{y}{kh}'
        '{u}{iu}'   '{iu}{iu}'
        '{a}{ia}'   '{ia}{ia}'
        // plus two further endings:
        '{o}{iu}'   // somewhat archaic
        '{e}{iu}'   // soft form of {o}{iu}
            ( delete )
    )
)
/* Remove an adjectival ending: a mandatory adjective ending, optionally
   followed (working backwards) by a participle ending.

   Of the participle forms, em, vsh, ivsh, yvsh are readily removable.
   nn, {iu}shch, shch, u{iu}shch can be removed with a small proportion of
   errors. Removing im, uem, enn creates too many errors, so those are not
   listed. */
define adjectival as (
    adjective

    try (
        [substring] among (
            // Group 1: deleted only when immediately preceded by {a} or {ia}.
            '{e}{m}'                    // present passive participle
            '{n}{n}'                    // adjective from past passive participle
            '{v}{sh}'                   // past active participle
            '{iu}{shch}'  '{shch}'      // present active participle
                ( '{a}' or '{ia}'  delete )

            // Deliberately absent from the list:
            //   '{i}{m}' '{u}{e}{m}'   present passive participle
            //   '{e}{n}{n}'            adjective from past passive participle

            // Group 2: deleted unconditionally.
            '{i}{v}{sh}'  '{y}{v}{sh}'  // past active participle
            '{u}{iu}{shch}'             // present active participle
                ( delete )
        )
    )
)
// Remove a reflexive ending ({s}{ia} or {s}{'}) found at the cursor.
define reflexive as (
    [substring] among (
        '{s}{ia}'  '{s}{'}'
            ( delete )
    )
)
/* Remove a verb ending.
   The longest listed suffix at the cursor is bracketed; group 1 endings are
   deleted only after {a} or {ia}, group 2 endings are always deleted.

   The short passive participle endings are covered by these entries:
       '{n}{a}'    '{n}'    '{n}{o}'    '{n}{y}'
       '{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}' */
define verb as (
    [substring] among (
        // Group 1: requires a preceding {a} or {ia}.
        '{l}{a}'  '{n}{a}'  '{e}{t}{e}'  '{i`}{t}{e}'
        '{l}{i}'  '{i`}'    '{l}'        '{e}{m}'
        '{n}'     '{l}{o}'  '{n}{o}'     '{e}{t}'
        '{iu}{t}' '{n}{y}'  '{t}{'}'     '{e}{sh}{'}'
        '{n}{n}{o}'
            ( '{a}' or '{ia}'  delete )

        // Group 2: unconditional.
        '{i}{l}{a}'     '{y}{l}{a}'  '{e}{n}{a}'  '{e}{i`}{t}{e}'
        '{u}{i`}{t}{e}' '{i}{t}{e}'  '{i}{l}{i}'  '{y}{l}{i}'
        '{e}{i`}'       '{u}{i`}'    '{i}{l}'     '{y}{l}'
        '{i}{m}'        '{y}{m}'     '{e}{n}'     '{i}{l}{o}'
        '{y}{l}{o}'     '{e}{n}{o}'  '{ia}{t}'    '{u}{e}{t}'
        '{u}{iu}{t}'    '{i}{t}'     '{y}{t}'     '{e}{n}{y}'
        '{i}{t}{'}'     '{y}{t}{'}'  '{i}{sh}{'}' '{u}{iu}'
        '{iu}'
            ( delete )
    )
)
/* Remove a noun ending: the longest listed suffix at the cursor is bracketed
   and deleted.

   The small class of neuter forms
       '{e}{n}{i}' '{e}{n}{e}{m}' '{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}'
       '{e}{n}{a}{m}{i}' '{e}{n}{a}{kh}'
   is omitted - they only occur on 12 words. */
define noun as (
    [substring] among (
        '{a}'       '{e}{v}'       '{o}{v}'
        '{i}{e}'    '{'}{e}'       '{e}'
        '{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}'
        '{e}{i}'    '{i}{i}'       '{i}'
        '{i}{e}{i`}' '{e}{i`}'     '{o}{i`}'    '{i}{i`}'   '{i`}'
        '{i}{ia}{m}' '{ia}{m}'     '{i}{e}{m}'  '{e}{m}'    '{a}{m}'   '{o}{m}'
        '{o}'       '{u}'
        '{a}{kh}'   '{i}{ia}{kh}'  '{ia}{kh}'
        '{y}'       '{'}'
        '{i}{iu}'   '{'}{iu}'      '{iu}'
        '{i}{ia}'   '{'}{ia}'      '{ia}'
            ( delete )
    )
)
/* Remove a derivational ending ({o}{s}{t} or {o}{s}{t}{'}), but only when
   the R2 test succeeds at the start of the matched suffix. */
define derivational as (
    [substring] R2 among (
        '{o}{s}{t}'  '{o}{s}{t}{'}'
            ( delete )
    )
)
/* Final clean-up of the stem. Exactly one of the three cases applies,
   chosen by the longest listed suffix found at the cursor. */
define tidy_up as (
    [substring] among (
        // Superlative forms: drop the ending, then require a double {n}
        // before it and drop one of the two.
        '{e}{i`}{sh}'  '{e}{i`}{sh}{e}'
            (
                delete
                ['{n}']  '{n}'  delete
            )

        // A final {n} preceded by another {n} is dropped (e.g. -nno endings).
        '{n}'
            ( '{n}'  delete )

        // A final soft sign is dropped, with some slight false conflations.
        '{'}'
            ( delete )
    )
)
)
// Cyrillic small letter io (U+0451). It is not one of the 32 letters used by
// the suffix tables or the vowel grouping, so stem folds it to {e} first.
stringdef e" hex '451'

/* Entry point: stem the word in place.

   Steps:
     0. Replace every {e"} by {e}, so words spelled with io are treated the
        same as their spelling with {e}.
     1. mark_regions records pV and p2.
     2. Working backwards, and never looking left of pV:
          a. remove a perfective gerund ending, or else optionally a
             reflexive ending followed by an adjectival, verb or noun ending;
          b. remove a final {i} if present;
          c. remove a derivational ending (subject to the R2 test);
          d. tidy up (superlative, double {n}, final soft sign).
   Every step is wrapped in do/try, so a step that fails leaves the word
   as it was and the later steps still run. */
define stem as (
    // Normalise {e"} to {e} before any region or suffix test is made.
    do repeat ( goto (['{e"}']) <- '{e}' )

    do mark_regions

    backwards setlimit tomark pV for (
        do (
            perfective_gerund or
            (
                try reflexive
                adjectival or verb or noun
            )
        )

        // because noun ending -i{iu} is being treated as verb ending -{iu}
        try ( ['{i}'] delete )

        do derivational
        do tidy_up
    )
)