algorithms/turkish.sbl - platform/external/rust/crates/rust-stemmers - Git at Google

 /* Stemmer for Turkish
 	* author: Evren (Kapusuz) Çilden
 	* email: evren.kapusuz at gmail.com
 	* version: 1.0 (15.01.2007)


 	* stems nominal verb suffixes
 	* stems nominal inflections
 	* more than one syllable word check
 	* (y,n,s,U) context check
 	* vowel harmony check
 	* last consonant check and conversion (b, c, d, ğ to p, ç, t, k)

 	* The stemming algorithm is based on the paper "An Affix Stripping
 	* Morphological Analyzer for Turkish" by Gülşen Eryiğit and
 	* Eşref Adalı (Proceedings of the IASTED International Conference
 	* ARTIFICIAL INTELLIGENCE AND APPLICATIONS, February 16-18, 2004,
 	* Innsbruck, Austria).

 	* Turkish is an agglutinative language and has a very rich morphological
 	* structure. In Turkish, you can form many different words from a single stem
 	* by appending a sequence of suffixes. Eg. The word "doktoruymuşsunuz" means
 	* "You had been the doctor of him". The stem of the word is "doktor" and it
 	* takes three different suffixes -sU, -ymUs, and -sUnUz. The rules about
 	* the append order of suffixes can be clearly described as FSMs.
 	* The paper referenced above defines some FSMs for right to left
 	* morphological analysis. I generated a method for constructing snowball
 	* expressions from right to left FSMs for stemming suffixes.
 */

 // Declarations of every routine defined in this file.
 routines (
 	append_U_to_stems_ending_with_d_or_g // for preventing some overstemmings
 	check_vowel_harmony	// tests vowel harmony for suffixes
 	is_reserved_word	// tests whether current string is a reserved word ('ad','soyad')
 	mark_cAsInA		// nominal verb suffix
 	mark_DA			// noun suffix
 	mark_DAn		// noun suffix
 	mark_DUr		// nominal verb suffix
 	mark_ki			// noun suffix
 	mark_lAr		// noun suffix, nominal verb suffix
 	mark_lArI		// noun suffix
 	mark_nA			// noun suffix
 	mark_ncA		// noun suffix
 	mark_ndA		// noun suffix
 	mark_ndAn		// noun suffix
 	mark_nU			// noun suffix
 	mark_nUn		// noun suffix
 	mark_nUz		// nominal verb suffix
 	mark_sU			// noun suffix
 	mark_sUn		// nominal verb suffix
 	mark_sUnUz		// nominal verb suffix
 	mark_possessives	// possessive suffixes -(U)m, -(U)n, -(U)mUz, -(U)nUz
 	mark_yA			// noun suffix
 	mark_ylA		// noun suffix
 	mark_yU			// noun suffix
 	mark_yUm		// nominal verb suffix
 	mark_yUz		// nominal verb suffix
 	mark_yDU		// nominal verb suffix
 	mark_yken		// nominal verb suffix
 	mark_ymUs_		// nominal verb suffix
 	mark_ysA		// nominal verb suffix

 	// context checks run after a suffix whose first letter is optional
 	mark_suffix_with_optional_y_consonant
 	mark_suffix_with_optional_U_vowel
 	mark_suffix_with_optional_n_consonant
 	mark_suffix_with_optional_s_consonant

 	more_than_one_syllable_word	// tests that the word has at least two vowels

 	post_process_last_consonants	// rewrites a final b, c, d or g-breve
 	postlude			// clean-up run by stem after suffix removal

 	stem_nominal_verb_suffixes	// strips nominal verb suffixes
 	stem_noun_suffixes		// strips noun suffixes
 	stem_suffix_chain_before_ki	// strips suffix chains that end in -ki
 )

 /* Special characters in Unicode Latin-1 and Latin Extended-A */
 stringdef c.   	hex 'E7'	// LATIN SMALL LETTER C WITH CEDILLA
 stringdef g~   	hex '011F'	// LATIN SMALL LETTER G WITH BREVE
 stringdef i'   	hex '0131'	// LATIN SMALL LETTER DOTLESS I
 stringdef o"  	hex 'F6'	// LATIN SMALL LETTER O WITH DIAERESIS
 stringdef s.	hex '015F'	// LATIN SMALL LETTER S WITH CEDILLA
 stringdef u"  	hex 'FC'	// LATIN SMALL LETTER U WITH DIAERESIS

 // the names defined above are written between braces inside strings, e.g. '{i'}'
 stringescapes 	{ }

 integers 	( strlen )	// length of a string

 // set and unset in stem_nominal_verb_suffixes, tested in stem
 booleans	( continue_stemming_noun_suffixes )

 groupings 	( vowel U vowel1 vowel2 vowel3 vowel4 vowel5 vowel6)

 define vowel 	'ae{i'}io{o"}u{u"}'	// the eight vowels
 define U	'{i'}iu{u"}'		// the four vowels that 'U' stands for in routine names

 // the vowel grouping definitions below are used for checking vowel harmony:
 // check_vowel_harmony reads the last vowel of the word and then searches
 // further left for a member of the group paired with that vowel
 define vowel1  	'a{i'}ou' 		// group searched for when the last vowel is 'a'
 define vowel2  	'ei{o"}{u"}' 		// group searched for when the last vowel is 'e'
 define vowel3  	'a{i'}' 		// group searched for when the last vowel is dotless i
 define vowel4  	'ei'	 		// group searched for when the last vowel is 'i'
 define vowel5  	'ou'	 		// group searched for when the last vowel is 'o' or 'u'
 define vowel6  	'{o"}{u"}' 		// group searched for when the last vowel is o-diaeresis or u-diaeresis

 externals 	( stem )	// routine made visible to callers

 backwardmode (
 	// Vowel-harmony guard used by the mark_* routines to limit overstemming.
 	// Working right to left it stops at the nearest vowel, consumes it and then
 	// requires that a member of the group paired with that vowel occurs
 	// somewhere further left. The whole check runs under `test`, so the
 	// cursor is left where it was.
 	define check_vowel_harmony as (
 		test (
 			goto vowel	// fails when no vowel is left
 			(
 				('a' goto vowel1)
 				or ('e' goto vowel2)
 				or ('{i'}' goto vowel3)
 				or ('i' goto vowel4)
 				or (('o' or 'u') goto vowel5)
 				or (('{o"}' or '{u"}') goto vowel6)
 			)
 		)
 	)

 	// Context check for a suffix that may be preceded by a buffer 'n'.
 	// Called right after the suffix text has been matched (backward mode):
 	//  - if the preceding character is 'n', it is consumed and the character
 	//    before it must be a vowel;
 	//  - otherwise nothing is consumed, and the character two positions back
 	//    must be a vowel.
 	// assumption: slice beginning is set correctly
 	define mark_suffix_with_optional_n_consonant as (
 		('n' test vowel)
 		or
 		(not(test 'n') test(next vowel))
 	)

 	// Context check for a suffix that may be preceded by a buffer 's'.
 	// Called right after the suffix text has been matched (backward mode):
 	//  - if the preceding character is 's', it is consumed and the character
 	//    before it must be a vowel;
 	//  - otherwise nothing is consumed, and the character two positions back
 	//    must be a vowel.
 	// assumption: slice beginning is set correctly
 	define mark_suffix_with_optional_s_consonant as (
 		('s' test vowel)
 		or
 		(not(test 's') test(next vowel))
 	)

 	// Context check for a suffix that may be preceded by a buffer 'y'.
 	// Called right after the suffix text has been matched (backward mode):
 	//  - if the preceding character is 'y', it is consumed and the character
 	//    before it must be a vowel;
 	//  - otherwise nothing is consumed, and the character two positions back
 	//    must be a vowel.
 	// assumption: slice beginning is set correctly
 	define mark_suffix_with_optional_y_consonant as (
 		('y' test vowel)
 		or
 		(not(test 'y') test(next vowel))
 	)

 	// Context check for a suffix that may be preceded by a buffer vowel from U.
 	//  - if the preceding character is in U, it is consumed and the character
 	//    before it must not be a vowel;
 	//  - otherwise nothing is consumed, and the character two positions back
 	//    must not be a vowel.
 	define mark_suffix_with_optional_U_vowel as (
 		(U test non-vowel)
 		or
 		(not(test U) test(next non-vowel))
 	)

 	// Possessives -(U)m, -(U)n, -(U)mUz, -(U)nUz: matches the part listed below,
 	// then applies the optional-U context check. No vowel-harmony test is made.
 	define mark_possessives as (
 		among (
 			'm' 'n'
 			'm{i'}z' 'miz' 'muz' 'm{u"}z'
 			'n{i'}z' 'niz' 'nuz' 'n{u"}z'
 		)
 		mark_suffix_with_optional_U_vowel
 	)

 	// -(s)U: harmony check, one vowel from U, then the optional-'s' context check.
 	define mark_sU as (
 		check_vowel_harmony U
 		mark_suffix_with_optional_s_consonant
 	)

 	// -lArI: 'leri' or 'lar' + dotless i; no separate harmony check.
 	define mark_lArI as (
 		among (
 			'lar{i'}'
 			'leri'
 		)
 	)

 	// -(y)U: harmony check, one vowel from U, then the optional-'y' context check.
 	define mark_yU as (
 		check_vowel_harmony U
 		mark_suffix_with_optional_y_consonant
 	)

 	// -nU: harmony check, then 'n' followed by one of the four U vowels.
 	define mark_nU as (
 		check_vowel_harmony
 		among (
 			'ni' 'n{i'}'
 			'nu' 'n{u"}'
 		)
 	)

 	// -(n)Un: harmony check, U vowel + 'n', then the optional-'n' context check.
 	define mark_nUn as (
 		check_vowel_harmony
 		among (
 			'in' '{i'}n'
 			'un' '{u"}n'
 		)
 		mark_suffix_with_optional_n_consonant
 	)

 	// -(y)A: harmony check, 'a' or 'e', then the optional-'y' context check.
 	define mark_yA as (
 		check_vowel_harmony
 		('a' or 'e')
 		mark_suffix_with_optional_y_consonant
 	)

 	// -nA: harmony check, then 'na' or 'ne'.
 	define mark_nA as (
 		check_vowel_harmony
 		among (
 			'na'
 			'ne'
 		)
 	)

 	// -DA: harmony check, then 'd' or 't' followed by 'a' or 'e'.
 	define mark_DA as (
 		check_vowel_harmony
 		among (
 			'da' 'ta'
 			'de' 'te'
 		)
 	)

 	// -ndA: harmony check, then 'nda' or 'nde'.
 	define mark_ndA as (
 		check_vowel_harmony
 		among (
 			'nda'
 			'nde'
 		)
 	)

 	// -DAn: harmony check, then 'd' or 't' followed by 'an' or 'en'.
 	define mark_DAn as (
 		check_vowel_harmony
 		among (
 			'dan' 'tan'
 			'den' 'ten'
 		)
 	)

 	// -ndAn: harmony check, then 'ndan' or 'nden'.
 	define mark_ndAn as (
 		check_vowel_harmony
 		among (
 			'ndan'
 			'nden'
 		)
 	)

 	// -(y)lA: harmony check, 'la' or 'le', then the optional-'y' context check.
 	define mark_ylA as (
 		check_vowel_harmony
 		among (
 			'la'
 			'le'
 		)
 		mark_suffix_with_optional_y_consonant
 	)

 	// -ki: the literal 'ki', with no harmony or context check.
 	define mark_ki as ( 'ki' )

 	// -(n)cA: harmony check, 'ca' or 'ce', then the optional-'n' context check.
 	define mark_ncA as (
 		check_vowel_harmony
 		among (
 			'ca'
 			'ce'
 		)
 		mark_suffix_with_optional_n_consonant
 	)

 	// -(y)Um: harmony check, U vowel + 'm', then the optional-'y' context check.
 	define mark_yUm as (
 		check_vowel_harmony
 		among (
 			'im' '{i'}m'
 			'um' '{u"}m'
 		)
 		mark_suffix_with_optional_y_consonant
 	)

 	// -sUn: harmony check, then 's' + U vowel + 'n'.
 	define mark_sUn as (
 		check_vowel_harmony
 		among (
 			'sin' 's{i'}n'
 			'sun' 's{u"}n'
 		)
 	)

 	// -(y)Uz: harmony check, U vowel + 'z', then the optional-'y' context check.
 	define mark_yUz as (
 		check_vowel_harmony
 		among (
 			'iz' '{i'}z'
 			'uz' '{u"}z'
 		)
 		mark_suffix_with_optional_y_consonant
 	)

 	// -sUnUz: one of the four listed forms; no separate harmony check.
 	define mark_sUnUz as (
 		among (
 			'siniz' 's{i'}n{i'}z'
 			'sunuz' 's{u"}n{u"}z'
 		)
 	)

 	// -lAr: harmony check, then 'lar' or 'ler'.
 	define mark_lAr as (
 		check_vowel_harmony
 		among (
 			'lar'
 			'ler'
 		)
 	)

 	// -nUz: harmony check, then 'n' + U vowel + 'z'.
 	define mark_nUz as (
 		check_vowel_harmony
 		among (
 			'niz' 'n{i'}z'
 			'nuz' 'n{u"}z'
 		)
 	)

 	// -DUr: harmony check, then 'd' or 't' + U vowel + 'r'.
 	define mark_DUr as (
 		check_vowel_harmony
 		among (
 			'd{i'}r' 'dir' 'dur' 'd{u"}r'
 			't{i'}r' 'tir' 'tur' 't{u"}r'
 		)
 	)

 	// -cAsInA: 'cesine' or 'cas' + dotless i + 'na'; no separate harmony check.
 	define mark_cAsInA as (
 		among (
 			'cesine'
 			'cas{i'}na'
 		)
 	)

 	// -(y)DU: harmony check, then 'd' or 't' + U vowel, bare or followed by
 	// 'k', 'm' or 'n', then the optional-'y' context check.
 	define mark_yDU as (
 		check_vowel_harmony
 		among (
 			'd{i'}' 'di' 'du' 'd{u"}'
 			't{i'}' 'ti' 'tu' 't{u"}'
 			'd{i'}k' 'dik' 'duk' 'd{u"}k'
 			't{i'}k' 'tik' 'tuk' 't{u"}k'
 			'd{i'}m' 'dim' 'dum' 'd{u"}m'
 			't{i'}m' 'tim' 'tum' 't{u"}m'
 			'd{i'}n' 'din' 'dun' 'd{u"}n'
 			't{i'}n' 'tin' 'tun' 't{u"}n'
 		)
 		mark_suffix_with_optional_y_consonant
 	)

 	// -(y)sA: 'sa' or 'se', bare or followed by 'k', 'm' or 'n', then the
 	// optional-'y' context check.
 	// does not fully obey vowel harmony, so no harmony check is made
 	define mark_ysA as (
 		among (
 			'sa' 'sak' 'sam' 'san'
 			'se' 'sek' 'sem' 'sen'
 		)
 		mark_suffix_with_optional_y_consonant
 	)

 	// -(y)mUs: harmony check, 'm' + U vowel + s-cedilla, then the optional-'y'
 	// context check.
 	define mark_ymUs_ as (
 		check_vowel_harmony
 		among (
 			'mi{s.}' 'm{i'}{s.}'
 			'mu{s.}' 'm{u"}{s.}'
 		)
 		mark_suffix_with_optional_y_consonant
 	)

 	// -(y)ken: the literal 'ken', then the optional-'y' context check.
 	define mark_yken as (
 		'ken'
 		mark_suffix_with_optional_y_consonant
 	)

 	// Strips nominal verb suffixes from the end of the word (backward mode).
 	// A slice is opened with `[`, six suffix patterns are tried in order and
 	// the routine finishes with `]delete`. Patterns that contain their own
 	// `] delete try([ ...)` delete the outermost suffix first and then look
 	// for one more suffix in front of it, which the final `]delete` removes.
 	// continue_stemming_noun_suffixes is set on entry and unset only by the
 	// -lAr pattern; stem tests the flag after this routine returns.
 	define stem_nominal_verb_suffixes as (
 		[
 			set continue_stemming_noun_suffixes
 			// 1: a single -(y)mUs, -(y)DU, -(y)sA or -(y)ken
 			(mark_ymUs_ or mark_yDU or mark_ysA or mark_yken)
 			or
 			// 2: -cAsInA, optionally a further suffix from the list, then -(y)mUs
 			(mark_cAsInA (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_)
 			or
 			(
 				// 3: -lAr is deleted at once; -DUr, -(y)DU, -(y)sA or -(y)mUs may precede it
 				mark_lAr ] delete try([(mark_DUr or mark_yDU or mark_ysA or mark_ymUs_))
 				unset continue_stemming_noun_suffixes
 			)
 			or
 			// 4: -nUz followed (to the left) by -(y)DU or -(y)sA
 			(mark_nUz (mark_yDU or mark_ysA))
 			or
 			// 5: a suffix from the list is deleted at once; -(y)mUs may precede it
 			((mark_sUnUz or mark_yUz or mark_sUn or mark_yUm) ] delete try([ mark_ymUs_))
 			or
 			// 6: -DUr is deleted at once; an optional suffix plus -(y)mUs may precede it
 			(mark_DUr ] delete try([ (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_))
 		]delete
 	)

 	// stems noun suffix chains ending with -ki
 	// Backward mode. Requires a word-final 'ki' preceded by -DA, -(n)Un or -ndA,
 	// then keeps stripping the suffixes that may stand in front of those and
 	// calls itself where another -ki chain may follow.
 	define stem_suffix_chain_before_ki as (
 		[
 			mark_ki
 			(
 				// -DA + ki: both are deleted together, then an optional -lAr or
 				// possessive (each possibly followed by more of the chain)
 				(mark_DA] delete try([
 					(mark_lAr] delete try(stem_suffix_chain_before_ki))
 					or
 					(mark_possessives] delete try([mark_lAr] delete stem_suffix_chain_before_ki))

 				))
 				or
 				// -(n)Un + ki: both are deleted together, then an optional -lArI,
 				// possessive / -(s)U, or a further -ki chain
 				(mark_nUn] delete try([
 					(mark_lArI] delete)
 					or
 					([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 					or
 					(stem_suffix_chain_before_ki)
 				))
 				or
 				// -ndA + ki: nothing is deleted until -lArI or -(s)U has matched too
 				(mark_ndA (
 					(mark_lArI] delete)
 					or
 					((mark_sU] delete try([mark_lAr]delete stem_suffix_chain_before_ki)))
 					or
 					// NOTE(review): no `] delete` on this path before the recursive
 					// call, which opens its own `[` - confirm this is intended
 					(stem_suffix_chain_before_ki)
 				))
 			)
 	)

 	// Strips noun suffixes from the end of the word (backward mode). The
 	// alternatives are tried in the order written and the first that succeeds
 	// wins. Most delete the outermost suffix and then optionally continue with
 	// the suffixes allowed in front of it.
 	define stem_noun_suffixes as (
 		// -lAr, then optionally a -ki chain
 		([mark_lAr] delete try(stem_suffix_chain_before_ki))
 		or
 		// -(n)cA, then optionally -lArI, possessive / -(s)U, or -lAr + -ki chain
 		([mark_ncA] delete
 			try(
 				([mark_lArI] delete)
 				or
 				([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 				or
 				([mark_lAr] delete stem_suffix_chain_before_ki)
 			)
 		)
 		or
 		// -ndA or -nA, which must be preceded by -lArI, -(s)U or a -ki chain;
 		// the -ki chain path contains no `] delete` of its own
 		([(mark_ndA or mark_nA)
 			(
 		  		(mark_lArI] delete)
 		  		or
 		  		(mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 		  		or
 		  		(stem_suffix_chain_before_ki)
 		  	)
 		)
 		or
 		// -ndAn or -nU, which must be preceded by -(s)U or -lArI
 		// NOTE(review): the -lArI path has no `] delete` - confirm this is intended
 		([(mark_ndAn or mark_nU) ((mark_sU ] delete try([mark_lAr] delete stem_suffix_chain_before_ki)) or (mark_lArI)))
 		or
 		// -DAn, then optionally possessive, -lAr or a -ki chain
 		( [mark_DAn] delete try ([
 			(
 		 		(mark_possessives ] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 		 		or
 		 		(mark_lAr] delete try(stem_suffix_chain_before_ki))
 		 		or
 		 		(stem_suffix_chain_before_ki)
 		 	))
 		)
 		or
 		// -(n)Un or -(y)lA, then optionally -lAr + -ki chain, possessive / -(s)U,
 		// or a -ki chain
 		([mark_nUn or mark_ylA] delete
 			try(
 				([mark_lAr] delete stem_suffix_chain_before_ki)
 				or
 				([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 				or
 				stem_suffix_chain_before_ki
 			)
 		)
 		or
 		// -lArI on its own
 		([mark_lArI] delete)
 		or
 		// a -ki chain on its own
 		(stem_suffix_chain_before_ki)
 		or
 		// -DA, -(y)U or -(y)A, then optionally a possessive (with optional -lAr)
 		// or -lAr, followed by a -ki chain
 		([mark_DA or mark_yU or mark_yA] delete try([((mark_possessives] delete try([mark_lAr)) or mark_lAr) ] delete [ stem_suffix_chain_before_ki))
 		or
 		// possessive or -(s)U, then optionally -lAr followed by a -ki chain
 		([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 	)

 	// Rewrites a word-final consonant: b -> p, c -> c-cedilla, d -> t,
 	// g-breve -> k. Fails when the word ends in anything else.
 	define post_process_last_consonants as (
 		[substring] among (
 			'{g~}'	(<- 'k')
 			'd'	(<- 't')
 			'c'	(<- '{c.}')
 			'b'	(<- 'p')
 		)
 	)

 	// If the word ends with 'd' or 'g' after stemming, the last U has most
 	// probably been overstemmed, as in 'kedim' -> 'ked'; Turkish words don't
 	// usually end with 'd' or 'g'. (A few very well known words such as 'ad'
 	// and 'soyad' are excluded by the caller.)
 	// Appends a U vowel to such a stem; the vowel is chosen from the last
 	// vowel already present in the stem.
 	define append_U_to_stems_ending_with_d_or_g as (
 		test ('d' or 'g')
 		(
 			(test (goto vowel ('a' or '{i'}')) <+ '{i'}')
 			or
 			(test (goto vowel ('e' or 'i')) <+ 'i')
 			or
 			(test (goto vowel ('o' or 'u')) <+ 'u')
 			or
 			(test (goto vowel ('{o"}' or '{u"}')) <+ '{u"}')
 		)
 	)

 )

 // Tests whether the word has more than one syllable.
 // In Turkish each vowel indicates a distinct syllable, so it is enough to
 // find two vowels. `loop 2` stops after the second vowel instead of
 // scanning on through every remaining vowel. The check runs under `test`,
 // so the cursor is not moved.
 define more_than_one_syllable_word as (
 	test (loop 2 (gopast vowel))
 )

 // Succeeds when the text from the cursor to the end of the word is exactly
 // 'ad' or 'soyad'. postlude calls this in forward mode with the cursor at
 // the start of the word, so it tests the whole word. Matching the literal
 // and then requiring `atlimit` needs no search through the word and no
 // scratch integer. Each alternative runs under `test`, so the cursor is
 // not moved.
 define is_reserved_word as (
 	test ('ad' atlimit)
 	or
 	test ('soyad' atlimit)
 )

 // Clean-up after suffix removal. Fails without changing anything when the
 // word is a reserved word. Otherwise, working from the end of the word, it
 // tries the vowel repair and then the final-consonant rewrite; both run
 // under `do`, so neither can make postlude fail.
 define postlude as (
 	not is_reserved_word
 	backwards (
 		do append_U_to_stems_ending_with_d_or_g
 		do post_process_last_consonants
 	)
 )

 // Entry point. Fails at once, leaving the word untouched, unless the word
 // has at least two vowels. Nominal verb suffixes are stripped first, working
 // from the end of the word. If continue_stemming_noun_suffixes is then
 // unset, the backward section fails, so neither stem_noun_suffixes nor
 // postlude is run.
 define stem as (
 	more_than_one_syllable_word
 	backwards (
 		do stem_nominal_verb_suffixes
 		continue_stemming_noun_suffixes
 		do stem_noun_suffixes
 	)
 	postlude
 )
	/* Stemmer for Turkish
	* author: Evren (Kapusuz) Çilden
	* email: evren.kapusuz at gmail.com
	* version: 1.0 (15.01.2007)


	* stems nominal verb suffixes
	* stems nominal inflections
	* more than one syllable word check
	* (y,n,s,U) context check
	* vowel harmony check
	* last consonant check and conversion (b, c, d, ğ to p, ç, t, k)

	* The stemming algorithm is based on the paper "An Affix Stripping
	* Morphological Analyzer for Turkish" by Gülşen Eryiğit and
	* Eşref Adalı (Proceedings of the IASTED International Conference
	* ARTIFICIAL INTELLIGENCE AND APPLICATIONS, February 16-18, 2004,
	* Innsbruck, Austria).

	* Turkish is an agglutinative language and has a very rich morphological
	* structure. In Turkish, you can form many different words from a single stem
	* by appending a sequence of suffixes. Eg. The word "doktoruymuşsunuz" means
	* "You had been the doctor of him". The stem of the word is "doktor" and it
	* takes three different suffixes -sU, -ymUs, and -sUnUz. The rules about
	* the append order of suffixes can be clearly described as FSMs.
	* The paper referenced above defines some FSMs for right to left
	* morphological analysis. I generated a method for constructing snowball
	* expressions from right to left FSMs for stemming suffixes.
	*/

	// Declarations of every routine defined in this file.
	routines (
	append_U_to_stems_ending_with_d_or_g // for preventing some overstemmings
	check_vowel_harmony // tests vowel harmony for suffixes
	is_reserved_word // tests whether current string is a reserved word ('ad','soyad')
	mark_cAsInA // nominal verb suffix
	mark_DA // noun suffix
	mark_DAn // noun suffix
	mark_DUr // nominal verb suffix
	mark_ki // noun suffix
	mark_lAr // noun suffix, nominal verb suffix
	mark_lArI // noun suffix
	mark_nA // noun suffix
	mark_ncA // noun suffix
	mark_ndA // noun suffix
	mark_ndAn // noun suffix
	mark_nU // noun suffix
	mark_nUn // noun suffix
	mark_nUz // nominal verb suffix
	mark_sU // noun suffix
	mark_sUn // nominal verb suffix
	mark_sUnUz // nominal verb suffix
	mark_possessives // possessive suffixes -(U)m, -(U)n, -(U)mUz, -(U)nUz
	mark_yA // noun suffix
	mark_ylA // noun suffix
	mark_yU // noun suffix
	mark_yUm // nominal verb suffix
	mark_yUz // nominal verb suffix
	mark_yDU // nominal verb suffix
	mark_yken // nominal verb suffix
	mark_ymUs_ // nominal verb suffix
	mark_ysA // nominal verb suffix

	// context checks run after a suffix whose first letter is optional
	mark_suffix_with_optional_y_consonant
	mark_suffix_with_optional_U_vowel
	mark_suffix_with_optional_n_consonant
	mark_suffix_with_optional_s_consonant

	more_than_one_syllable_word // tests that the word has at least two vowels

	post_process_last_consonants // rewrites a final b, c, d or g-breve
	postlude // clean-up run by stem after suffix removal

	stem_nominal_verb_suffixes // strips nominal verb suffixes
	stem_noun_suffixes // strips noun suffixes
	stem_suffix_chain_before_ki // strips suffix chains that end in -ki
	)

	/* Special characters in Unicode Latin-1 and Latin Extended-A */
	stringdef c. hex 'E7' // LATIN SMALL LETTER C WITH CEDILLA
	stringdef g~ hex '011F' // LATIN SMALL LETTER G WITH BREVE
	stringdef i' hex '0131' // LATIN SMALL LETTER DOTLESS I
	stringdef o" hex 'F6' // LATIN SMALL LETTER O WITH DIAERESIS
	stringdef s. hex '015F' // LATIN SMALL LETTER S WITH CEDILLA
	stringdef u" hex 'FC' // LATIN SMALL LETTER U WITH DIAERESIS

	// the names defined above are written between braces inside strings, e.g. '{i'}'
	stringescapes { }

	integers ( strlen ) // length of a string

	// set and unset in stem_nominal_verb_suffixes, tested in stem
	booleans ( continue_stemming_noun_suffixes )

	groupings ( vowel U vowel1 vowel2 vowel3 vowel4 vowel5 vowel6)

	define vowel 'ae{i'}io{o"}u{u"}' // the eight vowels
	define U '{i'}iu{u"}' // the four vowels that 'U' stands for in routine names

	// the vowel grouping definitions below are used for checking vowel harmony:
	// check_vowel_harmony reads the last vowel of the word and then searches
	// further left for a member of the group paired with that vowel
	define vowel1 'a{i'}ou' // group searched for when the last vowel is 'a'
	define vowel2 'ei{o"}{u"}' // group searched for when the last vowel is 'e'
	define vowel3 'a{i'}' // group searched for when the last vowel is dotless i
	define vowel4 'ei' // group searched for when the last vowel is 'i'
	define vowel5 'ou' // group searched for when the last vowel is 'o' or 'u'
	define vowel6 '{o"}{u"}' // group searched for when the last vowel is o-diaeresis or u-diaeresis

	externals ( stem ) // routine made visible to callers

	backwardmode (
	// Vowel-harmony guard used by the mark_* routines to limit overstemming.
	// Working right to left it stops at the nearest vowel, consumes it and then
	// requires that a member of the group paired with that vowel occurs
	// somewhere further left. The whole check runs under `test`, so the
	// cursor is left where it was.
	define check_vowel_harmony as (
		test (
			goto vowel	// fails when no vowel is left
			(
				('a' goto vowel1)
				or ('e' goto vowel2)
				or ('{i'}' goto vowel3)
				or ('i' goto vowel4)
				or (('o' or 'u') goto vowel5)
				or (('{o"}' or '{u"}') goto vowel6)
			)
		)
	)

	// Context check for a suffix that may be preceded by a buffer 'n'.
	// Called right after the suffix text has been matched (backward mode):
	//  - if the preceding character is 'n', it is consumed and the character
	//    before it must be a vowel;
	//  - otherwise nothing is consumed, and the character two positions back
	//    must be a vowel.
	// assumption: slice beginning is set correctly
	define mark_suffix_with_optional_n_consonant as (
		('n' test vowel)
		or
		(not(test 'n') test(next vowel))
	)

	// Context check for a suffix that may be preceded by a buffer 's'.
	// Called right after the suffix text has been matched (backward mode):
	//  - if the preceding character is 's', it is consumed and the character
	//    before it must be a vowel;
	//  - otherwise nothing is consumed, and the character two positions back
	//    must be a vowel.
	// assumption: slice beginning is set correctly
	define mark_suffix_with_optional_s_consonant as (
		('s' test vowel)
		or
		(not(test 's') test(next vowel))
	)

	// Context check for a suffix that may be preceded by a buffer 'y'.
	// Called right after the suffix text has been matched (backward mode):
	//  - if the preceding character is 'y', it is consumed and the character
	//    before it must be a vowel;
	//  - otherwise nothing is consumed, and the character two positions back
	//    must be a vowel.
	// assumption: slice beginning is set correctly
	define mark_suffix_with_optional_y_consonant as (
		('y' test vowel)
		or
		(not(test 'y') test(next vowel))
	)

	// Context check for a suffix that may be preceded by a buffer vowel from U.
	//  - if the preceding character is in U, it is consumed and the character
	//    before it must not be a vowel;
	//  - otherwise nothing is consumed, and the character two positions back
	//    must not be a vowel.
	define mark_suffix_with_optional_U_vowel as (
		(U test non-vowel)
		or
		(not(test U) test(next non-vowel))
	)

	// Possessives -(U)m, -(U)n, -(U)mUz, -(U)nUz: matches the part listed below,
	// then applies the optional-U context check. No vowel-harmony test is made.
	define mark_possessives as (
		among (
			'm' 'n'
			'm{i'}z' 'miz' 'muz' 'm{u"}z'
			'n{i'}z' 'niz' 'nuz' 'n{u"}z'
		)
		mark_suffix_with_optional_U_vowel
	)

	// -(s)U: harmony check, one vowel from U, then the optional-'s' context check.
	define mark_sU as (
		check_vowel_harmony U
		mark_suffix_with_optional_s_consonant
	)

	// -lArI: 'leri' or 'lar' + dotless i; no separate harmony check.
	define mark_lArI as (
		among (
			'lar{i'}'
			'leri'
		)
	)

	// -(y)U: harmony check, one vowel from U, then the optional-'y' context check.
	define mark_yU as (
		check_vowel_harmony U
		mark_suffix_with_optional_y_consonant
	)

	// -nU: harmony check, then 'n' followed by one of the four U vowels.
	define mark_nU as (
		check_vowel_harmony
		among (
			'ni' 'n{i'}'
			'nu' 'n{u"}'
		)
	)

	// -(n)Un: harmony check, U vowel + 'n', then the optional-'n' context check.
	define mark_nUn as (
		check_vowel_harmony
		among (
			'in' '{i'}n'
			'un' '{u"}n'
		)
		mark_suffix_with_optional_n_consonant
	)

	// -(y)A: harmony check, 'a' or 'e', then the optional-'y' context check.
	define mark_yA as (
		check_vowel_harmony
		('a' or 'e')
		mark_suffix_with_optional_y_consonant
	)

	// -nA: harmony check, then 'na' or 'ne'.
	define mark_nA as (
		check_vowel_harmony
		among (
			'na'
			'ne'
		)
	)

	// -DA: harmony check, then 'd' or 't' followed by 'a' or 'e'.
	define mark_DA as (
		check_vowel_harmony
		among (
			'da' 'ta'
			'de' 'te'
		)
	)

	// -ndA: harmony check, then 'nda' or 'nde'.
	define mark_ndA as (
		check_vowel_harmony
		among (
			'nda'
			'nde'
		)
	)

	// -DAn: harmony check, then 'd' or 't' followed by 'an' or 'en'.
	define mark_DAn as (
		check_vowel_harmony
		among (
			'dan' 'tan'
			'den' 'ten'
		)
	)

	// -ndAn: harmony check, then 'ndan' or 'nden'.
	define mark_ndAn as (
		check_vowel_harmony
		among (
			'ndan'
			'nden'
		)
	)

	// -(y)lA: harmony check, 'la' or 'le', then the optional-'y' context check.
	define mark_ylA as (
		check_vowel_harmony
		among (
			'la'
			'le'
		)
		mark_suffix_with_optional_y_consonant
	)

	// -ki: the literal 'ki', with no harmony or context check.
	define mark_ki as ( 'ki' )

	// -(n)cA: harmony check, 'ca' or 'ce', then the optional-'n' context check.
	define mark_ncA as (
		check_vowel_harmony
		among (
			'ca'
			'ce'
		)
		mark_suffix_with_optional_n_consonant
	)

	// -(y)Um: harmony check, U vowel + 'm', then the optional-'y' context check.
	define mark_yUm as (
		check_vowel_harmony
		among (
			'im' '{i'}m'
			'um' '{u"}m'
		)
		mark_suffix_with_optional_y_consonant
	)

	// -sUn: harmony check, then 's' + U vowel + 'n'.
	define mark_sUn as (
		check_vowel_harmony
		among (
			'sin' 's{i'}n'
			'sun' 's{u"}n'
		)
	)

	// -(y)Uz: harmony check, U vowel + 'z', then the optional-'y' context check.
	define mark_yUz as (
		check_vowel_harmony
		among (
			'iz' '{i'}z'
			'uz' '{u"}z'
		)
		mark_suffix_with_optional_y_consonant
	)

	// -sUnUz: one of the four listed forms; no separate harmony check.
	define mark_sUnUz as (
		among (
			'siniz' 's{i'}n{i'}z'
			'sunuz' 's{u"}n{u"}z'
		)
	)

	// -lAr: harmony check, then 'lar' or 'ler'.
	define mark_lAr as (
		check_vowel_harmony
		among (
			'lar'
			'ler'
		)
	)

	// -nUz: harmony check, then 'n' + U vowel + 'z'.
	define mark_nUz as (
		check_vowel_harmony
		among (
			'niz' 'n{i'}z'
			'nuz' 'n{u"}z'
		)
	)

	// -DUr: harmony check, then 'd' or 't' + U vowel + 'r'.
	define mark_DUr as (
		check_vowel_harmony
		among (
			'd{i'}r' 'dir' 'dur' 'd{u"}r'
			't{i'}r' 'tir' 'tur' 't{u"}r'
		)
	)

	// -cAsInA: 'cesine' or 'cas' + dotless i + 'na'; no separate harmony check.
	define mark_cAsInA as (
		among (
			'cesine'
			'cas{i'}na'
		)
	)

	// -(y)DU: harmony check, then 'd' or 't' + U vowel, bare or followed by
	// 'k', 'm' or 'n', then the optional-'y' context check.
	define mark_yDU as (
		check_vowel_harmony
		among (
			'd{i'}' 'di' 'du' 'd{u"}'
			't{i'}' 'ti' 'tu' 't{u"}'
			'd{i'}k' 'dik' 'duk' 'd{u"}k'
			't{i'}k' 'tik' 'tuk' 't{u"}k'
			'd{i'}m' 'dim' 'dum' 'd{u"}m'
			't{i'}m' 'tim' 'tum' 't{u"}m'
			'd{i'}n' 'din' 'dun' 'd{u"}n'
			't{i'}n' 'tin' 'tun' 't{u"}n'
		)
		mark_suffix_with_optional_y_consonant
	)

	// -(y)sA: 'sa' or 'se', bare or followed by 'k', 'm' or 'n', then the
	// optional-'y' context check.
	// does not fully obey vowel harmony, so no harmony check is made
	define mark_ysA as (
		among (
			'sa' 'sak' 'sam' 'san'
			'se' 'sek' 'sem' 'sen'
		)
		mark_suffix_with_optional_y_consonant
	)

	// -(y)mUs: harmony check, 'm' + U vowel + s-cedilla, then the optional-'y'
	// context check.
	define mark_ymUs_ as (
		check_vowel_harmony
		among (
			'mi{s.}' 'm{i'}{s.}'
			'mu{s.}' 'm{u"}{s.}'
		)
		mark_suffix_with_optional_y_consonant
	)

	// -(y)ken: the literal 'ken', then the optional-'y' context check.
	define mark_yken as (
		'ken'
		mark_suffix_with_optional_y_consonant
	)

	// Strips nominal verb suffixes from the end of the word (backward mode).
	// A slice is opened with `[`, six suffix patterns are tried in order and
	// the routine finishes with `]delete`. Patterns that contain their own
	// `] delete try([ ...)` delete the outermost suffix first and then look
	// for one more suffix in front of it, which the final `]delete` removes.
	// continue_stemming_noun_suffixes is set on entry and unset only by the
	// -lAr pattern; stem tests the flag after this routine returns.
	define stem_nominal_verb_suffixes as (
	[
	set continue_stemming_noun_suffixes
	// 1: a single -(y)mUs, -(y)DU, -(y)sA or -(y)ken
	(mark_ymUs_ or mark_yDU or mark_ysA or mark_yken)
	or
	// 2: -cAsInA, optionally a further suffix from the list, then -(y)mUs
	(mark_cAsInA (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_)
	or
	(
	// 3: -lAr is deleted at once; -DUr, -(y)DU, -(y)sA or -(y)mUs may precede it
	mark_lAr ] delete try([(mark_DUr or mark_yDU or mark_ysA or mark_ymUs_))
	unset continue_stemming_noun_suffixes
	)
	or
	// 4: -nUz followed (to the left) by -(y)DU or -(y)sA
	(mark_nUz (mark_yDU or mark_ysA))
	or
	// 5: a suffix from the list is deleted at once; -(y)mUs may precede it
	((mark_sUnUz or mark_yUz or mark_sUn or mark_yUm) ] delete try([ mark_ymUs_))
	or
	// 6: -DUr is deleted at once; an optional suffix plus -(y)mUs may precede it
	(mark_DUr ] delete try([ (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_))
	]delete
	)

	// stems noun suffix chains ending with -ki
	// Backward mode. Requires a word-final 'ki' preceded by -DA, -(n)Un or -ndA,
	// then keeps stripping the suffixes that may stand in front of those and
	// calls itself where another -ki chain may follow.
	define stem_suffix_chain_before_ki as (
	[
	mark_ki
	(
	// -DA + ki: both are deleted together, then an optional -lAr or
	// possessive (each possibly followed by more of the chain)
	(mark_DA] delete try([
	(mark_lAr] delete try(stem_suffix_chain_before_ki))
	or
	(mark_possessives] delete try([mark_lAr] delete stem_suffix_chain_before_ki))

	))
	or
	// -(n)Un + ki: both are deleted together, then an optional -lArI,
	// possessive / -(s)U, or a further -ki chain
	(mark_nUn] delete try([
	(mark_lArI] delete)
	or
	([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
	or
	(stem_suffix_chain_before_ki)
	))
	or
	// -ndA + ki: nothing is deleted until -lArI or -(s)U has matched too
	(mark_ndA (
	(mark_lArI] delete)
	or
	((mark_sU] delete try([mark_lAr]delete stem_suffix_chain_before_ki)))
	or
	// NOTE(review): no `] delete` on this path before the recursive
	// call, which opens its own `[` - confirm this is intended
	(stem_suffix_chain_before_ki)
	))
	)
	)

	// Strips noun suffixes from the end of the word (backward mode). The
	// alternatives are tried in the order written and the first that succeeds
	// wins. Most delete the outermost suffix and then optionally continue with
	// the suffixes allowed in front of it.
	define stem_noun_suffixes as (
	// -lAr, then optionally a -ki chain
	([mark_lAr] delete try(stem_suffix_chain_before_ki))
	or
	// -(n)cA, then optionally -lArI, possessive / -(s)U, or -lAr + -ki chain
	([mark_ncA] delete
	try(
	([mark_lArI] delete)
	or
	([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
	or
	([mark_lAr] delete stem_suffix_chain_before_ki)
	)
	)
	or
	// -ndA or -nA, which must be preceded by -lArI, -(s)U or a -ki chain;
	// the -ki chain path contains no `] delete` of its own
	([(mark_ndA or mark_nA)
	(
	(mark_lArI] delete)
	or
	(mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
	or
	(stem_suffix_chain_before_ki)
	)
	)
	or
	// -ndAn or -nU, which must be preceded by -(s)U or -lArI
	// NOTE(review): the -lArI path has no `] delete` - confirm this is intended
	([(mark_ndAn or mark_nU) ((mark_sU ] delete try([mark_lAr] delete stem_suffix_chain_before_ki)) or (mark_lArI)))
	or
	// -DAn, then optionally possessive, -lAr or a -ki chain
	( [mark_DAn] delete try ([
	(
	(mark_possessives ] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
	or
	(mark_lAr] delete try(stem_suffix_chain_before_ki))
	or
	(stem_suffix_chain_before_ki)
	))
	)
	or
	// -(n)Un or -(y)lA, then optionally -lAr + -ki chain, possessive / -(s)U,
	// or a -ki chain
	([mark_nUn or mark_ylA] delete
	try(
	([mark_lAr] delete stem_suffix_chain_before_ki)
	or
	([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
	or
	stem_suffix_chain_before_ki
	)
	)
	or
	// -lArI on its own
	([mark_lArI] delete)
	or
	// a -ki chain on its own
	(stem_suffix_chain_before_ki)
	or
	// -DA, -(y)U or -(y)A, then optionally a possessive (with optional -lAr)
	// or -lAr, followed by a -ki chain
	([mark_DA or mark_yU or mark_yA] delete try([((mark_possessives] delete try([mark_lAr)) or mark_lAr) ] delete [ stem_suffix_chain_before_ki))
	or
	// possessive or -(s)U, then optionally -lAr followed by a -ki chain
	([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
	)

	// Rewrites a word-final consonant: b -> p, c -> c-cedilla, d -> t,
	// g-breve -> k. Fails when the word ends in anything else.
	define post_process_last_consonants as (
		[substring] among (
			'{g~}'	(<- 'k')
			'd'	(<- 't')
			'c'	(<- '{c.}')
			'b'	(<- 'p')
		)
	)

	// If the word ends with 'd' or 'g' after stemming, the last U has most
	// probably been overstemmed, as in 'kedim' -> 'ked'; Turkish words don't
	// usually end with 'd' or 'g'. (A few very well known words such as 'ad'
	// and 'soyad' are excluded by the caller.)
	// Appends a U vowel to such a stem; the vowel is chosen from the last
	// vowel already present in the stem.
	define append_U_to_stems_ending_with_d_or_g as (
		test ('d' or 'g')
		(
			(test (goto vowel ('a' or '{i'}')) <+ '{i'}')
			or
			(test (goto vowel ('e' or 'i')) <+ 'i')
			or
			(test (goto vowel ('o' or 'u')) <+ 'u')
			or
			(test (goto vowel ('{o"}' or '{u"}')) <+ '{u"}')
		)
	)

	)

	// Tests whether the word has more than one syllable.
	// In Turkish each vowel indicates a distinct syllable, so it is enough to
	// find two vowels. `loop 2` stops after the second vowel instead of
	// scanning on through every remaining vowel. The check runs under `test`,
	// so the cursor is not moved.
	define more_than_one_syllable_word as (
		test (loop 2 (gopast vowel))
	)

	// Succeeds when the text from the cursor to the end of the word is exactly
	// 'ad' or 'soyad'. postlude calls this in forward mode with the cursor at
	// the start of the word, so it tests the whole word. Matching the literal
	// and then requiring `atlimit` needs no search through the word and no
	// scratch integer. Each alternative runs under `test`, so the cursor is
	// not moved.
	define is_reserved_word as (
		test ('ad' atlimit)
		or
		test ('soyad' atlimit)
	)

	// Clean-up after suffix removal. Fails without changing anything when the
	// word is a reserved word. Otherwise, working from the end of the word, it
	// tries the vowel repair and then the final-consonant rewrite; both run
	// under `do`, so neither can make postlude fail.
	define postlude as (
		not is_reserved_word
		backwards (
			do append_U_to_stems_ending_with_d_or_g
			do post_process_last_consonants
		)
	)

	// Entry point. Fails at once, leaving the word untouched, unless the word
	// has at least two vowels. Nominal verb suffixes are stripped first, working
	// from the end of the word. If continue_stemming_noun_suffixes is then
	// unset, the backward section fails, so neither stem_noun_suffixes nor
	// postlude is run.
	define stem as (
		more_than_one_syllable_word
		backwards (
			do stem_nominal_verb_suffixes
			continue_stemming_noun_suffixes
			do stem_noun_suffixes
		)
		postlude
	)