# morph-ga.txt: Morphology rules for Irish.
# Copyright (C) 2004-2007 Kevin P. Scannell <kscanne@gmail.com>
#
# This is free software; see the file COPYING for copying conditions.  There
# is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE.
#
#  with the exception of some of the non-standard verb endings,
#  each rule decreases number of caps or length => recursion will stop.
#
#  each substitution is applied globally (though this is only relevant
#  for the few rules that aren't anchored at ^ or $)
#
#  There is no need to encode the regular inflectional morphology of 
#  Irish here because it is already handled by the C++ code that generates
#  the basic lexicon.   So if "imirceacha" is not in the lexicon (it is)
#  "imirceach" won't be either.   On the other hand it can be quite useful
#  to include *derivational* morphology; so if "gaimbíneachas" is not
#  in the lexicon (it is) there is some hope that by stripping the 
#  "eachas" it will find the root "gaimbín".
# All-caps words: keep the first letter (plus any ' or lenition h) and
# lowercase the rest with \L.  The first rule covers initials other than
# B, D, G, M, N, T (e.g. ÉIRE); those six get their own rules below, each
# excluding the second letter that would make the pair an eclipsis digraph
# (BP, BHF, DT, GC, MB, ND/NG, TS) so that such words are left for other rules.
# Fields are tab-separated, as in the rest of the file.
^([ACEFH-LO-SU-ZÁÉÍÓÚ])([A-ZÁÉÍÓÚ'-]*[A-ZÁÉÍÓÚ][A-ZÁÉÍÓÚ'-]*)$	$1\L$2		-1	<.+>	<&>	# ÉIRE; potentially eclipsing init chars require more care:
^B([A-GI-OQ-ZÁÉÍÓÚ][A-ZÁÉÍÓÚ'-]*)$	B\L$1		-1	<.+>	<&>
^B'([AEIOUÁÉÍÓÚF][A-ZÁÉÍÓÚ'-]*)$	B'\L$1		-1	<.+>	<&>
^BH([A-EG-ZÁÉÍÓÚ][A-ZÁÉÍÓÚ'-]*)$	Bh\L$1		-1	<.+>	<&>
^D([A-SU-ZÁÉÍÓÚ][A-ZÁÉÍÓÚ'-]*)$	D\L$1		-1	<.+>	<&>
^D'([AEIOUÁÉÍÓÚF][A-ZÁÉÍÓÚ'-]*)$	D'\L$1		-1	<.+>	<&>
^G([ABD-ZÁÉÍÓÚ][A-ZÁÉÍÓÚ'-]*)$	G\L$1		-1	<.+>	<&>
^M([AC-ZÁÉÍÓÚ][A-ZÁÉÍÓÚ'-]*)$	M\L$1		-1	<.+>	<&>
^N([ABCEFH-ZÁÉÍÓÚ-][A-ZÁÉÍÓÚ'-]*)$	N\L$1		-1	<.+>	<&>
^T([A-RT-ZÁÉÍÓÚ-][A-ZÁÉÍÓÚ'-]*)$	T\L$1		-1	<.+>	<&>
^([ACEFH-LO-SU-ZÁÉÍÓÚ])		\l$1		-1	<.+>	<&> 	# Príomhach
^B([^pPh])		b$1		-1	<.+>	<&>
^Bh([^fF])		bh$1		-1	<.+>	<&>
^D([^tT])		d$1		-1	<.+>	<&>
^G([^cC])		g$1		-1	<.+>	<&>
^M([^bB])		m$1		-1	<.+>	<&>
^N([^dDgG])		n$1		-1	<.+>	<&>
^T([^sS])		t$1		-1	<.+>	<&>
(...)-([A-ZÁÉÍÓÚ])	$1-\l$2		-1	<.+>	<&> 	# Príomh-Aire
^b'([AEIOUÁÉÍÓÚF])	b'\l$1		-1	<.+>	<&>
^bP([A-ZÁÉÍÓÚ'-]+)$	bp\L$1		-1	<.+>	<&>	# bPRÍOMH-AIRE
^bP			bp		-1	<.+>	<&>	# bPríomhach
^bhF([A-ZÁÉÍÓÚ'-]+)$	bhf\L$1		-1	<.+>	<&>
^bhF			bhf		-1	<.+>	<&>
^d'([AEIOUÁÉÍÓÚF])	d'\l$1		-1	<.+>	<&>
^dT([A-ZÁÉÍÓÚ'-]+)$	dt\L$1		-1	<.+>	<&>
^dT			dt		-1	<.+>	<&>
^gC([A-ZÁÉÍÓÚ'-]+)$	gc\L$1		-1	<.+>	<&>
^gC			gc		-1	<.+>	<&>
^h([AEIOUÁÉÍÓÚ])([A-ZÁÉÍÓÚ'-]+)$	h$1\L$2	-1	<.+>	<&>	# hÉIREANN
^h([AEIOUÁÉÍÓÚ])	h\l$1		-1	<.+>	<&>
^m'([AEIOUÁÉÍÓÚF])	m'\l$1		-1	<.+>	<&>
^mB([A-ZÁÉÍÓÚ'-]+)$	mb\L$1		-1	<.+>	<&>
^mB			mb		-1	<.+>	<&>
^n([AEIOUÁÉÍÓÚ])([A-ZÁÉÍÓÚ'-]+)$	n$1\L$2	-1	<.+>	<&>
^n([AEIOUÁÉÍÓÚ])	n-\l$1		-1	<.+>	<&>
^nD([A-ZÁÉÍÓÚ'-]+)$	nd\L$1		-1	<.+>	<&>
^nD			nd		-1	<.+>	<&>
^nG([A-ZÁÉÍÓÚ'-]+)$	ng\L$1		-1	<.+>	<&>
^nG			ng		-1	<.+>	<&>
^tS([A-ZÁÉÍÓÚ'-]+)$	ts\L$1		-1	<.+>	<&>
^tS			ts		-1	<.+>	<&>
^t([AEIOUÁÉÍÓÚ])([A-ZÁÉÍÓÚ'-]+)$	t$1\L$2	-1	<N.*>	<&>	# tUASAL
^t([AEIOUÁÉÍÓÚ])	t-\l$1		-1	<N.*>	<&>
# these aren't in aspell db by default; better to do early (mb'amhlaidh)
^mb'			b'		-1	<.+>	<&>
^d'f([aeiouáéíóú])	d'fh$1		2	<.+>	<&>
#  do these early - avoids mistakes with "aighthe" (==aghaidheanna)
#  and note that first rule must precede second to get "-aithe" pp's correct
# IMPORTANT - used for -ughadh endings e.g., -> -aghadh which goes to ú below 
u(i?[dg]h)			a$1		1	<.+>	<&>  # see next few
i[dg]h(th?.)		i$1		1	<.+>	<&>  # marcaidhthe, masluighthe, beannuighth?ear
#############################################################################
################## CAIGHDEÁN OIFIGIÚIL SPELLING REFORMS #####################
#############################################################################
#   modernize prefixes (no need to trap mutated versions; mutations are
#   stripped below).
#   full list from OD77 is in gaeilge/diolaim/x/OD77alt-prefixes
^h?aith			ath		1	<.+>	<&>
^h?aird([^e])	ard$1		1	<.+>	<&>
# handled by more general "nn" rule below
#^h?anns			ans		1	<.+>	<&>
^dh'			d'		1	<.+>	<&>
^h-			h		1	<.+>	<&>    # CO p.126
^n-([AEIOUÁÉÍÓÚ])	n$1		1	<.+>	<&>    # CO p.125
^t-([AEIOUÁÉÍÓÚ])	t$1		1	<.+>	<&>    # CO p.125
^h?aoin			aon		1	<.+>	<&>
^h?ana-			an-		1	<.+>	<&>    # ana-eagraithe
#^[Áá]rd(-?..)	ard$1		1	<.+>	<&>
^h?ath-?th		at		1	<[ANV].*>	<&>
^bh'			b'		1	<.+>	<&>		# bh'fhiú -> b'fhiú
^bain			ban		1	<.+>	<&>
^báin			bán		1	<.+>	<&>
^bairr			barr		1	<.+>	<&>
^baoith			baoth		1	<.+>	<&>
^beig			beag		1	<.+>	<&>
^béil			béal		1	<.+>	<&>
^bioth			bith		1	<.+>	<&>
^boig			bog		1	<.+>	<&>
^boinn			bonn		1	<.+>	<&>
^boirr			borr		1	<.+>	<&>  # boirrphéist...
# handled by more general ll rule below
#^bolls			bols		1	<.+>	<&>
^buadh			bua			1	<.+>	<&>
^buain			buan		1	<.+>	<&>  # buainseasamh...
^caim			cam		1	<.+>	<&>  # caimbheart...
# handled by more general tht rule below
#^caitht			cait		1	<.+>	<&>
^caoimh			caomh		1	<.+>	<&>
# N.B. makes string longer
#^ceathar			ceathair		1	<.+>	<&>
^céid			céad		1	<.+>	<&>
# N.B. makes string longer
#^cinn			ceann		1	<.+>	<&>
^ceinn			ceann		1	<.+>	<&>  # Di04 ceinn-litir, srl.
^cionn			ceann		1	<.+>	<&>
^cláir			clár		1	<.+>	<&>   # cláirfhiacail e.g.
^claoidh			cloí		1	<.+>	<&>
^claoin			claon		1	<.+>	<&>
^coilg			colg		1	<.+>	<&>  # coilgsheasamh e.g.
^c[oó]imh-mh		cóimh		1	<[ANV].*>	<&>
^coimh			comh		1	<.+>	<&>    # coimhlíonadh
^cóimhmh			cóimh	1	<.+>	<&>    # cóimhmheas, srl in Di04
^cóimh([^eéií])			comh$1	1	<.+>	<&>    # cóimh-chealg (Di04)
# general ómh[^a] -> omh$1  rule below
^cómha		comha		1	<[ANV].*>	<&>
^cómhmh			cómh	1	<.+>	<&>    # cómhmhaith, srl in Di04
^comh-(mh[aáoóuú])		có$1		1	<[ANV].*>	<&>
^comh-(mh[eéií])		cói$1		1	<[ANV].*>	<&>
^coimh-n		cóin		1	<[ANV].*>	<&>
^c[oó]mh-?(n[aáoóuú])		có$1		1	<[ANV].*>	<&>
^c[oó]mh-?(n[eéií])		cói$1		1	<[ANV].*>	<&>
^có-(..)		comh$1		1	<[ANV].*>	<&>
^có([bcdfgpt]h)		comh$1		1	<[ANV].*>	<&>
^c[ou]ir([pr])			cor$1		1	<.+>	<&>
^crainn			crann		1	<.+>	<&>
^crioth			crith		1	<.+>	<&>
^croim			crom		1	<.+>	<&>   # croimleac e.g.
^cruai?dh			crua		1	<.+>	<&>
^cúil			cúl		1	<.+>	<&>
^dá-			dé		1	<.+>	<&>		# common in Di04 e.g. dá-bheathach
^daill			dall		1	<.+>	<&> # daillintinn
# daoir- -> daor-, except before "s".  The letter captured after the prefix
# must be put back ($1); otherwise one character of the word is dropped.
^daoir([^s])			daor$1		1	<.+>	<&>
#  first spelling of dea- is from na Grianna
^déagh-			dea-		1	<.+>	<&>
^de[áai]gh-		dea-		1	<.+>	<&>
^de[áai]gh([^-])	dea-$1		1	<.+>	<&>
^deir([bg])			dear$1		1	<.+>	<&> # deirbhnialas, deirgmheisce
^dío-			dí-		1	<.+>	<&>   # Di04 common. dío-chuimhne 
^diubhr			diúr		1	<.+>	<&>
^dligh(i?)		dlí		1	<.+>	<&>
^doi-			do-		1	<.+>	<&>  # doi-bhriathar, etc.  Di04
^doinn			donn		1	<.+>	<&>
^droich			droch		1	<.+>	<&>
^druim			droim		1	<.+>	<&>
^duibh			dubh		1	<.+>	<&>
^dubh-			dú		1	<.+>	<&>
# handled by more general prefix+gc rule below
#^éagc			éag		1	<.+>	<&>
#^éigc			éig		1	<.+>	<&>
# handled by more general prefix+dt rule below
#^éadt			éad		1	<.+>	<&>
#^éidt			éid		1	<.+>	<&>
# handled by more general ll rule below
#^falls			fals		1	<.+>	<&>
^fágbh			fág			1	<.+>	<&>
^h?eadar		idir		1	<.+>	<&>
^h?eidir		idir	1	<.+>	<&>
^fiadh			fia		1	<.+>	<&>
^fír-			fíor		1	<.+>	<&> # include hyphen so len is same
^flith			frith		1	<.+>	<&>
^frioth([^á])	frith$1		1	<.+>	<&>
^fó-			fo-		1	<.+>	<&>
^foir-			for-	1	<.+>	<&>  # foir-chéim
^fuair-			fuar-	1	<.+>	<&>  # fuair-chrith
# N.B. makes string longer
#^gamh			geamh		1	<.+>	<&>
# N.B. makes string longer
^girr			gearr		1	<.+>	<&>
^gairbh			garbh		1	<.+>	<&>
^géir			géar		1	<.+>	<&>
^gnáith			gnáth		1	<.+>	<&>
^gobh			gabh		1	<.+>	<&>
^héala([^aeiouáéíóú]+[eéií])	héili$1		1	<.+>	<&>    # OD77
^héala([^aeiouáéíóú]+[aáoóuú])	héilea$1	1	<.+>	<&>
^h?iair		iar		1	<.+>	<&>
^h?iarann		iarn		1	<.+>	<&>
^h?iodar		idir		1	<.+>	<&> # iodarchaidreamh
^h?iol([^ar])			il$1		1	<.+>	<&>
^h?iomshlá			iomlá		1	<.+>	<&>  # exception to next
^h?iom([^aálpr])			im$1		1	<.+>	<&>
^h?ion			in		1	<.+>	<&>
# handled by more general nn rule below
#^h?ionn([rs])			ion$1		1	<.+>	<&>
^([ls])áimh		$1ámh	1	<.+>	<&>
^láin		lán		1	<.+>	<&>  # láinchinnte
# handled by more general sb rule below
#^léasb			léasp		1	<.+>	<&>
^leig([^h])		lig$1		1	<.+>	<&>  # leigint, but not leigheas words
# risky?
^leith-			leath-		1	<.+>	<&>
^loim			lom		1	<.+>	<&>  # loimeasna
^lóir		leor		1	<.+>	<&>
^luaith([^r])	luath$1		1	<.+>	<&>  # luaithintinn
# handled by more general sg rule below
#^luasg			luasc		1	<.+>	<&>
# N.B. makes string longer
^lubh			luibh		1	<.+>	<&> # lubhghort
^lui([mn])		loi$1	1	<.+>	<&>		# luinnir->loinnir
^lúith			lúth		1	<.+>	<&>
^maill			mall		1	<.+>	<&>
^maoil			maol		1	<.+>	<&>
^maoith([^n])		maoth$1		1	<.+>	<&>  # maoithintinneach
^mairbh		marbh		1	<.+>	<&>   # mairbhghin
# only two math- words anyway?
#^magh		math		1	<.+>	<&>
^meadhón		meán		1	<.+>	<&>
^mean		meán		1	<.+>	<&>
^mh'([aeiouáéíóú].)		m'$1		1	<.+>	<&>
# N.B. makes string longer
^min-		mion-		1	<.+>	<&>
^mío-			mí-		1	<.+>	<&>
^mío([bcdfgmpst]h)		mí$1		1	<[ANV].*>	<&>  # míochothrom
^(mh?í)-([^aeiouáéíóú].)		$1$2		1	<[ANV].*>	<&>
# no midh- words anyway?
#^miodh		midh		1	<.+>	<&>
^móir			mór		1	<.+>	<&>
^naoimh			naomh		1	<.+>	<&>
^neamh-([^m].)		neamh$1		1	<[ANV].*>	<&>  # try stripping hyphen before stripping whole thing!
^neimh(..)		neamh$1		1	<.+>	<&>  # Di04
^nea-mb			neamhbh		1	<.+>	<&>
^nea-			neamh-		1	<.+>	<&>    # strip hyphen later
^nea([cdfgpt]h)		neamh$1		1	<.+>	<&>
^h?ói([gr])		ó$1		1	<.+>	<&>
# handled by more general nn rule below
#^pannc			panc		1	<.+>	<&>
#^poinnt			point		1	<.+>	<&>
^pont			punt		1	<.+>	<&>
^prímh			príomh		1	<.+>	<&>
^réidh			ré		1	<.+>	<&>
^réimh			réamh		1	<.+>	<&>
^r[íi]o?gh([^n])		rí$1		1	<.+>	<&>   # righdhamhna, ríogh-choróin, but not "righnigh", "righne", etc.
^ro-			ró-		1	<.+>	<&>
^rói-			ró-		1	<.+>	<&>
^roighn			righn		1	<.+>	<&>
^roimh			réamh		1	<.+>	<&>
^ruadh			rua		1	<.+>	<&>
^rúin			rún		1	<.+>	<&>
^sáir			sár		1	<.+>	<&>  # sáireolas
^saoghl			saol		1	<.+>	<&>
^saoibh			saobh		1	<.+>	<&>
^saoir-			saor-		1	<.+>	<&>
^sclug			sclog		1	<.+>	<&>
^scoith			scoth		1	<.+>	<&> # scoithdhearg
^seana-			sean-		1	<.+>	<&>    # strip hyphen later
^sein-			sean-		1	<.+>	<&>    # strip hyphen later
^siod			sead		1	<.+>	<&>
^síodh			sí		1	<.+>	<&>
# N.B. makes it longer
^sír-			síor-		1	<.+>	<&>
^smúid			smúit		1	<.+>	<&>
^soi-			so-		1	<.+>	<&>  # soi-bhriste, etc.  Di04
# handled by more general ll rule below
#^soills			soils		1	<.+>	<&>
# handled by more general nn rule below
#^sonnr			sonr		1	<.+>	<&>
^sr[áa]ic		srac		1	<.+>	<&>
# N.B. makes string longer
^srang			sreang		1	<.+>	<&>
^sream			sram		1	<.+>	<&>
# N.B. makes string longer - risky, plus no "sruim-" words at all?
#^srim			sruim		1	<.+>	<&>
^stáit			stát		1	<.+>	<&>  # stáitseirbhís
^taidhbh			taibh		1	<.+>	<&>
# handled by more general sb rule below
#^taisb		taisp	1	<.+>	<&>
^teasb([^h])		taispe$1	1	<.+>	<&>
^teint			tint		1	<.+>	<&>
^tele([^aeiouáéíóú]+[eéií])	teili$1		1	<.+>	<&>    # OD77
^tele([^aeiouáéíóú]+[aáoóuú])	teilea$1	1	<.+>	<&>
^tiom			tim		1	<.+>	<&>
^tiugh			tiú		1	<.+>	<&>
# risky
#^tiúr	tabhar		1	<.+>	<&>
# handled by more general rule below
#^treabhth			treaf		1	<.+>	<&>
^treas			tras		1	<.+>	<&>
^tréin			tréan		1	<.+>	<&>
^troim			trom		1	<.+>	<&>
^tuaith		tuath		1	<.+>	<&>  # tuaithcheantar
^h?uaith		uath		1	<.+>	<&>
^h?úir			úr		1	<.+>	<&>
# End of Prefixes - Now general spelling reforms (but not from CO handbook)
# Mostly based on corpus work...
# deán for déan
^(n?dh?)eán		$1éan		1	<V.*>	<&>
eoracht(a?)$	eoireacht$1		1	<[FN].*>	<&> # spaisteoracht
óracht(a?)$		óireacht$1	1	<[FN].*>	<&>   # fáidheadóracht
éair$			éir			1	<[FN].*>	<&>		# piléair, paipéair
eor$			eoir		1	<.+>	<&>
# 08-11-05, generalize nouns to everything (fraochta e.g.)
chd(a?)$		cht$1		1	<.+>	<&>    # masc too (achd, m.sh.)
ínteacht(a?)$	íneacht$1	1	<[FN].*>	<&>    # deisimínteacht, etc. 
óanna$			ónna		1	<[FN].*>	<&>	# common!  clóanna, seóanna, gnóanna, srl.
# corpus - Dómhnach, etc., but not rómhilis, etc.
#  Handles  all ^cómh- prefixes too except  ^cómha which we do above explicitly
([^Rr])ómh([^a])		$1omh$2		1	<.+>	<&>
^(.)ógh			$1ogh		1	<.+>	<&>		# fóghmhar->foghmhar->fómhar
eamhn		iún		1	<.+>	<&>     #  maitheamhnas, breitheamhnas
([^e])amhn	$1ún	1	<.+>	<&>     #  rathamhnas
#   warning - makes longer 
^b'f([aeiouáéíóú])	b'fh$1	1	<.+>	<&>
#   Now various general spelling reforms from CO handbook
ai(cht?ín)		a$1		1	<.+>	<&>    # CO p.120, smaichtín
sg			sc		1	<.+>	<&>    # CO p.115
sb			sp		1	<.+>	<&>    # CO p.115
sd			st		1	<.+>	<&>    # CO p.115
dn			nn		1	<.+>	<&>    # CO p.114 céadna
é(ir[ld])		e$1		1	<.+>	<&>    # CO p.123
á(i?r[dlnr])		a$1		1	<.+>	<&>    # CO p.123-124
ó(i?r[dlnr])		o$1		1	<.+>	<&>    # CO p.123-124
ú(i?r[dlnr])		u$1		1	<.+>	<&>    # CO p.123-124
eó			eo		1	<.+>	<&>    # go leór, aibhleóg, etc.
eamh[au]il$		iúil		1	<[ANF].*>	<&>    # dlightheamhail
eamhla			iúla		1	<[ANF].*>	<&>
([^e])amh[au]il$		$1úil		1	<[ANF].*>	<&>    # CO p.102, a2+barúil, etc.
([^e])amhla		$1úla		1	<[ANF].*>	<&>    # "-amhlachta?" too
adht([aá])		at$1	1	<.+>	<&>	# iadhta, cruadhtán, iarradhtas, srl
aíle$			aíola		1	<.+>	<&>    # *3ú* decl.
eachaibh$		igh		1	<N pl="y".*>	<&>    # taoiseachaibh
([^e])achaibh$		$1aigh		1	<N pl="y".*>	<&>    # Albanachaibh
anaibh$			anna		1	<N pl="y".*>	<&>    # sgoileanaibh, ceisteanaibh
ánaibh$			áin		1	<N pl="y".*>	<&>    # uachtaránaibh
aibh$			a		1	<N pl="y".*>	<&>    # sinagógaibh, fiachaibh
(..[^a])ibh$		$1í		1	<N pl="y".*>	<&>    # múinteoiribh, etc.
(.)eu			$1éa		1	<.+>	<&>    # sgeul, ceudna, srl.
tch			t		1	<.+>	<&>    # CO p.103
i[dg]hea		ío		1	<.+>	<&>    # CO p.105,107
i[dg]he			í		1	<.+>	<&>    # CO p.104,106
uí([mto])		aí$1		1	<.+>	<&>	# cialluíonn, ionaduíochta, leasuítear, mionnuím
uith			aith		1	<.+>	<&>    # rialuitheoir
adha			ú		1	<.+>	<&>    # CO p.105
ádh			á		1	<.+>	<&>    # ádhbhar, rádh, etc.
[mb]hth			f		1	<.+>	<&>    # CO p.106
th[mb]h			f		1	<.+>	<&>    # CO p.106
ghai			aí		1	<.+>	<&>    # CO p.107
thch			ch		1	<.+>	<&>    # CO p.108
tht			t		1	<.+>	<&>    # CO p.108
ll([rst])		l$1		1	<.+>	<&>    # CO p.112-113
nn([cdlrst])		n$1		1	<.+>	<&>    # CO p.114
ghail$			aíl		1	<N pl="n" gnt="n" gnd="f".*>	<&>
ghaile$			aíola		1	<N pl="n" gnt="y" gnd="f".*>	<&>
ighil$			íl		1	<N pl="n" gnt="n" gnd="f".*>	<&>
ighile$			íola		1	<N pl="n" gnt="y" gnd="f".*>	<&>
(ain|[eé][ai]|iar)gc	$1g		1	<.+>	<&>    # CO p.109
e[au]gc	éag		1	<.+>	<&>    # CO p.109
(é[ai])dt		$1d		1	<.+>	<&>    # CO p.109
(dh?í?)omb		$1om		1	<.+>	<&>    # CO p.109
(ai?n)bhf		$1bh		1	<.+>	<&>    # CO p.109  ainbhfiosach
(..)u(i?s)$		$1a$2		1	<.+>	<&>    # solus, árus, etc.
#   some dialect/pre-standard noun/adjective inflections
([oó]ga)í$		$1		1	<N pl="y".*>	<&>   # duilleogaí
nnaí$			nna		1	<N pl="y".*>	<&>   # ceisteannaí
(..)dha$		$1tha		1	<.+>	<&>    # measardha, muinteardha
(..)dha$		$1a		1	<.+>	<&>    # stuamdha, príomhdha
eói?g$			eog		1	<[NF].*>	<&>   # nb2 
eoig$			eog		1	<.+>	<&>   # nb2, leave as <.+> for <F>'s
óig$			óg		1	<[NF].*>	<&>   # nb2 
lainn$			lann		1	<[NF].*>	<&>   # nb2
aisí$			ais		1	<[NF].*>	<&>   # cuntaisí, bronntanaisí
dth([ea])$		dt$1		1	<.+>	<&>    # goidthe
# Fórsaíbh -> Fórsaí (dpl)
íbh$			í	1	<[NF].*>	<&>
íocha$			í	1	<N pl="y".*>	<&>	# margaidheacha->margaíocha->margaí
# but note in previous that sometimes this gets interrupted by correct
# plural adjective:  cogaidheacha->cogaíocha, or sometimes even correct
# plural noun:  claidheacha->claíocha  (so want to keep -íocha intermediate)
##################  NON/PRE-STANDARD VERB ENDINGS #########################
# future/conditional 2nd declension with -cha?- see CO p.103
e[oó]cha(i?dh)$		eo$1		1	<V.*>	<&>
([^e])ócha(i?dh)$	$1ó$2		1	<V.*>	<&>
e[oó]chaimi([ds])$	eoimi$1		1	<V.*>	<&>
([^e])óchaimi([ds])$	$1óimi$2	1	<V.*>	<&>
e[oó]chamuid$	eoimid		1	<V p="y" t="fáist">	<&>
([^e])óchamuid$	$1óimid		1	<V p="y" t="fáist">	<&>
e[oó]chai(nn|dís)$	eoi$1		1	<V p="y" t="coinn">	<&>
([^e])óchai(nn|dís)$	$1ói$2		1	<V p="y" t="coinn">	<&>
e[oó]chthá$		eofá		1	<V p="y" t="coinn">	<&>
([^e])óchthá$		$1ófá		1	<V p="y" t="coinn">	<&>
e[oó]ch(th)?aidhe$		eofaí		1	<V p="n" t="coinn">	<&>
([^e])óch(th)?aidhe$		$1ófaí		1	<V p="n" t="coinn">	<&>
(ó|eo)char$			$1far	1	<V p="n" t="fáist">	<&>
(ó|eo)chthar$			$1far	1	<V p="n" t="fáist">	<&>
faidhear$			far	1	<V p="n" t="fáist">	<&>
# dá n-amharcthá, dá dtugthá, srl
th(eá)$			t$1		1	<V p="y" t="gnáth">	<&>
#   2nd declension Dinneen-style endings
(..)aíd$		$1aíonn		1	<V p="y" t="láith">	<&> # cialluíd
aghadh			ú		1	<[NF].*>	<&>  # broad and slender
iaghadh			íodh		1	<[VF].*>	<&>  # innsiughadh->insíodh
(..)thear$		$1tear		1	<[VF].*>	<&> # aut. present
(..)t(te?ar)$	$1$2		1	<[VF].*>	<&> # aut. present
ighdar			íodar		1	<[VF].*>	<&>    # broad and slender
ighim			ím		1	<[VF].*>	<&>    # broad and slender
ighidh			í		1	<.+>	<&>    # subjunctive?
ighinn			ínn		1	<.+>	<&>    # imperfect
uigh'e$			aithe		1	<.+>	<&>	# U019.txt
fiomuid$		fimid		1	<[VF].*>	<&>
# Future forms: see OS88 p.215 for these (fe?as, etc. on p.63), also CB p.145
#  socróid, dílseoid, 3rd plural forms in achtanna
fa[ds]$			faidh		1	<V p="y" t="fáist">	<&>
fea[ds]$		fidh		1	<V p="y" t="fáist">	<&>
óchas$		óidh		1	<V p="y" t="fáist">	<&> # meabhróchas->meabhrós->...
ói?[ds]$		óidh		1	<V p="y" t="fáist">	<&>
eoi?[ds]$		eoidh		1	<V p="y" t="fáist">	<&>
# Common in achtanna to have -fidh,-fid ending on broad stem, lots of 
# other slender/broad mismatches:
([aáoóuú][^aeiouáéíóú]*)fí$	$1faí	1	<V p="n" t="coinn">	<&>
([aáoóuú][^aeiouáéíóú]*)fidh?$	$1faidh	1	<V p="y" t="fáist">	<&>
([eéií][^aeiouáéíóú]*)fadh$	$1feadh	1	<V p="y" t="coinn">	<&>
([eéií][^aeiouáéíóú]*)far$	$1fear	1	<V p="n" t="fáist">	<&>
([eéií][^aeiouáéíóú]*)tar$	$1tear	1	<V p="n" t="láith">	<&> # * not + for chítar
# Future 2nd p., "r" forms noted on OS88 p.216;
# see achtanna for íocfaid/íocfa, bainfid/bainfe
fa$			faidh		1	<V p="y" t="fáist">	<&>
fai[rsd]$		faidh		1	<V p="y" t="fáist">	<&>
fe$			fidh		1	<V p="y" t="fáist">	<&>
fi[rsd]$		fidh		1	<V p="y" t="fáist">	<&>
óis$			óidh		1	<V p="y" t="fáist">	<&>
eois$			eoidh		1	<V p="y" t="fáist">	<&>
# Future/Cond autonomous; unwanted prefix h is common in achtanna
^h([aeiouáéíóú].+fe?ar)$	$1	1	<V p="n" t="fáist">	<&>
^h([aeiouáéíóú].+fa?í)$		$1	1	<V p="n" t="coinn">	<&>
# Past; also see Di27 (present).  "r" forms on p.216 (also CB p.145 present)
(...)íos$		$1igh		1	<V p="y" t="caite">	<&>
(...)ai[rs]$		$1		1	<V p="y" t="caite">	<&>
(..[^aeiouáéíóú])i[rs]$	$1		1	<V p="y" t="caite">	<&>
(...)ís$		$1igh		1	<V p="y" t="caite">	<&>
#  Thugtaoi in MU44, e.g.
taoi$			taí		1	<V p="n" t="gnáth">	<&>
#  present
ains$			ann		1	<V p="y" t="láith">	<&>
ins$			eann		1	<V p="y" t="láith">	<&>
íns$			íonn		1	<V p="y" t="láith">	<&>
#  OS88 p.126  present
anns$			ann		1	<V p="y" t="láith">	<&>
íonns$			íonn		1	<V p="y" t="láith">	<&>
#  corpus; now handled above
# th(eá|ear)$		t$1		1	<V.*>	<&>
famuid		faimid		1	<V p="y" t="fáist">	<&>
#  -idh, -aidh on 1st declension verbs: Pres. Subj. or 2nd pl. imper (-igí) 
#  according to Dinneen.  Some examples seem to be pres. in corpus,
#  e.g. suidhidh, some apparently past e.g. uneclipsed "tigidh"
#  Here we go with subjunctive which is the most common by far
(.[^a])idh$	$1e		1	<V p="y" t="foshuit">	<&>
# (..)aidh$	$1a		1	<V p="y" t="foshuit">	<&>
#  pre-standard texts commonly have "déanfaimíd", "tugaimís", "rachaimíd"...
(...)imí([ds])$		$1imi$2	1	<V.*>	<&>
igidh$			igí		1	<V p="y" t="ord">	<&>
mist$			mis		1	<V.*>	<&>
#  various -áil  endings should be broad, CO p.120
áilim$			álaim		1	<V.*>	<&>
áileann$		álann		1	<V.*>	<&>
áilfidh$		álfaidh		1	<V.*>	<&>
áilfeá$			álfá		1	<V.*>	<&>
áilfear$		álfar		1	<V.*>	<&>
áilfí$			álfaí		1	<V.*>	<&>
ái(lf?)eadh$		á$1adh		1	<V.*>	<&>
ái(lf?)im(i[sd])$	á$1aim$2	1	<V.*>	<&>
áiligí$			álaigí		1	<V.*>	<&>
áilea([dm]ar)$		ála$1		1	<V.*>	<&>
ái(lf?)idís$		á$1aidís	1	<V.*>	<&>
áltar$			áiltear		1	<V.*>	<&>
álta$			áilte		1	<A.*>	<&>
álaithe$		áilte		1	<A.*>	<&>
álanna$			álacha		1	<N.*>	<&>  # nb3 pl
#############################################################################
#   start some more radical changes, e.g. stripping prefixes completely
#############################################################################
# hyphened prefixes, an-, dea-
^h?an-([bcfgmp]h)	$1		0	<[AN].*>	<&>
^h?an-([bcfgmp][^h])	$1		2	<[AN].*>	<&>
^h?an-([^bcfgmp][^h])	$1		0	<[AN].*>	<&>
^h?an-([^bcfgmp]h)	$1		2	<[AN].*>	<&>   # an-dhuine 
^dea-([bcdfgmpt]h)	$1		0	<[AN].*>	<&>
^dea-([bcdfgmpt][^h])	$1		2	<[AN].*>	<&>
^dea-(sh[aeiouáéíóúlnr])	$1	0	<[AN].*>	<&>
^dea-(s[aeiouáéíóúlnr])	$1		2	<[AN].*>	<&>
^dea-(s[^aeiouáéíóúlnrh])	$1	0	<[AN].*>	<&>   # dea-scéal
^dea-([^bcdfgmpst])	$1		0	<[AN].*>	<&>
# other prefixes; most are not independent words (ath, comh, etc.)
# but several are -- these are worth including for several reasons:
# (1) efficiency; morph. analysis happens well before the check for
#  compounds/run-togethers (2) allows for a more refined check for
#  lenition of the prefixed word or other changes (comh-mh, comh->có, etc.)
^h?ain([deéiílnrst])	$1		0	<[AN].*>	<&>
^h?ain([bcfgmp]h)	$1		0	<[AN].*>	<&>
^h?ath([^bcdfgmpst-])	$1		0	<[ANV].*>	<&>
^h?ath([bcdfgmp]h)	$1		0	<[ANV].*>	<&>
^h?ath(sh[aeiouáéíóúlnr])	$1		0	<[ANV].*>	<&>
^h?ath(s[^aeiouáéíóúlnrh])	$1	0	<[ANV].*>	<&>
^h?ath-?th		th		1	<[ANV].*>	<&>   # -> "at-" above
^comh([^bcdfgmnpst-])	$1		0	<[ANV].*>	<&>
^comh([bcdfgpt]h)	$1		0	<[ANV].*>	<&>
^comh(sh[aeiouáéíóúlnr])	$1	0	<[ANV].*>	<&>
^comh(s[^aeiouáéíóúlnrh])	$1	0	<[ANV].*>	<&>
# cómhargadh, cómhalartach; but pre-standard "cómhoibriughadh" too
^có(mh[aáoóuú])		$1		0	<[ANV].*>	<&>
^cói(mh[eéií])		$1		0	<[ANV].*>	<&>
^có(n[aáoóuú])		$1		0	<[ANV].*>	<&>
^cói(n[eéií])		$1		0	<[ANV].*>	<&>
# Strip the prefix "do" (hyphenated before a vowel, unhyphenated before a
# lenited consonant or an "s" cluster) and look up the remainder.
^do-([aeiouáéíóú])	$1		0	<[AN].*>	<&>
# ^do([lnr]) BELOW
^do([bcdfgmpt]h)	$1		0	<[AN].*>	<&>
^do(sh[aeiouáéíóúlnr])	$1		0	<[AN].*>	<&>
^do(s[^aeiouáéíóúlnrh])	$1		0	<[AN].*>	<&>
# NOTE(review): same pattern and replacement as the first do- rule above,
# differing only in that the POS filter also admits V; the earlier rule
# looks redundant given this one -- confirm which filter is intended.
^do-([aeiouáéíóú])	$1		0	<[ANV].*>	<&>
^droch([^bcdfgmpst-])	$1		0	<[AN].*>	<&>
^droch-(ch)		$1		0	<[AN].*>	<&>
^droch([bdfgmpt]h)	$1		0	<[AN].*>	<&>
^droch(sh[aeiouáéíóúlnr])	$1		0	<[AN].*>	<&>
^droch(s[^aeiouáéíóúlnrh])	$1		0	<[AN].*>	<&>
^fíor([^bcdfgmprst-])	$1		0	<[ANV].*>	<&>
^fíor-(r)		$1		0	<[ANV].*>	<&>
^fíor([bcdfgmpt]h)	$1		0	<[ANV].*>	<&>
^fíor(sh[aeiouáéíóúlnr])	$1		0	<[ANV].*>	<&>
^fíor(s[^aeiouáéíóúlnrh])	$1		0	<[ANV].*>	<&>
^for([^bcdfgmprst-])	$1		0	<[ANV].*>	<&>
^for-(r)		$1		0	<[ANV].*>	<&>
^for([bcdfgmpt]h)	$1		0	<[ANV].*>	<&>
^for(sh[aeiouáéíóúlnr])	$1		0	<[ANV].*>	<&>
^for(s[^aeiouáéíóúlnrh])	$1		0	<[ANV].*>	<&>
^fo-([aeiouáéíóú])	$1		0	<[ANV].*>	<&>
# ^fo([lnr]) BELOW
^fo([bcdfgmpt]h)	$1		0	<[ANV].*>	<&>
^fo(sh[aeiouáéíóúlnr])	$1		0	<[ANV].*>	<&>
^fo(s[^aeiouáéíóúlnrh])	$1		0	<[ANV].*>	<&>
^fo-(..)		$1		1	<[ANV].*>	<&>
^frith([^bcdfgmpst-])	$1		0	<[ANV].*>	<&>
^fri(t[^h])		$1		0	<[ANV].*>	<&>
^frith([bcdfgmp]h)	$1		0	<[ANV].*>	<&>
^frith(sh[aeiouáéíóúlnr])	$1		0	<[ANV].*>	<&>
^frith(s[^aeiouáéíóúlnrh])	$1		0	<[ANV].*>	<&>
^h?iar([^bcdfgmprst-])	$1		0	<[ANV].*>	<&>
^h?iar-(r)		$1		0	<[ANV].*>	<&>
^h?iar([bcdfgmpt]h)	$1		0	<[ANV].*>	<&>
^h?iar(sh[aeiouáéíóúlnr])	$1		0	<[ANV].*>	<&>
^h?iar(s[^aeiouáéíóúlnrh])	$1		0	<[ANV].*>	<&>
# ilsiamsa, ilsiollach, etc.  no special case
^h?il([^bcfgmp-]..)	$1		0	<[AN].*>	<&>
^h?il([bcfgmp]h.)	$1		0	<[AN].*>	<&>
^h?im([^bcdfghmpst-]..)	$1		0	<[ANV].*>	<&>
^h?im([bcdfgmpt]h.)	$1		0	<[ANV].*>	<&>
^h?im(sh[aeiouáéíóúlnr])	$1		0	<[ANV].*>	<&>
^h?im(s[^aeiouáéíóúlnrh].)	$1		0	<[ANV].*>	<&>
# ^in([^bcfgmp-]) BELOW
^h?in-(n)		$1		0	<[AN].*>	<&>
^h?in([bcfgmp]h..)	$1		0	<[AN].*>	<&>
^leas-(s)		$1		0	<[AN].*>	<&>
^leas([aeiouáéíóúd]..)	$1		0	<[AN].*>	<&>
^leas([bcfgm]h..)	$1		0	<[AN].*>	<&>
^mí-([aeiouáéíóú])	$1		0	<[ANV].*>	<&>
^mí([aeiouáéíóú]..)	$1		2	<[ANV].*>	<&>  # míúsáid
^mí([lnr]..)		$1		0	<[ANV].*>	<&>
^mí([bcdfgmpt]h.)	$1		0	<[ANV].*>	<&>
^mí(sh[aeiouáéíóúlnr])	$1		0	<[ANV].*>	<&>
^mí(s[^aeiouáéíóúlnrh])	$1		0	<[ANV].*>	<&>
^neamh-?([aeiouáéíóú])	$1		0	<[ANV].*>	<&>  # neamh-aistear but neamhaithne
^neamh([lnr])		$1		0	<[ANV].*>	<&>
^neamh([bcdfgpt]h)	$1		0	<[ANV].*>	<&>
^neamh-(mh)		$1		0	<[ANV].*>	<&>
^neamh(sh[aeiouáéíóúlnr])	$1	0	<[ANV].*>	<&>
^neamh(s[^aeiouáéíóúlnrh])	$1	0	<[ANV].*>	<&>
^príomh([^bcdfgmpst-])	$1		0	<N.*>	<&>
^príomh([bcdfgpt]h)	$1		0	<N.*>	<&>
^príomh-(mh)		$1		0	<N.*>	<&>
^príomh(sh[aeiouáéíóúlnr])	$1		0	<N.*>	<&>
^príomh(s[^aeiouáéíóúlnrh])	$1		0	<N.*>	<&>
^réamh([^bcdfgmpst-])	$1		0	<[ANV].*>	<&>
^réamh([bcdfgpt]h)	$1		0	<[ANV].*>	<&>
^réamh-(mh)		$1		0	<[ANV].*>	<&>
^réamh(sh[aeiouáéíóúlnr])	$1		0	<[ANV].*>	<&>
^réamh(s[^aeiouáéíóúlnrh])	$1		0	<[ANV].*>	<&>
^rí([lnr]..)		$1		0	<[ANV].*>	<&>
^rí([bcdfgmpt]h.)	$1		0	<[ANV].*>	<&>
^rí(sh[aeiouáéíóúlnr])	$1		0	<[ANV].*>	<&>
^rí(s[^aeiouáéíóúlnrh])	$1		0	<[ANV].*>	<&>
^ró-([aeiouáéíóú]..)	$1		0	<[AN].*>	<&>
^ró([aeouáéíóú]..)	$1		2	<[AN].*>	<&>  # not "i" since several actual words in Dinneen start with rói-
^ró([lnr]..)		$1		0	<[AN].*>	<&>
^ró([bcdfgmpt]h.)	$1		0	<[AN].*>	<&>
^ró(sh[aeiouáéíóúlnr])	$1		0	<[AN].*>	<&>
^ró(s[^aeiouáéíóúlnrh])	$1		0	<[AN].*>	<&>
^sain([^bcdfgmpst-])	$1		0	<[ANV].*>	<&>
^sain([bcdfgmpt]h)	$1		0	<[ANV].*>	<&>
^sain(sh[aeiouáéíóúlnr])	$1		0	<[ANV].*>	<&>
^sain(s[^aeiouáéíóúlnrh])	$1		0	<[ANV].*>	<&>
^so-([aeiouáéíóú]..)	$1		0	<[AN].*>	<&>
^so([lnr]..)		$1		0	<[AN].*>	<&>
^so([bcdfgmpt]h.)	$1		0	<[AN].*>	<&>
^so(sh[aeiouáéíóúlnr].)	$1		0	<[AN].*>	<&>
^so(s[^aeiouáéíóúlnrh]..)	$1		0	<[AN].*>	<&>
^tras([^bcfghmnps-])	$1		0	<[ANV].*>	<&>   # leave "^trasn" alone
^tras([bcfgmp]h)	$1		0	<[ANV].*>	<&>
^tras-(s)		$1		0	<[ANV].*>	<&>
# next batch are non-words so really need to be here, but VERY infrequent
^(cil|gig|h?is|meig|micr|pic|teil)ea-?([^aeiouáéíóú-]+[aouáóú])	$2	0	 <.+>	<&>
^(cil|gig|h?is|meig|micr|pic|teil)i-?([^aeiouáéíóú-]+[eiéí])	$2	0	 <.+>	<&>
^(h?ant|fót|nan|par|pol|h?ultr)a-?([^aeiouáéíóú-]+[aouáóú])		$2	0	 <.+>	<&>
^(h?ant|fót|nan|par|pol|h?ultr)ai-?([^aeiouáéíóú-]+[eiéí])		$2	0	<.+>	<&>
^(h?eachtar|freas|h?íos|neas|réalt|tob|h?uas|h?uath)-?([^-])		$2	0	 <.+>	<&>
#############################################################################
#  Spelling standardizations, but ones where I'm less sure that they 
#  always apply, i.e. non-standard bits that are also commonly standard,
#  e.g. -thaí$   which could be legit "torthaí" but also non-std "glórthaí"
#  Because of this, safer to apply them after the prefix stripping, so
#  e.g. "iarchoirpigh" comes out as MOIRF{choirpigh} instead of 
#  CAIGHDEAN{choirpeach}, and "fíorghanntanas" comes out as 
#  MOIRF{ghanntanas} instead of CAIGHDEAN{ghanntan}
chaí$			cha		1	<[ANF].*>	<&>
thaí$			tha		1	<.+>	<&>    # glórthaí, spéarthaí
(.[aá])igh$		$1ch		1	<[NF].*>	<&>	# nb2 -aí genitives
(..)i[dg]h$		$1each		1	<[NF].*>	<&>   #  same (e.g. stámhaillidh)
(..)aidh$		$1a		1	<.+>	<&>    # bhfágaidh
# past tenses
(...)eas$		$1		1	<V p="y" t="caite">	<&>
(..[^aeiouáéíóú])as$	$1		1	<V p="y" t="caite">	<&>
#  rule for [^eéií] is above with other non-std prefixes, last resort here
#  for words like cóimhéigean -> comhéigean
^cóimh([eéií])			comh$1	1	<.+>	<&>
#############################################################################
#  derivational morphology
([ao])chai?s$		$1ch		0	<A pl="n" gnt="n".*>	<&>
acht$			ach		0	<A pl="n" gnt="n".*>	<N pl="n" gnt="n" gnd="f">
([ao])chtaí$		$1cht		1	<N pl="n" gnt="n".*>	<&>
íocht$			íoch		0	<A pl="n" gnt="n".*>	<N pl="n" gnt="n" gnd="f">
úlachta?$		úil		0	<A pl="n" gnt="n".*>	<&> # CO p.120
úchái?n$		ú		0	<N.*>	<&>
# but actually non-standard if it's gs or pp; see the general uith -> aith
# rule among the CO handbook spelling reforms above
uithe$			ú		0	<N.*>	<&>
#############################################################################
#  emphatic suffixes
(.[aouáóú])(san?|na)$	$1		0	<[NV].*>	<&>
(.[eiéí])(sean|[sn]e)$	$1		0	<[NV].*>	<&>
([aouáóú][^aeiouáéíóú-]*[^aeiouáéíóús-])san?$	$1	0	<[NV].*>	<&>
([aouáóú][^aeiouáéíóú-]*[^aeiouáéíóún-])na$	$1	0	<[NV].*>	<&>
([eiéí][^aeiouáéíóú-]*[^aeiouáéíóús-])se(an)?$	$1	0	<[NV].*>	<&>
([eiéí][^aeiouáéíóú-]*[^aeiouáéíóún-])ne$	$1	0	<[NV].*>	<&>
([aouáóú][^aeiouáéíóú-]*s)-san?$		$1	0	<[NV].*>	<&>
([aouáóú][^aeiouáéíóú-]*n)-na$			$1	0	<[NV].*>	<&>
([eiéí][^aeiouáéíóú-]*s)-se(an)?$		$1	0	<[NV].*>	<&>
([eiéí][^aeiouáéíóú-]*n)-ne$			$1	0	<[NV].*>	<&>
#############################################################################
#   common spelling errors
# iri$			irí		2	<N.*>	<&>
uil$			úil		2	<A.*>	<&>
iúl$			iúil		2	<A.*>	<&>
(.[^aeiouáéíóú])ala$	$1ála		2	<N.*>	<&>
ula$			úla		2	<A.*>	<&>
eail$			eáil		2	<.+>	<&>    # common missing accent
([^ae])oir$		$1óir		2	<.+>	<&>
([^ae])ora$		$1óra		2	<.+>	<&>
aio		aío		2	<.+>	<&>   # ionadaiocht, srl
eor$			eoir		2	<N.*>	<&>
ionn$			íonn		2	<V.*>	<&>
itear$			ítear		2	<V.*>	<&>
iteá$			íteá		2	<V.*>	<&>
iodh$			íodh		2	<V.*>	<&>
([^e])oidh$		$1óidh		2	<V.*>	<&>
([^e])ofar$		$1ófar		2	<V.*>	<&>
# next batch from ngramprobs.pl
u$			ú		2	<N.*>	<&>
#chtai$			chtaí		2	<N.*>	<&>
#ai$			aí		2	<.+>	<&>
(...)i$			$1í		2	<.+>	<&>
iu			iú		2	<.+>	<&>
no			nó		2	<.+>	<&>
# aio			aío		2	<.+>	<&>
#aioch(ta?)$		aíoch$1		2	<N.*>	<&>   # see prev.
io			ío		2	<.+>	<&>
leir			léir		2	<.+>	<&>
^(g?ch?)om([^haómnpr]..)	$1omh$2		2	<.+>	<&>
éá			éa		2	<.+>	<&>  # held down alt too long; "éa" is only diphthong with "a" as second letter, and none with e/u
([áéóú])í		$1i		2	<.+>	<&>
íó			ío		2	<.+>	<&>
# places where the second vowel is the one with the fada are rarer - most of
# them are "aí", then "eá", "iú", "uí", then noise -- all but "iú" conflict
# with a resolution above; áí is, e.g., almost always a mistake for "ái" !
í([áú])			i$1		2	<.+>	<&>
# i$			í		2	<.+>	<&>  # lotsa false compounds with this
#############################################################################
#  rules with somewhat lower probability (e.g. fo-, which must come after the
#  non-standard verb endings to avoid "foluíonn" parsing as "fo+luíonn")
^do([lnr]..)		$1		0	<[AN].*>	<&>
^fo([lnr]..)		$1		0	<[ANV].*>	<&>
^h?in([^bcfgmp-]..)	$1		0	<[AN].*>	<&>
# these next ones work for proper names like "NGael"; otherwise
# they're caught by general "make everything lower" catch all that follows
^BP			bP		1	<.+>	<&>
^BHF			bhF		1	<.+>	<&>
^DT			dT		1	<.+>	<&>
^GC			gC		1	<.+>	<&>
^MB			mB		1	<.+>	<&>
^ND			nD		1	<.+>	<&>
^NG			nG		1	<.+>	<&>
^TS			tS		1	<.+>	<&>
# last resort for weird caps
([A-ZÁÉÍÓÚ])		\l$1		2	<.+>	<&>	# pRíOMhach
([eiéí][^aeiouáéíóú]+)íní?$	$1		0	<.+>	<&>	# diminutive
([aouáóú][^aeiouáéíóú]+)íní?$	$1		2	<.+>	<&>	# diminutive
([óo])igíní?$		$1g		0	<N.*>	<&>
#  the next rule handles a lot of stuff, including unwanted hyphens
#  after prefixes (since the prefix rules will apply after recursing)
#  and similarly unwanted hyphens before emphatic endings (CO p.126)
(.)-			$1		1	<.+>	<&>       # ró-bheag / róbheag
# delenite, de-eclipse.  
# Only helps when we recurse and find a standard unmutated prefix above!
# putting caps back Dec 08 for non-words like "tSíneach", "nIónach"
# which will only be found by stripping mutation and keeping cap
^b([Pp][^h])		$1		0	<[ANV].*>	<&>
^bh([Ff][^h])		$1		0	<[ANV].*>	<&>
^d([Tt][^h])		$1		0	<[ANV].*>	<&>
^g([Cc][^h])		$1		0	<[ANV].*>	<&>
^m([Bb][^h])		$1		0	<[ANV].*>	<&>
^n([DdGg][^h])		$1		0	<[ANV].*>	<&>
^t([Ss][^h])		$1		0	<[ANV].*>	<&>
^([bcdfgmpBCDFGMP])h	$1		0	<[ANVY].*>	<&>
^([Tt])h([^s])		$1$2		0	<[ANVY].*>	<&>   # luathscéal danger
^([Ss])h([lnraeiouáéíóú])	$1$2	0	<[ANVY].*>	<&>
^t'([AEIOUÁÉÍÓÚaeiouáéíóú]|[Ff]h)	d'$1	1	<[ANVY].*>	<&>	# t'athair
^[bdm]'([AEIOUÁÉÍÓÚaeiouáéíóú]|[Ff]h)	$1	0	<[ANVY].*>	<&>	# d'Éamonn too
^[nt]-([aeiouáéíóú])	$1		0	<[ANV].*>	<&>
^[nt]([AEIOUÁÉÍÓÚ])	$1		0	<[ANV].*>	<&>