# Copyright (C) 2004 Jörg Tiedemann <joerg@stp.ling.uu.se>
#
# pre-defined input files
#
# Anonymous hash of named file descriptions. Each key is a name and each
# value is a hash of attributes for that name. Attributes used in this part:
#   'file'       - path of the file
#   'format'     - one of 'XML', 'text', 'xces align', 'DBM', 'uwa tab'
#   'write_mode' - always 'overwrite' in this part
#   'root'       - given for the 'XML' entries only (always 's')
#   'key'        - list of field names, given for the 'DBM' entries only
#   'columns'    - list of column names, given for the 'uwa tab' entry only
# NOTE(review): how each attribute is interpreted is decided by the code that
# loads this file, which is not visible here - confirm against the loader.
{
# ----------- runtime XML and plain-text files (data/runtime) -----
'runtime target' => {
'file' => 'data/runtime/align.xml.trg',
'write_mode' => 'overwrite',
'format' => 'XML',
'root' => 's',
},
'runtime source' => {
'file' => 'data/runtime/align.xml.src',
'write_mode' => 'overwrite',
'format' => 'XML',
'root' => 's',
},
'runtime target text' => {
'file' => 'data/runtime/trg',
'write_mode' => 'overwrite',
'format' => 'text',
},
'runtime source text' => {
'file' => 'data/runtime/src',
'write_mode' => 'overwrite',
'format' => 'text',
},
'runtime text' => {
'file' => 'data/runtime/text.xml',
'write_mode' => 'overwrite',
'format' => 'XML',
'root' => 's',
},
'runtime xml' => {
'file' => 'data/runtime/align.xml',
'write_mode' => 'overwrite',
'format' => 'xces align',
},
# ----------- DBM files (data/runtime) -----
# Entries with 'key' => ['source','target'] name two fields, the frequency
# entries with 'key' => ['token'] name one.
# Presumably these fields form the DBM lookup key - TODO confirm.
'string similarities' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/str.dbm',
'key' => ['source','target'],
},
'cooc freq' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/cooc.dbm',
'key' => ['source','target'],
},
'source freq' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/src.dbm',
'key' => ['token'],
},
'target freq' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/trg.dbm',
'key' => ['token'],
},
'cooc stat' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/stat.dbm',
'key' => ['source','target'],
},
'dice' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/dice.dbm',
'key' => ['source','target'],
},
'mutual information' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/mi.dbm',
'key' => ['source','target'],
},
't-score' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/tscore.dbm',
'key' => ['source','target'],
},
# Four numbered entries that differ only in name and file (stat1 .. stat4).
'co-occurrence statistics 1' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/stat1.dbm',
'key' => ['source','target'],
},
'co-occurrence statistics 2' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/stat2.dbm',
'key' => ['source','target'],
},
'co-occurrence statistics 3' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/stat3.dbm',
'key' => ['source','target'],
},
'co-occurrence statistics 4' => {
'format' => 'DBM',
'write_mode' => 'overwrite',
'file' => 'data/runtime/stat4.dbm',
'key' => ['source','target'],
},
# ----------- n-gram files (data/runtime) -----
# 'ngram stat' is the only entry in this file with format 'uwa tab' and a
# 'columns' attribute; 'ngram freq' is a DBM file with a single 'token' field.
'ngram stat' => {
'file' => 'data/runtime/ngrams.tab',
'format' => 'uwa tab',
'columns' => ['ngram'],
'write_mode' => 'overwrite',
},
'ngram freq' => {
'file' => 'data/runtime/ngramfreq.dbm',
'format' => 'DBM',
'key' => ['token'],
'write_mode' => 'overwrite',
},
# ----------- learned clues (bootstrapping) -----
# Every entry in this section has the same shape: a DBM file under
# data/runtime, 'write_mode' => 'overwrite', and the two field names
# 'source' and 'target' in 'key'. Only the entry name and the file differ.
# NOTE(review): the meaning of the short clue names (dl, dlp, dlx, ...) is
# not documented in this file - confirm against the code that uses them.
'clue dl' => {
'format' => 'DBM',
'file' => 'data/runtime/dl.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'clue dlp' => {
'format' => 'DBM',
'file' => 'data/runtime/dlp.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'clue dlx' => {
'format' => 'DBM',
'file' => 'data/runtime/dlx.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'clue dl3x' => {
'format' => 'DBM',
'file' => 'data/runtime/dl3x.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'clue dpx' => {
'format' => 'DBM',
'file' => 'data/runtime/dpx.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target']
},
'clue dp3' => {
'format' => 'DBM',
'file' => 'data/runtime/dp3.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
# Note that the entry name contains parentheses and the file is dp3-pref.dbm.
'clue dp3 (prefix)' => {
'format' => 'DBM',
'file' => 'data/runtime/dp3-pref.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'clue dp3x' => {
'format' => 'DBM',
'file' => 'data/runtime/dp3x.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'clue dc3' => {
'format' => 'DBM',
'file' => 'data/runtime/dc3.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'clue dc3p' => {
'format' => 'DBM',
'file' => 'data/runtime/dc3p.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'clue dc3x' => {
'format' => 'DBM',
'file' => 'data/runtime/dc3x.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
#-------- declarative clues
# DBM files grouped by a four-letter directory name (sven, ensv, enfr, svde).
# All paths start with the literal text $UplugLang: the strings are
# single-quoted, so Perl itself does not interpolate a variable here.
# NOTE(review): presumably the loader replaces $UplugLang with a directory
# path, and the directory names are language pairs - confirm both.
#
# sven and ensv each define 'pos clue', 'pos clue 2', 'chunk clue' and
# 'base lexicon'; enfr and svde define only the three clue entries.
'sven pos clue' => {
'format' => 'DBM',
'file' => '$UplugLang/sven/pos.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'sven pos clue 2' => {
'format' => 'DBM',
'file' => '$UplugLang/sven/pos2.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'sven chunk clue' => {
'format' => 'DBM',
'file' => '$UplugLang/sven/chunk.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
# Unlike the clue entries, the base lexicon has no 'write_mode' attribute.
'sven base lexicon' => {
'format' => 'DBM',
'file' => '$UplugLang/sven/baselex.dbm',
'key' => ['source','target'],
# Disabled attribute, kept for reference:
# 'encoding' => 'iso-8859-1',
},
'ensv pos clue' => {
'format' => 'DBM',
'file' => '$UplugLang/ensv/pos.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'ensv pos clue 2' => {
'format' => 'DBM',
'file' => '$UplugLang/ensv/pos2.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'ensv chunk clue' => {
'format' => 'DBM',
'file' => '$UplugLang/ensv/chunk.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
# Unlike the clue entries, the base lexicon has no 'write_mode' attribute.
'ensv base lexicon' => {
'format' => 'DBM',
'file' => '$UplugLang/ensv/baselex.dbm',
'key' => ['source','target'],
# Disabled attribute, kept for reference:
# 'encoding' => 'iso-8859-1',
},
'enfr pos clue' => {
'format' => 'DBM',
'file' => '$UplugLang/enfr/pos.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'enfr pos clue 2' => {
'format' => 'DBM',
'file' => '$UplugLang/enfr/pos2.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'enfr chunk clue' => {
'format' => 'DBM',
'file' => '$UplugLang/enfr/chunk.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'svde pos clue' => {
'format' => 'DBM',
'file' => '$UplugLang/svde/pos.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'svde pos clue 2' => {
'format' => 'DBM',
'file' => '$UplugLang/svde/pos2.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
'svde chunk clue' => {
'format' => 'DBM',
'file' => '$UplugLang/svde/chunk.dbm',
'write_mode' => 'overwrite',
'key' => ['source','target'],
},
#-----------------------------------------------------------
# Example entry: an 'xces align' file under example/. It has no 'write_mode'
# attribute, unlike the 'runtime xml' entry that uses the same format.
'example bitext' => {
'file' => 'example/svenprf.xml',
'format' => 'xces align',
},
# End of the anonymous hash that makes up this file.
}