# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Copyright (C) 2004 Jörg Tiedemann  <joerg@stp.ling.uu.se>
#
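# Pre-defined named data streams: each entry maps a stream name to its
# file path, storage format and (where given) write mode and key/column names.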
#

{
# This file consists of a single anonymous hash. Each key is a
# human-readable stream name; each value is a hash of stream attributes.
# Attributes that occur in this file:
#   'file'       - path of the underlying file
#   'format'     - storage format ('XML', 'text', 'xces align', 'DBM',
#                  'uwa tab')
#   'write_mode' - 'overwrite' wherever it is given; a few entries omit it
#   'root'       - set to 's' on the plain 'XML' entries
#   'key'        - list of key field names, given on the 'DBM' entries
#   'columns'    - list of column names, given on the 'uwa tab' entry
# NOTE(review): how these attributes are interpreted is decided by the
# code that loads this file, which is not visible here.

# ----------- runtime corpus files (all under data/runtime/) -----

    'runtime target' => {
      'file' => 'data/runtime/align.xml.trg',
      'write_mode' => 'overwrite',
      'format' => 'XML',
      'root' => 's',
    },
    'runtime source' => {
      'file' => 'data/runtime/align.xml.src',
      'write_mode' => 'overwrite',
      'format' => 'XML',
      'root' => 's',
    },
    'runtime target text' => {
      'file' => 'data/runtime/trg',
      'write_mode' => 'overwrite',
      'format' => 'text',
    },
    'runtime source text' => {
      'file' => 'data/runtime/src',
      'write_mode' => 'overwrite',
      'format' => 'text',
    },
    'runtime text' => {
      'file' => 'data/runtime/text.xml',
      'write_mode' => 'overwrite',
      'format' => 'XML',
      'root' => 's',
    },
    'runtime xml' => {
      'file' => 'data/runtime/align.xml',
      'write_mode' => 'overwrite',
      'format' => 'xces align',
    },
# ----------- runtime DBM tables -----
# Tables keyed by ['source','target'] or by ['token'].
# NOTE(review): judging by the names these hold frequency counts and
# association scores - confirm against the modules that write them.
    'string similarities' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/str.dbm',
      'key' => ['source','target'],
    },
    'cooc freq' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/cooc.dbm',
      'key' => ['source','target'],
    },
    'source freq' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/src.dbm',
      'key' => ['token'],
    },
    'target freq' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/trg.dbm',
      'key' => ['token'],
    },
    'cooc stat' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/stat.dbm',
      'key' => ['source','target'],
    },
    'dice' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/dice.dbm',
      'key' => ['source','target'],
    },
    'mutual information' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/mi.dbm',
      'key' => ['source','target'],
    },
    't-score' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/tscore.dbm',
      'key' => ['source','target'],
    },

# Four numbered tables (stat1.dbm .. stat4.dbm) with identical attributes
# apart from the file name.
    'co-occurrence statistics 1' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/stat1.dbm',
      'key' => ['source','target'],
    },
    'co-occurrence statistics 2' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/stat2.dbm',
      'key' => ['source','target'],
    },
    'co-occurrence statistics 3' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/stat3.dbm',
      'key' => ['source','target'],
    },
    'co-occurrence statistics 4' => {
      'format' => 'DBM',
      'write_mode' => 'overwrite',
      'file' => 'data/runtime/stat4.dbm',
      'key' => ['source','target'],
    },

# N-gram data: 'ngram stat' is the only entry that uses the 'uwa tab'
# format and a 'columns' list; 'ngram freq' is a DBM table keyed by token.
    'ngram stat' => {
       'file' => 'data/runtime/ngrams.tab',
       'format' => 'uwa tab',
       'columns' => ['ngram'],
       'write_mode' => 'overwrite',
    },
    'ngram freq' => {
       'file' => 'data/runtime/ngramfreq.dbm',
      'format' => 'DBM',
      'key' => ['token'],
      'write_mode' => 'overwrite',
    },

# ----------- learned clues (bootstrapping) -----
# All entries below are DBM tables under data/runtime/, keyed by
# ['source','target'] and opened with write_mode 'overwrite'.

    'clue dl' => {
      'format' => 'DBM',
      'file' => 'data/runtime/dl.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'clue dlp' => {
      'format' => 'DBM',
      'file' => 'data/runtime/dlp.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'clue dlx' => {
      'format' => 'DBM',
      'file' => 'data/runtime/dlx.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'clue dl3x' => {
      'format' => 'DBM',
      'file' => 'data/runtime/dl3x.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'clue dpx' => {
      'format' => 'DBM',
      'file' => 'data/runtime/dpx.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target']
    },
    'clue dp3' => {
      'format' => 'DBM',
      'file' => 'data/runtime/dp3.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'clue dp3 (prefix)' => {
      'format' => 'DBM',
      'file' => 'data/runtime/dp3-pref.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'clue dp3x' => {
      'format' => 'DBM',
      'file' => 'data/runtime/dp3x.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'clue dc3' => {
      'format' => 'DBM',
      'file' => 'data/runtime/dc3.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'clue dc3p' => {
      'format' => 'DBM',
      'file' => 'data/runtime/dc3p.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'clue dc3x' => {
      'format' => 'DBM',
      'file' => 'data/runtime/dc3x.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },

#-------- declarative clues
# These paths start with a literal UplugLang variable reference. The
# strings are single-quoted, so Perl itself does not interpolate it.
# NOTE(review): presumably the loader substitutes the language-resource
# directory for it - confirm. The sven/ensv/enfr/svde prefixes look like
# language-pair codes - confirm.
# The two 'base lexicon' entries carry no 'write_mode'.

    'sven pos clue' => {
      'format' => 'DBM',
      'file' => '$UplugLang/sven/pos.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'sven pos clue 2' => {
      'format' => 'DBM',
      'file' => '$UplugLang/sven/pos2.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'sven chunk clue' => {
      'format' => 'DBM',
      'file' => '$UplugLang/sven/chunk.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'sven base lexicon' => {
      'format' => 'DBM',
      'file' => '$UplugLang/sven/baselex.dbm',
      'key' => ['source','target'],
# Disabled attribute, kept for reference:
#      'encoding' => 'iso-8859-1',
    },

    'ensv pos clue' => {
      'format' => 'DBM',
      'file' => '$UplugLang/ensv/pos.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'ensv pos clue 2' => {
      'format' => 'DBM',
      'file' => '$UplugLang/ensv/pos2.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'ensv chunk clue' => {
      'format' => 'DBM',
      'file' => '$UplugLang/ensv/chunk.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'ensv base lexicon' => {
      'format' => 'DBM',
      'file' => '$UplugLang/ensv/baselex.dbm',
      'key' => ['source','target'],
# Disabled attribute, kept for reference:
#      'encoding' => 'iso-8859-1',
    },


    'enfr pos clue' => {
      'format' => 'DBM',
      'file' => '$UplugLang/enfr/pos.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'enfr pos clue 2' => {
      'format' => 'DBM',
      'file' => '$UplugLang/enfr/pos2.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'enfr chunk clue' => {
      'format' => 'DBM',
      'file' => '$UplugLang/enfr/chunk.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },


    'svde pos clue' => {
      'format' => 'DBM',
      'file' => '$UplugLang/svde/pos.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'svde pos clue 2' => {
      'format' => 'DBM',
      'file' => '$UplugLang/svde/pos2.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },
    'svde chunk clue' => {
      'format' => 'DBM',
      'file' => '$UplugLang/svde/chunk.dbm',
      'write_mode' => 'overwrite',
      'key' => ['source','target'],
    },

#-----------------------------------------------------------
# Example data: an 'xces align' file under example/; no 'write_mode' given.

    'example bitext' => {
      'file' => 'example/svenprf.xml',
      'format' => 'xces align',
    },
}