# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
{
  # Identification of this module.  The 'stdin' and 'stdout' values are
  # the names of the streams declared under 'input' and 'output' below.
  # NOTE(review): '$UplugBin' is single-quoted, so Perl does not
  # interpolate it here; presumably the loader expands it -- confirm.
  'module' => {
    'name'     => 'The clue aligner - linking words',
    'program'  => 'uplug-wordalign',
    'location' => '$UplugBin',
    'stdin'    => 'bitext',
    'stdout'   => 'bitext links',
  },
  # Help text for the module.  The string contains HTML markup; the list
  # of search strategies is closed with </ul> so the markup is balanced.
  'description' => 'This module links words and phrases using the
  clues that are available and which have been enabled for the
  alignment. Note: If you enable additional clues make sure that they
  exist, i.e. that they have been produced before. Non-existing clues
  are simply ignored.<p>
  The search parameter sets the link strategy:
  The default search
  strategy is a constrained best-first search (=best first). Other
  available strategies are 
  <ul><li>a refined bi-directional alignment
  (=refined)
  <li>the intersection of directional alignments (source to
  target and target to source) (=intersection)
  <li>the union of
  directional alignments (=union)
  <li>a competitive linking approach (=competitive)
  <li>and two directional alignment strategies
  (directional_src and directional_trg).</ul>',
  # Input streams.  Apart from the bitext itself, the entries are either
  # DBM files (described by 'format', 'file' and 'key') or references to
  # named streams (described by 'stream name').
  'input' => {

    # ---- the bitext to be aligned ----
    'bitext' => {
      'stream name' => 'runtime xml',
      'format'      => 'xces align',
    },

    # ---- user lexicon ----
    'lexicon' => {
      'format' => 'DBM',
      'file'   => 'data/runtime/lexicon.dbm',
      'key'    => ['source', 'target'],
    },

    # ---- GIZA clues ----
    'giza-pos' => {
      'format' => 'DBM',
      'file'   => 'data/runtime/giza-pos.dbm',
      'key'    => ['source', 'target'],
    },
    'giza-word' => {
      'format' => 'DBM',
      'file'   => 'data/runtime/giza-word.dbm',
      'key'    => ['source', 'target'],
    },
    'giza-pos-word' => {
      'format' => 'DBM',
      'file'   => 'data/runtime/giza-pos-word.dbm',
      'key'    => ['source', 'target'],
    },
    'giza-word-prefix' => {
      'format' => 'DBM',
      'file'   => 'data/runtime/giza-word-prefix.dbm',
      'key'    => ['source', 'target'],
    },
    'giza-word-suffix' => {
      'format' => 'DBM',
      'file'   => 'data/runtime/giza-word-suffix.dbm',
      'key'    => ['source', 'target'],
    },

    # ---- GIZA clues, "-i" variants ----
    'giza-pos-i' => {
      'format' => 'DBM',
      'file'   => 'data/runtime/giza-pos-i.dbm',
      'key'    => ['source', 'target'],
    },
    'giza-word-i' => {
      'format' => 'DBM',
      'file'   => 'data/runtime/giza-word-i.dbm',
      'key'    => ['source', 'target'],
    },
    'giza-pos-word-i' => {
      'format' => 'DBM',
      'file'   => 'data/runtime/giza-pos-word-i.dbm',
      'key'    => ['source', 'target'],
    },
    'giza-word-prefix-i' => {
      'format' => 'DBM',
      'file'   => 'data/runtime/giza-word-prefix-i.dbm',
      'key'    => ['source', 'target'],
    },
    'giza-word-suffix-i' => {
      'format' => 'DBM',
      'file'   => 'data/runtime/giza-word-suffix-i.dbm',
      'key'    => ['source', 'target'],
    },

    # ---- language-specific (ensv) clues stored under $UplugLangensv ----
    'ep-ensv-giza-word' => {
      'format' => 'DBM',
      'file'   => '$UplugLangensv/ep-giza-word-ensv.dbm',
      'key'    => ['source', 'target'],
    },
    # NOTE(review): this is the only DBM entry whose key order is
    # target/source; presumably intentional because it reads the "sven"
    # file -- confirm.
    'ep-ensv-giza-word-i' => {
      'format' => 'DBM',
      'file'   => '$UplugLangensv/ep-giza-word-sven.dbm',
      'key'    => ['target', 'source'],
    },

    # ---- basic clues (named streams) ----
    'string similarities' => { 'stream name' => 'string similarities' },
    'dice'                => { 'stream name' => 'dice' },
    'mutual information'  => { 'stream name' => 'mutual information' },
    't-score'             => { 'stream name' => 't-score' },
    'coocstat1'           => { 'stream name' => 'co-occurrence statistics 1' },
    'coocstat2'           => { 'stream name' => 'co-occurrence statistics 2' },
    'coocstat3'           => { 'stream name' => 'co-occurrence statistics 3' },
    'coocstat4'           => { 'stream name' => 'co-occurrence statistics 4' },

    # ---- "clue d*" streams ----
    'clue dl'           => { 'stream name' => 'clue dl' },
    'clue dlp'          => { 'stream name' => 'clue dlp' },
    'clue dlx'          => { 'stream name' => 'clue dlx' },
    'clue dl3x'         => { 'stream name' => 'clue dl3x' },
    'clue dpx'          => { 'stream name' => 'clue dpx' },
    'clue dp3'          => { 'stream name' => 'clue dp3' },
    'clue dp3 (prefix)' => { 'stream name' => 'clue dp3 (prefix)' },
    'clue dp3x'         => { 'stream name' => 'clue dp3x' },
    'clue dc3'          => { 'stream name' => 'clue dc3' },
    'clue dc3p'         => { 'stream name' => 'clue dc3p' },
    'clue dc3x'         => { 'stream name' => 'clue dc3x' },

    # ---- static clues per language pair ----
    'sven static POS clue'   => { 'stream name' => 'sven pos clue' },
    'sven static POS clue 2' => { 'stream name' => 'sven pos clue 2' },
    'sven static chunk clue' => { 'stream name' => 'sven chunk clue' },

    'ensv static POS clue'   => { 'stream name' => 'ensv pos clue' },
    'ensv static POS clue 2' => { 'stream name' => 'ensv pos clue 2' },
    'ensv static chunk clue' => { 'stream name' => 'ensv chunk clue' },

    'svde static POS clue'   => { 'stream name' => 'svde pos clue' },
    'svde static POS clue 2' => { 'stream name' => 'svde pos clue 2' },
    'svde static chunk clue' => { 'stream name' => 'svde chunk clue' },

    'enfr static POS clue'   => { 'stream name' => 'enfr pos clue' },
    'enfr static POS clue 2' => { 'stream name' => 'enfr pos clue 2' },

  },
  # Output stream: the stream named 'bitext links', written in
  # 'xces align' format with write mode 'overwrite'.
  'output' => {
    'bitext links' => {
      'format'     => 'xces align',
      'status'     => 'word',
      'write_mode' => 'overwrite',
    },
  },




  # Default parameters.  The per-clue sections at the top are empty; the
  # commented-out lines inside them are kept as examples of the settings
  # that can be supplied.
  'parameter' => {

    'string similarities' => {
      # 'minimal score' => 0.3,
      # 'score weight' => 0.05,
    },
    'dice' => {
      # 'minimal score' => 0.2,
      # 'score weight' => 0.05,
    },
    'mutual information' => {
      # 'minimal score' => 2,
      # 'score weight' => 0.005,
    },
    't-score' => {
      # 'minimal score' => 0.8,
      # 'score weight' => 0.01,
    },

    'general input parameter' => {
      'chunks (source)' => 'c.*',
      'chunks (target)' => 'c.*',
    },

    'alignment' => {
      'remove word links' => 0,

      # Every clue listed here is set to 0.
      'clues' => {
        'lexicon'             => 0,
        'string similarities' => 0,
        'dice'                => 0,
        'mutual information'  => 0,
        't-score'             => 0,
        'coocstat1'           => 0,
        'coocstat2'           => 0,
        'coocstat3'           => 0,
        'coocstat4'           => 0,

        'giza-word'        => 0,
        'giza-pos'         => 0,
        'giza-pos-word'    => 0,
        'giza-word-prefix' => 0,
        'giza-word-suffix' => 0,

        'giza-word-i'        => 0,
        'giza-pos-i'         => 0,
        'giza-pos-word-i'    => 0,
        'giza-word-prefix-i' => 0,
        'giza-word-suffix-i' => 0,

        'clue dl'           => 0,
        'clue dlp'          => 0,
        'clue dlx'          => 0,
        'clue dl3x'         => 0,
        'clue dpx'          => 0,
        'clue dp3'          => 0,
        'clue dp3 (prefix)' => 0,
        'clue dp3x'         => 0,
        'clue dc3'          => 0,
        'clue dc3p'         => 0,
        'clue dc3x'         => 0,
      },

      # 'minimal score' => '0.000000000001',

      # NOTE(review): the module description calls best-first search the
      # default and does not mention 'matrix' -- confirm which is intended.
      'search'  => 'matrix',
      'verbose' => 0,    # verbose mode

      # 'minimal score' => '70%',
      # 'general stream' => 'dice',
      # 'align 1:1' => '0.5',
      # 'remove linked' => 1,
      # 'align identical' => '0.08',
    },

    'runtime' => {
      'print progress' => 1,
    },
  },
  # Shortcut table: each short name maps to a colon-separated path that
  # names a nested key of this configuration (for example
  # 'parameter:alignment:clues:lexicon').  Presumably these are exposed as
  # command-line options -- confirm against the caller.
  'arguments' => {
    'shortcuts' => {

      'lex' => 'parameter:alignment:clues:lexicon',

      # ---- basic clues ----

      'sim'    => 'parameter:alignment:clues:string similarities',
      'dice'   => 'parameter:alignment:clues:dice',
      'mi'     => 'parameter:alignment:clues:mutual information',
      'tscore' => 'parameter:alignment:clues:t-score',
      'stat1'  => 'parameter:alignment:clues:coocstat1',
      'stat2'  => 'parameter:alignment:clues:coocstat2',
      'stat3'  => 'parameter:alignment:clues:coocstat3',
      'stat4'  => 'parameter:alignment:clues:coocstat4',

      # ---- giza clues ----

      'gw'  => 'parameter:alignment:clues:giza-word',
      'gp'  => 'parameter:alignment:clues:giza-pos',
      'gpw' => 'parameter:alignment:clues:giza-pos-word',
      'gwp' => 'parameter:alignment:clues:giza-word-prefix',
      'gws' => 'parameter:alignment:clues:giza-word-suffix',

      # ---- giza inverse ----

      'gwi'  => 'parameter:alignment:clues:giza-word-i',
      'gpi'  => 'parameter:alignment:clues:giza-pos-i',
      'gpwi' => 'parameter:alignment:clues:giza-pos-word-i',
      'gwpi' => 'parameter:alignment:clues:giza-word-prefix-i',
      'gwsi' => 'parameter:alignment:clues:giza-word-suffix-i',

      # ---- bootstrapped clues ----

      'dl'           => 'parameter:alignment:clues:clue dl',
      'dlp'          => 'parameter:alignment:clues:clue dlp',
      'dlx'          => 'parameter:alignment:clues:clue dlx',
      'dl3x'         => 'parameter:alignment:clues:clue dl3x',
      'dpx'          => 'parameter:alignment:clues:clue dpx',
      'dp3'          => 'parameter:alignment:clues:clue dp3',
      'dp3 (prefix)' => 'parameter:alignment:clues:clue dp3 (prefix)',
      'dp3x'         => 'parameter:alignment:clues:clue dp3x',
      'dc3'          => 'parameter:alignment:clues:clue dc3',
      'dc3p'         => 'parameter:alignment:clues:clue dc3p',
      'dc3x'         => 'parameter:alignment:clues:clue dc3x',

      # ---- language-specific clues ----

      'svenp'  => 'parameter:alignment:clues:sven static POS clue',
      'svenpp' => 'parameter:alignment:clues:sven static POS clue 2',
      'svenc'  => 'parameter:alignment:clues:sven static chunk clue',

      'svdep'  => 'parameter:alignment:clues:svde static POS clue',
      'svdepp' => 'parameter:alignment:clues:svde static POS clue 2',
      'svdec'  => 'parameter:alignment:clues:svde static chunk clue',

      'ensvp'  => 'parameter:alignment:clues:ensv static POS clue',
      'ensvpp' => 'parameter:alignment:clues:ensv static POS clue 2',
      'ensvc'  => 'parameter:alignment:clues:ensv static chunk clue',

      'enfrp'  => 'parameter:alignment:clues:enfr static POS clue',
      'enfrpp' => 'parameter:alignment:clues:enfr static POS clue 2',

      # ---- language-specific EuroParl clues ----

      'ep-ensv-gw'  => 'parameter:alignment:clues:ep-ensv-giza-word',
      'ep-ensv-gwi' => 'parameter:alignment:clues:ep-ensv-giza-word-i',

      # ---- clue weights ----

      'sim_w'    => 'parameter:string similarities:score weight',
      'dice_w'   => 'parameter:dice:score weight',
      'mi_w'     => 'parameter:mutual information:score weight',
      'tscore_w' => 'parameter:t-score:score weight',
      'stat1_w'  => 'parameter:coocstat1:score weight',
      'stat2_w'  => 'parameter:coocstat2:score weight',
      'stat3_w'  => 'parameter:coocstat3:score weight',
      'stat4_w'  => 'parameter:coocstat4:score weight',

      'gw_w'  => 'parameter:giza-word:score weight',
      'gp_w'  => 'parameter:giza-pos:score weight',
      'gpw_w' => 'parameter:giza-pos-word:score weight',
      'gwp_w' => 'parameter:giza-word-prefix:score weight',
      'gws_w' => 'parameter:giza-word-suffix:score weight',

      'gwi_w'  => 'parameter:giza-word-i:score weight',
      'gpi_w'  => 'parameter:giza-pos-i:score weight',
      'gpwi_w' => 'parameter:giza-pos-word-i:score weight',
      'gwpi_w' => 'parameter:giza-word-prefix-i:score weight',
      'gwsi_w' => 'parameter:giza-word-suffix-i:score weight',

      # NOTE(review): the following weight paths contain an extra
      # 'clues:' segment, unlike every other '*_w' shortcut in this
      # table -- confirm whether 'parameter:clue dl:score weight' etc.
      # was intended.
      'dl_w'           => 'parameter:clues:clue dl:score weight',
      'dlp_w'          => 'parameter:clues:clue dlp:score weight',
      'dlx_w'          => 'parameter:clues:clue dlx:score weight',
      'dl3x_w'         => 'parameter:clues:clue dl3x:score weight',
      'dpx_w'          => 'parameter:clues:clue dpx:score weight',
      'dp3_w'          => 'parameter:clues:clue dp3:score weight',
      'dp3_w (prefix)' => 'parameter:clues:clue dp3 (prefix):score weight',
      'dp3x_w'         => 'parameter:clues:clue dp3x:score weight',
      'dc3_w'          => 'parameter:clues:clue dc3:score weight',
      'dc3p_w'         => 'parameter:clues:clue dc3p:score weight',
      'dc3x_w'         => 'parameter:clues:clue dc3x:score weight',

      'svenp_w'  => 'parameter:sven static POS clue:score weight',
      'svenpp_w' => 'parameter:sven static POS clue 2:score weight',
      'svenc_w'  => 'parameter:sven static chunk clue:score weight',

      'svdep_w'  => 'parameter:svde static POS clue:score weight',
      'svdepp_w' => 'parameter:svde static POS clue 2:score weight',
      'svdec_w'  => 'parameter:svde static chunk clue:score weight',

      'ensvp_w'  => 'parameter:ensv static POS clue:score weight',
      'ensvpp_w' => 'parameter:ensv static POS clue 2:score weight',
      'ensvc_w'  => 'parameter:ensv static chunk clue:score weight',

      'enfrp_w'  => 'parameter:enfr static POS clue:score weight',
      'enfrpp_w' => 'parameter:enfr static POS clue 2:score weight',

      # ---- other parameters ----

      'new'      => 'parameter:alignment:non-aligned only',
      'in'       => 'input:bitext:file',
      'infile'   => 'input:bitext:file',
      'informat' => 'input:bitext:format',
      'out'      => 'output:bitext links:file',
      'srclang'  => 'parameter:general:language (source)',
      'trglang'  => 'parameter:general:language (target)',
      'id'       => 'parameter:alignment:index',
      'html'     => 'parameter:runtime:print html only',
      'search'   => 'parameter:alignment:search',
      'v'        => 'parameter:alignment:verbose',
      'comb'     => 'parameter:alignment:score combination',
      'adj'      => 'parameter:alignment:adjacent_only',
      'phr'      => 'parameter:alignment:in_phrases_only',
      'min'      => 'parameter:alignment:minimal score',
      # NOTE(review): this path has a space where the other runtime
      # shortcuts use a colon ('parameter:runtime:...') -- confirm.
      'dir'      => 'parameter:runtime dir'
    }
  },
  # Widget descriptions for selected parameters.  Each value is a widget
  # type, optionally followed by its arguments in parentheses.
  'widgets' => {
    # Disabled widget for the input stream, kept for reference:
    #   'input' => {
    #     'bitext' => {
    #       'stream name' => 'stream (format=xces align,status=sent)',
    #     },
    #   },
    'parameter' => {
      'alignment' => {
        'clues' => {
          'string similarities' => 'checkbox',
          'dice'                => 'checkbox',
        },
        'minimal score' => 'scale (0,1,0.00001,0.005)',
        # The last option is spelled 'directional_trg', the strategy name
        # given in the module description; it previously read
        # 'directedional_trg'.
        'search' => 'optionmenu (best first,refined,intersection,union,competitive,directional_src,directional_trg)',
      },
    }
  }
}