# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Configuration for the 'uplug-strsim' program: string similarity between
# co-occurring words and chunks, using the LCSR metric.
# The file consists of one anonymous hash with the top-level sections
# 'module', 'description', 'input', 'output', 'parameter', 'arguments'
# and 'widgets'.
# NOTE(review): presumably this is read by the Uplug configuration loader;
# confirm how it is parsed before changing quoting, key names or layout.
{
  # Which program to run and where to find it.
  # '$UplugBin' is single-quoted, so Perl itself does not interpolate it;
  # presumably the loader substitutes the real path -- TODO confirm.
  'module' => {
    'program' => 'uplug-strsim',
    'location' => '$UplugBin',
    'name' => 'LCSR - the longest common sub-sequence ratio',
#    'stdin' => 'bitext',
  },
  # Free-text description; the string literal deliberately spans two lines,
  # so the value contains an embedded newline and the leading spaces of the
  # continuation line.
  'description' => 'The longest common sub-sequence ratio is
  calculated for co-occurring words and chunks.',
  # Input streams: a single stream keyed 'bitext'.
  'input' => {
    'bitext' => {
      'stream name' => 'runtime xml',
    }
  },
  # Output streams: a single stream keyed 'string similarities'.
  'output' => {
    'string similarities' => {
      'stream name' => 'string similarities',
    },
  },
  # Parameters, grouped into 'token', 'similarity measures' and 'runtime'.
  'parameter' => {
    # Token selection. Most settings come in '(source)'/'(target)' pairs
    # and are set to the same value on both sides in this file.
    'token' => {
      'chunks (source)' => 'c.*',            # use chunks
      'chunks (target)' => 'c.*',            # use chunks
#      'minimal length diff' => 0.3,
#     'matching word class' => 'same',       # don't mix content and stop words
      'minimal frequency' => 1,
      'minimal frequency (source)' => 1,
      'minimal frequency (target)' => 1,
      'minimal length (source)' => 3,
      'minimal length (target)' => 3,
      'maximal ngram length (source)' => 1,  # >1 --> use N-grams
      'maximal ngram length (target)' => 1,  # >1 --> use N-grams
#      'use attribute (source)' => 'none',
#      'use attribute (target)' => 'none',
#      'grep token (source)' => 'contains alphabetic',
#      'grep token (target)' => 'contains alphabetic',
      'lower case (source)' => 1,
      'lower case (target)' => 1,
      'exclude stop words (source)' => 1,
      'exclude stop words (target)' => 1,
      # Explicit language names are kept commented out below; the active
      # setting is 'default' for both sides. These two values are the ones
      # addressed by the 'srclang'/'trglang' shortcuts further down.
#      'language (source)' => 'english',
#      'language (target)' => 'swedish',
      'language (source)' => 'default',
      'language (target)' => 'default',
      # In a single-quoted Perl string '\\' is one backslash, so the value
      # stored here is the four characters \s+ .
      'delimiter' => '\\s+',
      'token label' => 'w',
      'remove linked' => 0,
    },
    # Similarity scoring: only the 'lcsr' metric is listed, the weighting
    # and N-gram switches are all 0, and 'minimal score' is 0.4.
    # NOTE(review): 'minimal score' looks like a lower cut-off for reported
    # pairs -- confirm against uplug-strsim.
    'similarity measures' => {
      'minimal score' => 0.4,
      'use not-matching-weights' => 0,
      'use N-grams' => 0,
      'metrics' => 'lcsr',
#      'precision' => 4,
      'use weights' => 0,
    },
    # Runtime settings. Note that 'buffer' (2000000) is ten times the
    # 'source buffer'/'target buffer' value (200000); the units are not
    # stated in this file.
    'runtime' => {
      'print progress' => 1,
      'buffer' => 2000000,
      'source buffer' => 200000,
      'target buffer' => 200000,
    },
  },
  # Short argument names mapped to colon-separated paths whose leading
  # components match keys of this hash (e.g. 'parameter:token:language
  # (source)'). The trailing 'file'/'format' components have no entry in
  # this file; presumably they are filled in when the argument is given --
  # TODO confirm.
  'arguments' => {
    'shortcuts' => {
       'in' => 'input:bitext:file',
       'informat' => 'input:bitext:format',
       'out' => 'output:string similarities:file',
       'outformat' => 'output:string similarities:format',
       'srclang' => 'parameter:token:language (source)',
       'trglang' => 'parameter:token:language (target)',
    }
  },
  # No widgets are defined for this module.
  'widgets' => {
  }
}