The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
{
  # Module identification: program name, its location, and a
  # human-readable display name.
  'module' => {
    'program' => 'uplug-coocfreq-slow',
    # Single-quoted, so Perl does not interpolate: the literal text
    # $UplugBin is stored here. Presumably whatever loads this file
    # expands it to the real directory -- TODO confirm.
    'location' => '$UplugBin',
    'name' => 'co-occurrence frequency counter',
    # Disabled entry: 'stdin' is not set for this module. If enabled it
    # would name 'bitext' (the input declared in this file) -- presumably
    # as the stream read from standard input; verify before re-enabling.
#    'stdin' => 'bitext',
  },
  # Free-text description of the module. The string literal spans two
  # source lines, so the stored value contains the line break and the
  # two leading spaces of the second line.
  # NOTE(review): "This modules" looks like a typo for "This module";
  # left unchanged here because the string is runtime data.
  'description' => 'This modules counts co-occurrence frequencies of
  words and phrases.',
  # Input streams. The single input, 'bitext', is bound to the stream
  # named 'runtime xml'.
  'input' => {
    'bitext' => {
      'stream name' => 'runtime xml',
    },
  },
  # Output streams. Three outputs are declared; each is bound to a stream
  # whose name is identical to the output's own key.
  # Presumably: pair co-occurrence counts plus separate source-side and
  # target-side token counts -- confirm against the program.
  'output' => {
    'cooc freq' => {
      'stream name' => 'cooc freq',
    },
    'source freq' => {
      'stream name' => 'source freq',
    },
    'target freq' => {
      'stream name' => 'target freq',
    }
  },
  # Default parameter values, split into two groups:
  #   'token'   -- settings about which tokens are counted and how
  #   'runtime' -- progress output and buffer settings
  'parameter' => {
    'token' => {

    #------------------------------------------------------------------
    # token pair features
    #     Define contextual features for counting.
    #     The entries below are a commented-out example only; no feature
    #     definitions are active in this file.
    #
    # 'features (source)' => {       # source language features:
    #   'left:pos' => '^(..).*$$1', # first 2 characters of the POS of the left neighbor
    #   '#text' => '(.{4})$$1',     # last 4 characters of the word itself
    #   'c.*:right:type' => undef,   # type attribute of a right neighbor
    # },                             #    of a parent tag that starts with 'c'
    # 'features (target)' => {       # target language features:
    #   'pos' => undef,              # POS attribute of the current token
    # },
    #
    #------------------------------------------------------------------

    #------------------------------------------------------------------
    # other token parameters:
    #      chunks: use marked chunks, argument: xml-tag-pattern
    #      minimal frequency: threshold for token pair frequencies
    #
    # Further optional parameters (commented-out examples, not set here):
    #
    #      'minimal length diff' => 0.1,     # string length difference ratio
    #      'matching word class' => 'same',  # don't mix content and stop words
    #      'minimal length (source)' => 2,
    #      'minimal length (target)' => 2,
    #      'use attribute (source)' => 'stem',     # use the 'stem' attribute
    #      'use attribute (target)' => 'stem',     # for all tokens
    #      'grep token (source)' => 'alphabetic',  # restrict tokens to
    #      'grep token (target)' => 'alphabetic',  # alphabetic only
    #      'exclude stop words (source)' => 0,     # don't count stop words
    #      'exclude stop words (target)' => 0,
    #      'language (source)' => 'english',       # use language-specific
    #      'language (target)' => 'swedish',       # information (inilang.ini)
    #------------------------------------------------------------------

      # Active defaults start here.
      'chunks (source)' => 'c.*',            # use marked chunks (xml-tag pattern)
      'chunks (target)' => 'c.*',            # use marked chunks (xml-tag pattern)
      'minimal frequency' => 2,              # threshold for token pair frequencies
      # Presumably the corresponding thresholds for the source-side and
      # target-side token frequencies -- confirm against the program.
      'minimal frequency (source)' => 2,
      'minimal frequency (target)' => 2,
      'maximal ngram length (source)' => 1,  # >1 --> use N-grams
      'maximal ngram length (target)' => 1,  # >1 --> use N-grams
      'lower case (source)' => 0,            # =1 --> convert to lower case
      'lower case (target)' => 0,            # =1 --> convert to lower case
      'token label' => 'w',                  # xml tag for (single) tokens
      'remove linked' => 1,                  # =1 --> don't count aligned data!
    },


    'runtime' => {

    #------------------------------------------------------------------
    # runtime parameters
    #
      'print progress' => 1,       # verbose output
      'buffer' => 2000000,         # number of token pairs buffered in a hash
      'source buffer' => 2000000,  # source token buffer
      'target buffer' => 2000000,  # target token buffer
      #------------------------------------------------------------
      # clean buffer:
      # if set to 1: remove low-frequency pairs from the buffer
      #              when the buffer overflows
      'clean buffer' => 1,
      #------------------------------------------------------------
    },
  },
  #------------------------------------------------------------------
  # Argument shortcuts: each short name maps to a colon-separated path
  # into this configuration structure (section:key:key).
  # Presumably these short names are accepted as command-line options;
  # verify against the code that reads this file.
  #
  # Several targets have no default value in this file and are therefore
  # only set when the shortcut is actually used:
  #   - the 'file' slot of each output ('src', 'trg', 'cooc')
  #   - 'language (source)' / 'language (target)' ('srclang', 'trglang')
  #   - 'max nr of segments' ('max')
  #   - 'use attribute (source)' / 'use attribute (target)' ('sa', 'ta')
  'arguments' => {
    'shortcuts' => {
       # output destinations
       'src' => 'output:source freq:file',
       'trg' => 'output:target freq:file',
       'cooc' => 'output:cooc freq:file',
       # token parameters
       'freq' => 'parameter:token:minimal frequency',
       'srclang' => 'parameter:token:language (source)',
       'trglang' => 'parameter:token:language (target)',
       # runtime parameters
       'max' => 'parameter:runtime:max nr of segments',
       'buf' => 'parameter:runtime:buffer',
       'clean' => 'parameter:runtime:clean buffer',
       # more token parameters
       'sa' => 'parameter:token:use attribute (source)',
       'ta' => 'parameter:token:use attribute (target)',
       'w' => 'parameter:token:token label',
    }
  },
  # No widgets are defined for this module; the key is present with an
  # empty hash.
  'widgets' => {
  }
}