#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
"""genctype.py - Generate the svn_ctype character classification table.
"""

# Table of ASCII character names, indexed by character code (0..127)
names = ('nul', 'soh', 'stx', 'etx', 'eot', 'enq', 'ack', 'bel',
         'bs',  'ht',  'nl',  'vt',  'np',  'cr',  'so',  'si',
         'dle', 'dc1', 'dc2', 'dc3', 'dc4', 'nak', 'syn', 'etb',
         'can', 'em',  'sub', 'esc', 'fs',  'gs',  'rs',  'us',
         'sp',  '!',   '"',   '#',   '$',   '%',   '&',   '\'',
         '(',   ')',   '*',   '+',   ',',   '-',   '.',   '/',
         '0',   '1',   '2',   '3',   '4',   '5',   '6',   '7',
         '8',   '9',   ':',   ';',   '<',   '=',   '>',   '?',
         '@',   'A',   'B',   'C',   'D',   'E',   'F',   'G',
         'H',   'I',   'J',   'K',   'L',   'M',   'N',   'O',
         'P',   'Q',   'R',   'S',   'T',   'U',   'V',   'W',
         'X',   'Y',   'Z',   '[',   '\\',  ']',   '^',   '_',
         '`',   'a',   'b',   'c',   'd',   'e',   'f',   'g',
         'h',   'i',   'j',   'k',   'l',   'm',   'n',   'o',
         'p',   'q',   'r',   's',   't',   'u',   'v',   'w',
         'x',   'y',   'z',   '{',   '|',   '}',   '~',   'del')

# All whitespace characters, in code order:
#   horizontal tab, new line, vertical tab, form feed, carriage return, space
whitespace = (9, 10, 11, 12, 13, 32)

# Bytes not valid in UTF-8 sequences
# NOTE(review): 0xE0 and 0xF0 are legal lead bytes under RFC 3629, while
# 0xC1 and 0xF5-0xFD (which the lead-byte test below accepts) are not.
# The set is kept unchanged so the generated table stays identical --
# confirm the intent before altering it.
utf8_invalid = (0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF)


def table_entry(c):
    """Return the generated C source line for character code ``c``.

    ``c`` is an integer in ``range(256)``.  The returned string has the
    form ``'    /* name */ FLAGS,'``: ``name`` is a three-column label (the
    ASCII character name for codes below 128, otherwise ``x`` followed by
    the hexadecimal code) and ``FLAGS`` is the ``' | '``-joined list of
    ``SVN_CTYPE_*`` bits for the code, or ``'0'`` when no bit applies.
    """
    bits = []

    if c < 128:
        # Ascii subrange
        bits.append('SVN_CTYPE_ASCII')

        # Keep the label three columns wide: single characters are
        # centered, multi-letter names are left-justified.
        label = names[c]
        name = label.center(3) if len(label) == 1 else label.ljust(3)

        # Control characters
        if c < 32 or c == 127:
            bits.append('SVN_CTYPE_CNTRL')

        # Whitespace characters
        if c in whitespace:
            bits.append('SVN_CTYPE_SPACE')

        # The remaining classes are mutually exclusive.
        if 33 <= c < 48 or 58 <= c < 65 or 91 <= c < 97 or 123 <= c < 127:
            # Punctuation marks
            bits.append('SVN_CTYPE_PUNCT')
        elif 48 <= c < 58:
            # Decimal digits
            bits.append('SVN_CTYPE_DIGIT')
        elif 65 <= c < 91:
            # Uppercase letters; 'A'..'F' are also hexadecimal digits
            bits.append('SVN_CTYPE_UPPER')
            if c <= 70:
                bits.append('SVN_CTYPE_XALPHA')
        elif 97 <= c < 123:
            # Lowercase letters; 'a'..'f' are also hexadecimal digits
            bits.append('SVN_CTYPE_LOWER')
            if c <= 102:
                bits.append('SVN_CTYPE_XALPHA')
    else:
        # UTF-8 multibyte sequences.  hex() yields e.g. '0x80'; dropping
        # the leading '0' leaves the three-column label 'x80'.
        name = hex(c)[1:]

        if 0xC0 < c < 0xFE and c not in utf8_invalid:
            # Lead bytes (start of sequence)
            bits.append('SVN_CTYPE_UTF8LEAD')
        elif (c & 0xC0) == 0x80:
            # Continuation bytes
            bits.append('SVN_CTYPE_UTF8CONT')

    flags = ' | '.join(bits) if bits else '0'
    return '    /* %s */ %s,' % (name, flags)


print('    /* **** DO NOT EDIT! ****')
print('       This table was generated by genctype.py, make changes there. */')

# Emit one table row per byte value.
for c in range(256):
    print(table_entry(c))