The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env python
#
# check if a file has the proper license in it
#
# USAGE: check-license.py [-C] file1 file2 ... fileN
#
# A 'file' may in fact be a directory, in which case it is recursively
# searched.
#
# If the license cannot be found, then the filename is printed to stdout.
# Typical usage:
#    $ check-license.py . > bad-files
#
# -C switch is used to change licenses.
# Typical usage:
#    $ check-license.py -C file1 file2 ... fileN
#

import sys, os, re

# Note: Right now, OLD_LICENSE and NEW_LICENSE are the same, because
# r878444 updated all the license blocks.  In the future, if we update
# the license block again, change just NEW_LICENSE and use this script.

# Regular-expression source that matches the license block as it appears
# inside a C-style comment (every line starts with " *").  Regex
# metacharacters in the license text are backslash-escaped.  This is a raw
# string so that those backslashes reach the regex engine unchanged and do
# not trigger Python's "invalid escape sequence" warning.
OLD_LICENSE = r''' \* ====================================================================
 \*    Licensed to the Subversion Corporation \(SVN Corp\.\) under one
 \*    or more contributor license agreements\.  See the NOTICE file
 \*    distributed with this work for additional information
 \*    regarding copyright ownership\.  The SVN Corp\. licenses this file
 \*    to you under the Apache License, Version 2\.0 \(the
 \*    "License"\); you may not use this file except in compliance
 \*    with the License\.  You may obtain a copy of the License at
 \*
 \*      http://www\.apache\.org/licenses/LICENSE-2\.0
 \*
 \*    Unless required by applicable law or agreed to in writing,
 \*    software distributed under the License is distributed on an
 \*    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 \*    KIND, either express or implied\.  See the License for the
 \*    specific language governing permissions and limitations
 \*    under the License\.
 \* ====================================================================
'''

# Same pattern for '#'-style comments: the leading " \*" of every line
# (space, backslash, asterisk in the regex source) is replaced by "#".
SH_OLD_LICENSE = re.sub(r'(?m)^ \\\*', '#', OLD_LICENSE)

# NEW_LICENSE is literal replacement text, not a pattern: it is spliced
# into files verbatim, so it must NOT be regex-quoted.  (Only OLD_LICENSE
# is used for matching.)
NEW_LICENSE = ''' * ====================================================================
 *    Licensed to the Subversion Corporation (SVN Corp.) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The SVN Corp. licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
'''

# '#'-comment flavour of the replacement text: the leading " *" of each
# line becomes "#".
SH_NEW_LICENSE = re.sub(r'^ \*', '#', NEW_LICENSE, flags=re.M)

# Compiled matchers for the license block in C-style and '#'-style comments.
re_OLD = re.compile(OLD_LICENSE)
re_SH_OLD = re.compile(SH_OLD_LICENSE)

# A file containing any of these phrases is not reported by check_file()
# even when the license block is absent.
re_EXCLUDE = re.compile('|'.join((
    r'automatically generated by SWIG',
    r'Generated from configure\.in',
    r'placed into the public domain',
    )))

# File extensions handled with the C-style (" * ") license block.
c_comment_suffices = (
    '.c',
    '.java',
    '.h',
    '.cpp',
    '.hw',
    '.pas',
    )

# Intentionally empty: no file type in this category uniformly carries a
# copyright block.  Candidates that could be added here:
# ('.bat', '.py', '.pl', '.in')
sh_comment_suffices = tuple()

def check_file(fname, old_re, new_lic):
  """Print FNAME to stdout if it does not contain the expected license.

  FNAME is the path of the file to inspect.  OLD_RE is a compiled regular
  expression matching the license block.  NEW_LIC is unused; it is accepted
  only so that this function has the same signature as change_license().

  Nothing is printed when OLD_RE matches, or when the file contains one of
  the phrases matched by re_EXCLUDE.
  """
  # Read through a context manager so the file handle is closed promptly
  # instead of being left for the garbage collector.
  with open(fname) as f:
    s = f.read()
  if (not old_re.search(s)
      and not re_EXCLUDE.search(s)):
    print(fname)

def change_license(fname, old_re, new_lic):
  """Replace the first match of OLD_RE in file FNAME with NEW_LIC.

  FNAME is the path of the file to rewrite.  OLD_RE is a compiled regular
  expression matching the existing license block.  NEW_LIC is the literal
  replacement text.

  If OLD_RE does not match, an error line is printed and the file is left
  untouched; otherwise the file is rewritten in place and a confirmation
  line is printed.
  """
  with open(fname) as f:
    s = f.read()

  m = old_re.search(s)
  if not m:
    print('ERROR: missing old license: %s' % fname)
    return

  # Write through a context manager so the new contents are flushed and the
  # handle closed before we report success.
  with open(fname, 'w') as f:
    f.write(s[:m.start()] + new_lic + s[m.end():])
  print('Changed: %s' % fname)

def visit(baton, dirname, dircontents):
  """Apply the file function BATON to each relevant entry of a directory.

  BATON is a callable taking (filename, compiled_regex, new_license_text).
  DIRNAME is the directory the entries live in.  DIRCONTENTS is a mutable
  list of entry names within DIRNAME; any '.svn' and '.libs' entries are
  removed from it in place.

  Directories are skipped.  Files are dispatched on their extension: those
  in c_comment_suffices get the C-style license pattern and text, those in
  sh_comment_suffices get the '#'-style ones; everything else is ignored.
  """
  file_func = baton
  # Iterate over a snapshot: removing from the very list being iterated
  # shifts the remaining entries down and makes the loop skip the entry
  # that follows each removed one.
  for i in list(dircontents):
    # Don't recurse into certain directories
    if i in ('.svn', '.libs'):
      dircontents.remove(i)
      continue

    extension = os.path.splitext(i)[1]
    fullname = os.path.join(dirname, i)

    if os.path.isdir(fullname):
      continue

    if extension in c_comment_suffices:
      file_func(fullname, re_OLD, NEW_LICENSE)
    elif extension in sh_comment_suffices:
      file_func(fullname, re_SH_OLD, SH_NEW_LICENSE)

def main():
  """Command-line entry point: check-license.py [-C] file1 file2 ... fileN

  Without -C, every named file (directories are searched recursively) is
  passed to check_file(); with -C as the first argument it is passed to
  change_license() instead.

  Exits with status 1 after printing a usage line to stderr when no
  arguments are given.
  """
  args = sys.argv[1:]
  if not args:
    sys.stderr.write('USAGE: check-license.py [-C] file1 file2 ... fileN\n')
    sys.exit(1)

  file_func = check_file
  if args[0] == '-C':
    print('Changing license text...')
    file_func = change_license
    args = args[1:]

  for f in args:
    if os.path.isdir(f):
      for dirpath, dirs, files in os.walk(f):
        # Prune in place: os.walk() only skips a subdirectory when it is
        # removed from the very list it yielded, not from a copy of it.
        dirs[:] = [d for d in dirs if d not in ('.svn', '.libs')]
        visit(file_func, dirpath, dirs + files)
    else:
      dirname, basename = os.path.split(f)
      # visit() iterates over a list of names; a bare string would be
      # walked character by character and the file never examined.
      visit(file_func, dirname, [basename])

# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()