| #!/usr/bin/python |
| # Run with directory arguments from any directory, with no special setup required. |
| # Or: |
| # for i in libc libdl libm linker libstdc++ libthread_db ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done |
| |
| import ftplib |
| import hashlib |
| import os |
| import re |
| import shutil |
| import string |
| import subprocess |
| import sys |
| import tarfile |
| import tempfile |
| |
def IsUninteresting(path):
    """Return True if `path` ends with one of the ignored suffixes.

    The comparison is case-insensitive: the path is lower-cased before it
    is matched against file extensions (".mk", ".py", ...), whole file
    names ("/notice", "/readme", ...) and the tzdata-related entries.
    """
    ignored_extensions = (".mk", ".py", ".pyc", ".txt", ".3")
    ignored_file_names = ("/notice", "/readme", "/caveats")
    ignored_tz_entries = ("/tzdata", "/zoneinfo/generate")

    # str.endswith accepts a tuple and is True if any one suffix matches.
    all_suffixes = ignored_extensions + ignored_file_names + ignored_tz_entries
    return path.lower().endswith(all_suffixes)
| |
def IsAutoGenerated(content):
    """Return True if `content` contains a known auto-generation marker.

    The markers are the literal strings left by gensyscalls.py and
    genserv.py, plus the banner used for headers generated from a Linux
    kernel header. Matching is a case-sensitive substring search.
    """
    generator_markers = (
        "generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    for marker in generator_markers:
        if marker in content:
            return True
    return False
| |
# Every distinct copyright notice extracted so far; filled in by
# ExtractCopyrightAt().
copyrights = set()

# A line containing any of these substrings ends a copyright header.
_HEADER_END_MARKERS = (
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
)

# Lines that are dropped when they appear at the very end of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def ExtractCopyrightAt(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
        lines: the file content as a list of lines (no trailing newlines).
        i: index of a line belonging to the copyright header (callers pass
            a line containing "Copyright").

    Returns:
        The index at which scanning stopped: the first line at or after
        `i` that terminates the header, or len(lines) if none was found.

    The cleaned-up header text is added to the module-level `copyrights`
    set. If nothing is left after cleaning (for example when the line at
    `i` itself contains "*/"), no notice is recorded.
    """
    is_hash_comment = lines[i].startswith("#")

    # For C-style comments, back up until the preceding line contains the
    # "/*" opener; the header text starts just after that line.
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        current = lines[i]
        if "*/" in current:
            break
        # A '#'-style header ends at the first empty line.
        if is_hash_comment and len(current) == 0:
            break
        if any(marker in current for marker in _HEADER_END_MARKERS):
            break
        i += 1

    # Trim trailing cruft.
    end = i
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = line.replace(" * ", "")
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in ("#", " *", "**", "-"):
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. Bounds are checked so that an
    # empty or all-blank header cannot raise IndexError.
    first = 0
    last = len(clean_lines)
    while first < last and clean_lines[first] == "":
        first += 1
    while last > first and clean_lines[last - 1] == "":
        last -= 1

    # Only record headers that still have text after cleaning.
    if first < last:
        copyrights.add("\n".join(clean_lines[first:last]))

    return i
| |
# Directories to scan come from the command line; with no arguments the
# current directory is scanned.
args = sys.argv[1:]
if not args:
    args = ["."]

for arg in args:
    sys.stderr.write('Searching for source files in "%s"...\n' % arg)

    for directory, sub_directories, filenames in os.walk(arg):
        # os.walk only honours in-place changes to the directory list, so
        # prune and sort by mutating it rather than rebinding the name.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if IsUninteresting(path):
                # Build files, scripts, docs and the like are skipped.
                continue

            # Read the raw bytes once and close the handle straight away;
            # decoding explicitly works on both Python 2 and Python 3.
            with open(path, 'rb') as source_file:
                raw_content = source_file.read()
            try:
                content = raw_content.decode('utf-8')
            except UnicodeDecodeError:
                # TODO: update hash.h, md5.c, and md5.h; upstream is probably UTF-8 already.
                sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
                content = raw_content.decode('iso-8859-1')

            lines = content.split("\n")

            if len(lines) <= 4:
                # Short files are skipped.
                continue

            if IsAutoGenerated(content):
                # Auto-generated files are skipped.
                continue

            if "Copyright" not in content:
                if "public domain" in content.lower():
                    # Public domain files carry no copyright notice.
                    continue
                sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
                continue

            # Extract every copyright header in the file. Each call returns
            # the index where its header ended, so scanning resumes after it.
            line_index = 0
            while line_index < len(lines):
                if "Copyright" in lines[line_index]:
                    line_index = ExtractCopyrightAt(lines, line_index)
                line_index += 1

# Emit the notices as UTF-8 bytes. Python 3 exposes the byte stream as
# sys.stdout.buffer; Python 2's sys.stdout accepts byte strings directly.
output = getattr(sys.stdout, "buffer", sys.stdout)
separator = b"\n-------------------------------------------------------------------\n\n"
for notice in sorted(copyrights):
    output.write(notice.encode('utf-8') + b"\n" + separator)

sys.exit(0)