diff options
-rwxr-xr-x | extract-copyright-comments | 216 |
1 files changed, 216 insertions, 0 deletions
#!/bin/sh
#
# Extract comments containing copyright notices from C/C++ files
#
# Copyright (C) 2020 Patrick McDermott
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

set -eu

LF='
'
# Built with printf instead of a literal tab so that tools which rewrite
# whitespace cannot silently turn it into a space.
HT="$(printf '\t')"

# Input buffer shared by all functions below:
#   bufv_0 .. bufv_$((bufc - 1))  one input character each
#   bufc                          number of buffered characters
#   bufi                          index of the next character to read
#   c                             character most recently read by getc()
bufc=0
bufi=0
c=

# Read the next input character into ${c}.  At end of input ${c} is set to
# the empty string and the read position is left unchanged.
getc()
{
	if [ "${bufi}" -ge "${bufc}" ]; then
		c=''
	else
		eval "c=\${bufv_${bufi}}"
		bufi=$((${bufi} + 1))
	fi
}

# Push the character most recently read by getc() back onto the input so
# that the next getc() returns it again.  Does nothing at end of input,
# because getc() did not advance in that case.
ungetc()
{
	if [ -n "${c}" ]; then
		bufi=$((${bufi} - 1))
	fi
}

# Succeed if the text in ${1} looks like a copyright or license notice.
is_notice()
{
	printf '%s' "${1}" | grep -Eqi \
		'copyright|\(c\)|author|written|license|public domain'
}

# Consume a C++ comment whose leading "//" has already been read, up to and
# including the terminating newline, and print it (tab-prefixed, with the
# line's indentation ${1}) if it looks like a notice.
# Returns 1 if the input ends before the comment does.
extract_cxx_comment()
{
	local indent="${1}"
	shift 1
	local comment='//'

	while :; do
		getc
		case "${c}" in
			"${LF}")
				break
				;;
			'')
				printf '\tError: Unterminated C++ comment\n' \
					1>&2
				return 1
				;;
			*)
				comment="${comment}${c}"
				;;
		esac
	done

	if is_notice "${comment}"; then
		printf '\t%s\n' "${indent}${comment}"
	fi

	return 0
}

# Consume a C comment whose leading "/*" has already been read, up to and
# including the closing "*/", and print it (every line tab-prefixed, the
# first line also prefixed with the indentation ${1}) if it looks like a
# notice.
# Returns 1 if the input ends before the comment does.
extract_c_comment()
{
	local indent="${1}"
	shift 1
	local comment='/*'
	# True while the previous character was "*"; starts false so that the
	# "*" of the opening "/*" cannot also close the comment ("/*/").
	local asterisk=false

	while :; do
		getc
		case "${c}" in
			'*')
				asterisk=true
				comment="${comment}${c}"
				;;
			'/')
				comment="${comment}${c}"
				if ${asterisk}; then
					break
				fi
				;;
			'')
				printf '\tError: Unterminated C comment\n' 1>&2
				return 1
				;;
			*)
				asterisk=false
				comment="${comment}${c}"
				;;
		esac
	done

	if is_notice "${comment}"; then
		# A literal tab is used in the replacement because "\t" is not
		# a portable escape in sed replacement text.
		printf '%s\n' "${indent}${comment}" | sed "s/^/${HT}/"
	fi

	return 0
}

# Print the name ${1}, then scan the C/C++ source on standard input and print
# every comment that looks like a copyright or license notice.  String and
# character literals are skipped so that comment markers inside them are
# ignored.
# Returns 1 on an unterminated comment or literal.
extract()
{
	local fn="${1}"
	shift 1
	local newline=true
	local indent=''
	local quote=

	printf '%s\n' "${fn}"

	# Read file into array: awk emits one shell assignment per input
	# character (plus one newline per input line) and finally bufc.
	# Characters are split with substr() because an empty FS is not
	# specified by POSIX.  A single quote is emitted as '\'' so that it
	# survives the surrounding single quotes of the assignment.
	eval "$(awk -v squote="'" '
		BEGIN {
			j = 0;
			esc_squote = squote "\\" squote squote;
		}
		{
			n = length($0);
			for (i = 1; i <= n; ++i) {
				ch = substr($0, i, 1);
				if (ch == squote) {
					ch = esc_squote;
				}
				printf("bufv_%d=%s%s%s\n", j++,
					squote, ch, squote);
			}
			printf("bufv_%d=%s\n%s\n", j++, squote, squote);
		}
		END {
			printf("bufc=%d\n", j);
		}
	')"
	bufi=0

	while :; do
		getc
		case "${c}" in
			'/')
				newline=false
				getc
				case "${c}" in
					'/')
						extract_cxx_comment "${indent}"\
							|| return 1
						# The comment consumed the
						# newline, so a new line starts
						# here.
						newline=true
						indent=''
						;;
					'*')
						extract_c_comment "${indent}" \
							|| return 1
						;;
					*)
						# Just a division operator: the
						# following character must be
						# scanned normally (it may be a
						# quote or a newline).
						ungetc
						;;
				esac
				;;
			"${LF}")
				newline=true
				indent=''
				;;
			"${HT}" | ' ')
				if ${newline}; then
					indent="${indent}${c}"
				fi
				;;
			"'" | '"')
				newline=false
				quote="${c}"
				while :; do
					getc
					case "${c}" in
						"${quote}")
							break
							;;
						\\)
							# This doesn't
							# explicitly handle
							# octal, hexadecimal, or
							# Unicode sequences; but
							# it's good enough to
							# handle escaped quotes.
							getc
							;;
						'')
							# End of input inside a
							# literal: fail instead
							# of looping forever.
							printf '\tError: %s\n' \
								'Unterminated string or character literal' \
								1>&2
							return 1
							;;
					esac
				done
				;;
			'')
				break
				;;
			*)
				newline=false
				;;
		esac
	done

	return 0
}

# Process each file named on the command line ("-" means standard input), or
# standard input alone when no arguments are given.  Stops at the first file
# that fails and returns 1.
main()
{
	local f=

	if [ ${#} -eq 0 ]; then
		extract 'INPUT' || return 1
	else
		for f in "${@}"; do
			if [ x"${f}" = x'-' ]; then
				extract 'INPUT' || return 1
			else
				extract "${f}" 0<"${f}" || return 1
			fi
		done
	fi

	return 0
}

main "${@}"