diff options
Diffstat (limited to 'extract-copyright-comments')
-rwxr-xr-x | extract-copyright-comments | 185 |
1 files changed, 185 insertions, 0 deletions
#!/bin/sh
#
# Extract comments containing copyright notices from C/C++ files
#
# Copyright (C) 2020 Patrick McDermott
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Usage: extract-copyright-comments [FILE...]
#
# For each FILE, print the file name followed by every C ("/* */") or
# C++ ("//") comment that matches "copyright" or "(c)" (case-insensitive),
# each comment line indented by one tab.  With no arguments, or for an
# argument of "-", standard input is read and reported under the name
# "INPUT".  Errors go to standard error and make the script exit non-zero.

set -eu

LF='
'
# Literal tab for sed replacements; "\t" in a sed replacement is not portable.
TAB="$(printf '\t')"

# Character buffer shared by all scanner functions:
#   bufv_<N>  the N-th character of the current input (set by extract())
#   bufc      number of characters in the buffer
#   bufi      index of the next character getc() will return
#   c         the character most recently returned by getc() ('' at EOF)
bufc=0
bufi=0
c=

# Read the next buffered character into ${c}; ${c} is empty at end of input.
getc()
{
	if [ ${bufi} -ge ${bufc} ]; then
		c=''
	else
		eval "c=\${bufv_${bufi}}"
		bufi=$((${bufi} + 1))
	fi
}

# Push the character just returned by getc() back onto the buffer.
# A no-op at end of input, where getc() did not advance.
ungetc()
{
	if [ x"${c}" != x ]; then
		bufi=$((${bufi} - 1))
	fi
}

# Succeed if the text in ${1} looks like it contains a copyright notice.
has_copyright_notice()
{
	printf '%s' "${1}" | grep -Eqi 'copyright|\(c\)'
}

# Consume a C++ comment whose leading "//" has already been read and print
# it (tab-indented) if it contains a copyright notice.
# Returns 1 if the input ends before the terminating newline.
extract_cxx_comment()
{
	local comment='//'

	while :; do
		getc
		case "${c}" in
			"${LF}")
				break
				;;
			'')
				printf '\tError: Unterminated C++ comment\n' \
					1>&2
				return 1
				;;
			*)
				comment="${comment}${c}"
				;;
		esac
	done

	if has_copyright_notice "${comment}"; then
		printf '\t%s\n' "${comment}"
	fi

	return 0
}

# Consume a C comment whose leading "/*" has already been read and print
# it (every line tab-indented) if it contains a copyright notice.
# Returns 1 if the input ends before the closing "*/".
extract_c_comment()
{
	local comment='/*'
	local prev=

	while :; do
		getc
		case "${c}" in
			'')
				printf '\tError: Unterminated C comment\n' 1>&2
				return 1
				;;
		esac
		comment="${comment}${c}"

		# Track the previous character instead of reading ahead after
		# each "*", so that runs of stars such as "**/" still close the
		# comment.  ${prev} starts empty, so the "*" of the opening
		# "/*" cannot pair with an immediately following "/".
		case "${prev}${c}" in
			'*/')
				break
				;;
		esac
		prev="${c}"
	done

	if has_copyright_notice "${comment}"; then
		printf '%s\n' "${comment}" | sed "s/^/${TAB}/"
	fi

	return 0
}

# Skip a string or character literal whose opening quote (${1}) has
# already been read.  Returns 1 if the input ends inside the literal.
skip_literal()
{
	local quote="${1}"

	while :; do
		getc
		case "${c}" in
			"${quote}")
				return 0
				;;
			\\)
				# This doesn't explicitly handle octal,
				# hexadecimal, or Unicode sequences; but it's
				# good enough to handle escaped quotes.
				getc
				;;
			'')
				# Without this case the loop would spin forever
				# on an unterminated literal.
				printf '\tError: Unterminated literal\n' 1>&2
				return 1
				;;
		esac
	done
}

# Scan the C/C++ source on standard input, printing the label ${1} and then
# every comment that contains a copyright notice.
# Returns 1 on an unterminated comment or literal.
extract()
{
	local fn="${1}"

	printf '%s\n' "${fn}"

	# Read file into array: awk splits every line into characters
	# (FS='') and emits one shell assignment per character, plus one
	# for the newline ending each line and a final character count.
	eval "$(awk -v FS='' -v j=0 -v squote="'" -v esc_squote="'\\\\''" '
		{
			for (i = 1; i <= NF; ++i) {
				sub(squote, esc_squote, $i);
				printf("bufv_%d=" squote "%s" squote "\n",
					j++, $i);
			};
			printf("bufv_%d=" squote "\n" squote "\n", j++);
		}
		END {
			printf("bufc=%d", j);
		}
	')"
	bufi=0

	while :; do
		getc
		case "${c}" in
			'/')
				getc
				case "${c}" in
					'/')
						extract_cxx_comment || return 1
						;;
					'*')
						extract_c_comment || return 1
						;;
					*)
						# Plain division: re-scan the
						# character, since it may open
						# a literal (as in n/'x').
						ungetc
						;;
				esac
				;;
			"'" | '"')
				skip_literal "${c}" || return 1
				;;
			'')
				break
				;;
		esac
	done

	return 0
}

# Process standard input (no arguments or "-") or each named file in turn,
# stopping at the first file that fails to scan.
main()
{
	local f=

	if [ ${#} -eq 0 ]; then
		extract 'INPUT' || return 1
	else
		for f in "${@}"; do
			if [ x"${f}" = x'-' ]; then
				extract 'INPUT' || return 1
			else
				extract "${f}" 0<"${f}" || return 1
			fi
		done
	fi

	return 0
}

main "${@}"