#!/bin/sh
#
# Extract comments containing copyright notices from C/C++ files
#
# Usage: <script> [FILE|-]...
#   With no arguments, or for an argument of "-", standard input is read and
#   reported under the name "INPUT".
#
# Output: for each input, its name on one line, followed by every comment
# (indented by one tab) that matches the notice pattern in NOTICE_RE.
#
# Copyright (C) 2020  Patrick McDermott
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

set -eu

# Line feed and horizontal tab as single-character strings.  HT is built with
# printf so that the script does not depend on a literal tab surviving editing.
LF='
'
HT="$(printf '\t')"

# Extended regular expression (matched case-insensitively) that marks a
# comment as a copyright/license notice.
NOTICE_RE='copyright|\(c\)|license|public domain'

# Character buffer state shared by getc() and extract():
#   bufv_<n>  the n-th character of the current input (set via eval)
#   bufc      number of characters in the buffer
#   bufi      index of the next character to read
#   c         the character most recently returned by getc ('' at end of input)
bufc=0
bufi=0
c=

# Read the next buffered character into ${c} and advance ${bufi}.
# At end of input ${c} is set to the empty string and ${bufi} is unchanged.
getc()
{
  if [ "${bufi}" -ge "${bufc}" ]; then
    c=''
  else
    eval "c=\${bufv_${bufi}}"
    bufi=$((${bufi} + 1))
  fi
}

# Consume the remainder of a "//" comment (the "//" has already been read),
# including its terminating line feed, and print it if it looks like a notice.
# Arguments: $1 - leading whitespace of the line the comment starts on
# Outputs:   tab, indent and comment text on stdout when NOTICE_RE matches
# Returns:   0 on success, 1 if the input ends before a line feed is seen
extract_cxx_comment()
{
  local indent="${1}"
  shift 1
  local comment='//'

  while :; do
    getc
    case "${c}" in
      "${LF}")
        break
        ;;
      '')
        printf '\tError: Unterminated C++ comment\n' 1>&2
        return 1
        ;;
      *)
        comment="${comment}${c}"
        ;;
    esac
  done

  if printf '%s' "${comment}" | grep -Eqi "${NOTICE_RE}"; then
    printf '\t%s\n' "${indent}${comment}"
  fi

  return 0
}

# Consume the remainder of a "/* ... */" comment (the "/*" has already been
# read) and print it if it looks like a notice.
# Arguments: $1 - leading whitespace of the line the comment starts on
# Outputs:   the comment on stdout, every line prefixed with a tab, when
#            NOTICE_RE matches any of its lines
# Returns:   0 on success, 1 if the input ends before "*/" is seen
extract_c_comment()
{
  local indent="${1}"
  shift 1
  local comment='/*'
  # True while the previously read character was "*", so that a following "/"
  # closes the comment.
  local asterisk=false

  while :; do
    getc
    case "${c}" in
      '*')
        asterisk=true
        comment="${comment}${c}"
        ;;
      '/')
        comment="${comment}${c}"
        if ${asterisk}; then
          break
        fi
        ;;
      '')
        printf '\tError: Unterminated C comment\n' 1>&2
        return 1
        ;;
      *)
        asterisk=false
        comment="${comment}${c}"
        ;;
    esac
  done

  if printf '%s' "${comment}" | grep -Eqi "${NOTICE_RE}"; then
    # A literal tab is used in the replacement because "\t" there is not
    # portable across sed implementations.
    printf '%s\n' "${indent}${comment}" | sed "s/^/${HT}/"
  fi

  return 0
}

# Scan C/C++ source on standard input and print its notice comments.
# Arguments: $1 - name to print as the heading for this input
# Globals:   writes bufv_<n>, bufc, bufi and c
# Outputs:   the name, then matching comments, on stdout; errors on stderr
# Returns:   0 on success, 1 on unreadable input or an unterminated comment
#            or quote
extract()
{
  local fn="${1}"
  shift 1
  # True until the first non-blank character of the current line is seen.
  local newline=true
  # Leading whitespace collected for the current line.
  local indent=''
  local quote=
  local script=

  printf '%s\n' "${fn}"

  # Read file into array: awk emits one shell assignment per character
  # (single quotes escaped as '\''), a line feed after every record, and
  # finally the character count.  An empty FS splitting a record into
  # characters is an awk extension, though widely implemented.
  script="$(awk -v FS='' -v j=0 -v squote="'" -v esc_squote="'\\\\''" '
    {
      for (i = 1; i <= NF; ++i) {
        sub(squote, esc_squote, $i);
        printf("bufv_%d=" squote "%s" squote "\n", j++, $i);
      };
      printf("bufv_%d=" squote "\n" squote "\n", j++);
    }
    END {
      printf("bufc=%d", j);
    }
  ')" || return 1
  eval "${script}"
  bufi=0

  while :; do
    getc
    case "${c}" in
      '/')
        newline=false
        getc
        case "${c}" in
          '/')
            extract_cxx_comment "${indent}" || return 1
            # The comment consumed the line feed, so the next
            # character starts a fresh line.
            newline=true
            indent=''
            ;;
          '*')
            extract_c_comment "${indent}" || return 1
            ;;
          '')
            # End of input; the outer loop will see it too.
            ;;
          *)
            # Not a comment: push the lookahead character back
            # so that quotes and line feeds following a "/" are
            # still processed.
            bufi=$((${bufi} - 1))
            ;;
        esac
        ;;
      "${LF}")
        newline=true
        indent=''
        ;;
      "${HT}" | ' ')
        if ${newline}; then
          indent="${indent}${c}"
        fi
        ;;
      "'" | '"')
        # Skip string and character literals so that comment
        # markers inside them are ignored.
        newline=false
        quote="${c}"
        while :; do
          getc
          case "${c}" in
            "${quote}")
              break
              ;;
            \\)
              # This doesn't explicitly handle octal,
              # hexadecimal, or Unicode sequences; but
              # it's good enough to handle escaped
              # quotes.
              getc
              ;;
            '')
              # Without this arm an unmatched quote would
              # loop forever at end of input.
              printf '\tError: Unterminated quote\n' 1>&2
              return 1
              ;;
          esac
        done
        ;;
      '')
        break
        ;;
      *)
        newline=false
        ;;
    esac
  done

  return 0
}

# Process every file named on the command line ("-" or no arguments means
# standard input).  Stops at the first input that fails.
# Returns: 0 on success, 1 as soon as any input fails
main()
{
  local f=

  if [ ${#} -eq 0 ]; then
    extract 'INPUT' || return 1
  else
    for f in "${@}"; do
      if [ x"${f}" = x'-' ]; then
        extract 'INPUT' || return 1
      else
        extract "${f}" 0<"${f}" || return 1
      fi
    done
  fi

  return 0
}

main "${@}"