#!/bin/sh
#
# Extract comments containing copyright notices from C/C++ files
#
# Copyright (C) 2020  Patrick McDermott
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Abort on any unhandled command failure and on expansion of an unset variable.
set -e -u

# Single-character constants compared against input characters.
# Command substitution strips trailing line feeds, hence the sentinel that is
# appended and then removed again.
HT="$(printf '\t')"
LF="$(printf '\n.')"
LF="${LF%.}"

# Input buffer state shared by getc() and extract():
#   bufv_<N> - character N of the current input, one variable per character
#              (assigned by extract())
#   bufc     - number of buffered characters
#   bufi     - index of the next character getc() will hand out
#   c        - character most recently fetched by getc(); '' at end of input
c=
bufi=
bufc=

# Fetch the next buffered input character into the global ${c}.
#
# Globals:
#   bufv_<N> (read)    - buffered characters, one shell variable per character
#   bufc (read)        - number of buffered characters
#   bufi (read/write)  - index of the next character to return
#   c (written)        - the fetched character, or '' at end of input
#
# An empty or unset bufc/bufi is treated as 0, so a buffer that was never
# loaded reads as end of input instead of producing a malformed test and an
# unbound-variable abort under "set -u".
getc()
{
	if [ "${bufi:-0}" -ge "${bufc:-0}" ]; then
		c=''
	else
		# POSIX sh has no arrays; eval performs the indirect read of
		# bufv_<bufi>.
		eval "c=\${bufv_${bufi:-0}}"
		bufi=$((${bufi:-0} + 1))
	fi
}

# Consume the rest of a C++ ("//") comment, up to and including the line feed
# that ends it, and print the comment if it looks like a legal notice.
#
# The caller must already have consumed the leading "//".
#
# Arguments:
#   $1 - prefix printed in front of the comment (extract() passes the leading
#        whitespace of the line)
# Globals:
#   c (written through getc), LF (read)
# Outputs:
#   stdout - tab, prefix and comment text, when the comment matches
#   stderr - diagnostic when input ends before a line feed is seen
# Returns:
#   0 on success (printed or not), 1 on an unterminated comment
extract_cxx_comment()
{
	local indent="${1}"
	shift
	local text='//'

	getc
	while [ "x${c}" != "x${LF}" ]; do
		if [ -z "${c}" ]; then
			printf '\tError: Unterminated C++ comment\n' \
				1>&2
			return 1
		fi
		text="${text}${c}"
		getc
	done

	# Case-insensitive keyword screen; anything else is dropped silently.
	if ! printf '%s' "${text}" | grep -Eqi \
			'copyright|\(c\)|author|license|public domain'; then
		return 0
	fi

	printf '\t%s\n' "${indent}${text}"
	return 0
}

# Consume the rest of a C ("/* ... */") comment, up to and including the
# closing "*/", and print the comment if it looks like a legal notice.
#
# The caller must already have consumed the leading "/*".
#
# Arguments:
#   $1 - prefix printed in front of the comment's first line (extract() passes
#        the leading whitespace of the line)
# Globals:
#   c (written through getc), HT (read)
# Outputs:
#   stdout - the comment with a tab prepended to every line, when it matches
#   stderr - diagnostic when input ends before "*/" is seen
# Returns:
#   0 on success (printed or not), 1 on an unterminated comment
extract_c_comment()
{
	local indent="${1}"
	shift 1
	local comment='/*'
	# True while the previously read character was an asterisk.  It starts
	# out false so that the "/" in "/*/" does not close the comment.
	local asterisk=false

	while :; do
		getc
		case "${c}" in
			'*')
				asterisk=true
				comment="${comment}${c}"
				;;
			'/')
				comment="${comment}${c}"
				if ${asterisk}; then
					break
				fi
				;;
			'')
				printf '\tError: Unterminated C comment\n' 1>&2
				return 1
				;;
			*)
				asterisk=false
				comment="${comment}${c}"
				;;
		esac
	done

	if printf '%s' "${comment}" | grep -Eqi \
			'copyright|\(c\)|author|license|public domain'
		then
		# Insert a literal tab rather than "\t": POSIX leaves "\t" in
		# a sed replacement unspecified, so it only yields a tab where
		# sed implements it as an extension.
		printf '%s\n' "${indent}${comment}" | sed "s/^/${HT}/"
	fi

	return 0
}

# Scan one C/C++ source, read from standard input, and hand every comment to
# extract_cxx_comment() or extract_c_comment(), which decide what to print.
#
# Arguments:
#   $1 - name printed as the heading line for this input
# Globals:
#   bufv_<N>, bufc (written) - the input, one character per shell variable
#   bufi (written)           - read position; reset to 0, moved by getc()
#   c (written through getc), LF, HT (read)
# Outputs:
#   stdout - the heading line, then whatever the comment extractors print
#   stderr - diagnostics for unreadable input or unterminated constructs
# Returns:
#   0 on success, 1 if the input cannot be loaded or a comment, string or
#   character literal is still open at end of input
extract()
{
	local fn="${1}"
	shift 1
	# True while only whitespace has been seen on the current line.
	local newline=true
	# Leading whitespace of the current line, passed to the extractors.
	local indent=''
	local quote=
	local buf=

	printf '%s\n' "${fn}"

	# Read file into array
	#
	# awk emits one "bufv_<N>='<char>'" assignment per input character
	# (embedded single quotes escaped) plus a final "bufc=<count>".  This
	# relies on awk splitting a record into single characters when FS is
	# empty, which is a common extension rather than POSIX behaviour.
	#
	# The output is captured before it is evaluated so that a failing awk
	# is reported here instead of leaving an empty or stale buffer behind.
	buf="$(awk -v FS='' -v j=0 -v squote="'" -v esc_squote="'\\\\''" '
		{
			for (i = 1; i <= NF; ++i) {
				sub(squote, esc_squote, $i);
				printf("bufv_%d=" squote "%s" squote "\n",
					j++, $i);
			};
			printf("bufv_%d=" squote "\n" squote "\n", j++);
		}
		END {
			printf("bufc=%d", j);
		}
		')" || {
		printf '\tError: Failed to read input\n' 1>&2
		return 1
	}
	eval "${buf}"
	bufi=0

	while :; do
		getc
		case "${c}" in
			'/')
				newline=false
				getc
				case "${c}" in
					'/')
						extract_cxx_comment "${indent}"\
							|| return 1
						# The comment swallowed the line
						# feed, so a new line starts
						# here.
						newline=true
						indent=''
						;;
					'*')
						extract_c_comment "${indent}" \
							|| return 1
						;;
					'')
						# End of input; the outer loop
						# sees it on its next read.
						;;
					*)
						# Not a comment: un-read the
						# character so that a quote or
						# line feed directly after the
						# "/" is still interpreted.
						bufi=$((${bufi} - 1))
						;;
				esac
				;;
			"${LF}")
				newline=true
				indent=''
				;;
			"${HT}" | ' ')
				if ${newline}; then
					indent="${indent}${c}"
				fi
				;;
			"'" | '"')
				newline=false
				quote="${c}"
				while :; do
					getc
					case "${c}" in
						"${quote}")
							break
							;;
						\\)
							# This doesn't
							# explicitly handle
							# octal, hexadecimal, or
							# Unicode sequences; but
							# it's good enough to
							# handle escaped quotes.
							getc
							;;
						'')
							# Without this case the
							# loop never ends once
							# input is exhausted.
							printf '\t%s\n' \
								'Error: Unterminated quote' \
								1>&2
							return 1
							;;
					esac
				done
				;;
			'')
				break
				;;
			*)
				newline=false
				;;
		esac
	done

	return 0
}

# Run extract() once per operand, in order.
#
# Arguments:
#   $@ - paths of files to scan; "-" means standard input.  With no operands
#        standard input is scanned once.
# Outputs:
#   whatever extract() prints; standard input is labelled "INPUT"
# Returns:
#   0 when every input was processed, 1 as soon as one extract() call fails
main()
{
	local path=

	# No operands behaves exactly like a single "-".
	[ "${#}" -gt 0 ] || set -- '-'

	for path; do
		case "${path}" in
			'-')
				extract 'INPUT' || return 1
				;;
			*)
				extract "${path}" 0<"${path}" || return 1
				;;
		esac
	done

	return 0
}

# Entry point: hand the script's arguments to main() unchanged.
main "$@"