1 files changed, 185 insertions, 0 deletions
diff --git a/extract-copyright-comments b/extract-copyright-comments
new file mode 100755
index 0000000..6b603b6
--- /dev/null
+++ b/extract-copyright-comments
@@ -0,0 +1,185 @@
+#!/bin/sh
+#
+# Extract comments containing copyright notices from C/C++ files
+#
+# Copyright (C) 2020  Patrick McDermott
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+set -e -u  # abort on a failing command and on use of an unset variable
+
+LF='
+'  # a single newline, compared against the characters returned by getc()
+
+c=''     # character most recently fetched by getc(); empty at end of input
+bufi=''  # index of the next bufv_<n> variable that getc() will read
+bufc=''  # number of bufv_<n> variables; assigned by the code extract() evals
+
+getc()
+{
+	if [ ${bufi} -ge ${bufc} ]; then
+		c=''
+	else
+		eval "c=\${bufv_${bufi}}"
+		bufi=$((${bufi} + 1))
+	fi
+}
+
+extract_cxx_comment()
+{
+	local comment='//'
+
+	while :; do
+		getc
+		case "${c}" in
+			"${LF}")
+				break
+				;;
+			'')
+				printf '\tError: Unterminated C++ comment\n' \
+					1>&2
+				return 1
+				;;
+			*)
+				comment="${comment}${c}"
+				;;
+		esac
+	done
+
+	if printf '%s' "${comment}" | grep -Eqi 'copyright|\(c\)'; then
+		printf '\t%s\n' "${comment}"
+	fi
+
+	return 0
+}
+
+extract_c_comment()
+{
+	local comment='/*'
+
+	while :; do
+		getc
+		case "${c}" in
+			'*')
+				comment="${comment}${c}"
+				getc
+				comment="${comment}${c}"
+				case "${c}" in '/')
+					break
+				esac
+				;;
+			'')
+				printf '\tError: Unterminated C comment\n' 1>&2
+				return 1
+				;;
+			*)
+				comment="${comment}${c}"
+				;;
+		esac
+	done
+
+	if printf '%s' "${comment}" | grep -Eqi 'copyright|\(c\)'; then
+		printf '%s\n' "${comment}" | sed 's/^/\t/'
+	fi
+
+	return 0
+}
+
+# Print the heading ${1}, then scan the C/C++ source on standard input and
+# print each comment that contains a copyright notice.
+#
+# String and character literals are skipped so that comment markers inside
+# them are ignored.  Returns 1 if a comment or literal is still open at EOF.
+extract()
+{
+	local fn="${1}"
+	local quote=
+
+	printf '%s\n' "${fn}"
+
+	# Read file into array: awk emits one bufv_<n> assignment per character
+	# (plus a newline after every record) and finally bufc, their count.
+	eval "$(awk -v FS='' -v j=0 -v squote="'" -v esc_squote="'\\\\''" '
+		{
+			for (i = 1; i <= NF; ++i) {
+				sub(squote, esc_squote, $i);
+				printf("bufv_%d=" squote "%s" squote "\n",
+					j++, $i);
+			};
+			printf("bufv_%d=" squote "\n" squote "\n", j++);
+		}
+		END {
+			printf("bufc=%d", j);
+		}
+		')"
+	bufi=0
+
+	while :; do
+		getc
+		case "${c}" in
+			'/')
+				getc
+				case "${c}" in
+					'/') extract_cxx_comment || return 1 ;;
+					'*') extract_c_comment || return 1 ;;
+					# Not a comment: step back so that a
+					# quote right after "/" is not lost.
+					?*) bufi=$((${bufi} - 1)) ;;
+				esac
+				;;
+			"'" | '"')
+				quote="${c}"
+				while :; do
+					getc
+					case "${c}" in
+						"${quote}" | '') break ;;
+						# Skip the escaped character;
+						# enough for escaped quotes.
+						\\) getc ;;
+					esac
+				done
+				# getc() yields '' forever at EOF, so stop here.
+				if [ -z "${c}" ]; then
+					printf '\tError: %s\n' \
+						'Unterminated literal' 1>&2
+					return 1
+				fi
+				;;
+			'') break ;;
+		esac
+	done
+
+	return 0
+}
+
+main()
+{
+	local f=
+
+	if [ ${#} -eq 0 ]; then
+		extract 'INPUT' || return 1
+	else
+		for f in "${@}"; do
+			if [ x"${f}" = x'-' ]; then
+				extract 'INPUT' || return 1
+			else
+				extract "${f}" 0<"${f}" || return 1
+			fi
+		done
+	fi
+
+	return 0
+}
+
+main "${@}"