diff options
author | Patrick McDermott <patrick.mcdermott@libiquity.com> | 2020-08-09 16:51:31 (EDT) |
---|---|---|
committer | Patrick McDermott <patrick.mcdermott@libiquity.com> | 2020-08-09 16:51:31 (EDT) |
commit | e3bc617e20ab69d4c0285f33eac4fc64afcaae86 (patch) | |
tree | 445531419aecf464548d51dc03c315cad1ad98de /tests/aux/json.sh | |
parent | 88bf8172ee60504b9d8bd6d2b337097d3cb9aa32 (diff) |
Diffstat (limited to 'tests/aux/json.sh')
-rw-r--r-- | tests/aux/json.sh | 402 |
1 files changed, 0 insertions, 402 deletions
diff --git a/tests/aux/json.sh b/tests/aux/json.sh deleted file mode 100644 index 3c9783f..0000000 --- a/tests/aux/json.sh +++ /dev/null @@ -1,402 +0,0 @@ -# `json.sh`, a pure-shell JSON parser. -# -# Copied from <lib/json.sh> in repository <https://github.com/rcrowley/json.sh>. -# -# Copyright 2011 Richard Crowley. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY RICHARD CROWLEY ``AS IS'' AND ANY EXPRESS -# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL RICHARD CROWLEY OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# -# The views and conclusions contained in the software and documentation -# are those of the authors and should not be interpreted as representing -# official policies, either expressed or implied, of Richard Crowley. 
set -e

# Most users will be happy with the default '/' separator that makes trees
# of keys look like filesystem paths, but that breaks down if keys can
# contain slashes.  In that case, set `JSON_SEPARATOR` to the desired
# character before sourcing this file.
_J_S="${JSON_SEPARATOR:-/}"

# Literal tab and carriage-return characters, used by the state machine to
# recognize JSON whitespace.  They are generated with printf(1) so this file
# does not depend on invisible characters surviving copy-and-paste.
_J_TAB="$(printf '\t')"
_J_CR="$(printf '\r')"

# File descriptor 3 is commandeered for debug output, which is forwarded to
# standard error when `JSON_DEBUG` is non-empty and discarded otherwise.
if [ -n "$JSON_DEBUG" ]
then
    exec 3>&2
else
    exec 3>/dev/null
fi

# File descriptor 4 is commandeered as a sink for output that must not reach
# standard output (object keys are parsed as strings but only their values
# are reported).
exec 4>/dev/null

# Parse the JSON document on standard input and print one line per scalar
# value to standard output, in the form `<pathname> <type> <value>`.
#
# Exits the parsing (sub)shell with status 1 via `_json_die` on a syntax
# error, so the function returns non-zero for input that is rejected.
json() {

    # File descriptor used to emit values.  At times this is 4 to send
    # output to `/dev/null`.
    _J_FD=1

    # The "pathname" (concatenation of all keys leading to the current
    # value), the current state of the machine, and the state to which the
    # machine returns after completing a key or value.
    _J_PATHNAME="$_J_S" _J_STATE="whitespace" _J_STATE_DEFAULT="whitespace"

    # Consuming standard input one character at a time is quite a feat
    # within the confines of POSIX shell.  It is faked using sed(1) to place
    # each character on its own line.  Backslashes are doubled because the
    # `read` in `_json` is not raw and would otherwise swallow them.  Real
    # newline characters show up as zero-length "characters".
    #
    # IFS must contain only '\n' while reading so that space and tab
    # characters are not stripped.  It is assigned inside the subshell that
    # runs the state machine, so the caller's IFS (set or unset) is never
    # modified.
    sed "
        s/./&$(printf "\036")/g
        s/\\\\/\\\\\\\\/g
    " | tr "\036" "\n" | (
        IFS='
'
        _json
    )

}

# Consume the one-character-per-line stream from `sed` via a state machine.
# This function is called recursively in subshell environments to isolate
# values from their containing scope.
#
# The `read` builtin consumes one line at a time, but by now each line
# contains only a single character.
_json() {
    while read _J_C
    do
        _json_char
        # The previous character is still recorded for debugging, but the
        # state machine no longer consults it.
        _J_PREV_C="$_J_C"
    done
}

# Consume a single character as stored in `_J_C`.  This function is broken
# out from `_json` so it may be called to reconsume a character, as is
# necessary following the end of any number, since numbers do not have a
# well-known ending in the grammar.
#
# The state machine implemented here follows the diagrams of the JSON
# grammar on <http://json.org>.
_json_char() {
    echo " _J_C: $_J_C (${#_J_C}), _J_STATE: $_J_STATE" >&3
    case "$_J_STATE" in

        # The machine starts in the "whitespace" state and learns from
        # leading characters what state to enter next.  JSON's grammar
        # doesn't contain any tokens that are ambiguous in their first
        # character.
        #
        # Further whitespace characters are consumed and ignored.
        #
        # Arrays are parsed with a superset of the open-whitespace rules.
        # When an opening bracket is encountered, the remainder of the array
        # is parsed in a subshell which goes around again when a comma is
        # encountered and exits back to the containing scope when the
        # closing bracket is encountered.
        #
        # Objects are also parsed in a subshell to protect the containing
        # scope.
        "array-0"|"array-even"|"array-odd"|"whitespace")
            case "$_J_STATE" in
                "array-0")
                    case "$_J_C" in
                        "]") exit 0;;
                    esac;;
                "array-even")
                    case "$_J_C" in
                        ",")
                            # Bump the numeric last component of the
                            # pathname to address the next element.
                            _J_DIRNAME="${_J_PATHNAME%"$_J_S"*}"
                            [ "$_J_DIRNAME" = "$_J_S" ] && _J_DIRNAME=""
                            _J_BASENAME="${_J_PATHNAME##*"$_J_S"}"
                            _J_BASENAME="$(($_J_BASENAME + 1))"
                            _J_PATHNAME="$_J_DIRNAME$_J_S$_J_BASENAME"
                            _J_STATE="array-odd"
                            return;;
                        "]") exit 0;;
                    esac;;
            esac
            case "$_J_C" in
                "\"") _J_STATE="string" _J_V="";;
                "-") _J_STATE="number-negative" _J_V="$_J_C";;
                0) _J_STATE="number-leading-zero" _J_V="$_J_C";;
                [1-9]) _J_STATE="number-leading-nonzero" _J_V="$_J_C";;
                "[")
                    (
                        # Build the element pathname with the configured
                        # separator, not a hard-coded '/'.
                        [ "$_J_PATHNAME" = "$_J_S" ] && _J_PATHNAME=""
                        _J_PATHNAME="$_J_PATHNAME${_J_S}0"
                        _J_STATE="array-0" _J_STATE_DEFAULT="array-even"
                        _json
                    ) || exit "$?"  # propagate errors even where -e is off
                    _J_STATE="$_J_STATE_DEFAULT" _J_V="";;
                "f"|"t") _J_STATE="boolean" _J_V="$_J_C";;
                "n") _J_STATE="null" _J_V="$_J_C";;
                "{")
                    (
                        _J_STATE="object-0" _J_STATE_DEFAULT="object-even"
                        _json
                    ) || exit "$?"  # propagate errors even where -e is off
                    _J_STATE="$_J_STATE_DEFAULT" _J_V="";;
                # Space, newline (seen as an empty character), tab, CR.
                " "|""|"$_J_TAB"|"$_J_CR") ;;
                *) _json_die "syntax: $_J_PATHNAME";;
            esac;;

        # Boolean values are multicharacter literals but they're unique
        # from their first character, so syntax errors can be raised as
        # soon as the input goes south.
        "boolean")
            case "$_J_V$_J_C" in
                "f"|"fa"|"fal"|"fals"|"t"|"tr"|"tru") _J_V="$_J_V$_J_C";;
                "false"|"true")
                    _J_STATE="$_J_STATE_DEFAULT"
                    echo "$_J_PATHNAME boolean $_J_V$_J_C" >&$_J_FD;;
                *) _json_die "syntax: $_J_PATHNAME boolean $_J_V$_J_C";;
            esac;;

        # Objects begin in the "object-0" state, which accepts either a
        # string key or the closing brace of an empty object.  A key is
        # parsed as an ordinary string (with its output discarded through
        # file descriptor 4), after which the machine proceeds to the
        # "object-value" state.
        #
        # Extra whitespace characters are consumed and ignored.
        "object-0")
            case "$_J_C" in
                "\"")
                    _J_FD=4
                    _J_STATE="string"
                    _J_STATE_DEFAULT="object-value"
                    _J_V="";;
                "}") exit 0;;
                " "|""|"$_J_TAB"|"$_J_CR") ;;
                *) _json_die "syntax: $_J_PATHNAME";;
            esac;;

        # "object-even" is like "object-0" but additionally consumes a
        # comma, which enforces that another key/value pair is coming.
        "object-even")
            case "$_J_C" in
                "\"")
                    _J_FD=4
                    _J_STATE="string"
                    _J_STATE_DEFAULT="object-value"
                    _J_V="";;
                ",") _J_STATE="object-odd";;
                "}") exit 0;;
                " "|""|"$_J_TAB"|"$_J_CR") ;;
                *) _json_die "syntax: $_J_PATHNAME";;
            esac;;

        # Object values are parsed in their own subshell.  Once the value
        # is complete, that subshell uses its exit status to tell the
        # containing scope which character it consumed last: 101 for a
        # comma, 102 for a closing brace, 0 for anything else.
        "object-exit")
            case "$_J_C" in
                ",") exit 101;;
                "}") exit 102;;
                *) exit 0;;
            esac;;

        # "object-odd" is like "object-0" but cannot consume a closing
        # brace because a comma has just been consumed.
        "object-odd")
            case "$_J_C" in
                "\"")
                    _J_FD=4
                    _J_STATE="string"
                    _J_STATE_DEFAULT="object-value"
                    _J_V="";;
                " "|""|"$_J_TAB"|"$_J_CR") ;;
                *) _json_die "syntax: $_J_PATHNAME";;
            esac;;

        # After a string key has been consumed, a colon and a value are
        # parsed.  The value is parsed in a subshell so the pathname can
        # have the key appended to it without affecting later keys.
        "object-value")
            case "$_J_C" in
                ":")
                    _J_FD=1
                    # State of this object once the value is complete.
                    _J_STATE="object-even"
                    (
                        [ "$_J_PATHNAME" = "$_J_S" ] && _J_PATHNAME=""
                        _J_PATHNAME="$_J_PATHNAME$_J_S$_J_V"
                        _J_STATE="whitespace"
                        _J_STATE_DEFAULT="object-exit"
                        _json
                    ) || {
                        _J_STATUS="$?"
                        case "$_J_STATUS" in
                            # Reconsume the delimiter the value subshell
                            # swallowed.  A comma moves this object to
                            # "object-odd"; a brace ends it.
                            101) _J_C=","; _json_char;;
                            102) _J_C="}"; _json_char;;
                            # Anything else is a real failure (for
                            # example a syntax error) and must not be
                            # silently ignored.
                            *) exit "$_J_STATUS";;
                        esac
                    };;
                " "|""|"$_J_TAB"|"$_J_CR") ;;
                *) _json_die "syntax: $_J_PATHNAME";;
            esac;;

        # Null values work exactly like boolean values.  See above.
        "null")
            case "$_J_V$_J_C" in
                "n"|"nu"|"nul") _J_V="$_J_V$_J_C";;
                "null")
                    _J_STATE="$_J_STATE_DEFAULT"
                    echo "$_J_PATHNAME null null" >&$_J_FD;;
                *) _json_die "syntax: $_J_PATHNAME null $_J_V$_J_C";;
            esac;;

        # Numbers that encounter a '.' become floating point and may
        # continue consuming digits or become scientific-notation.  Any
        # other character ends the number and is reconsumed in the default
        # state.
        "number-float")
            case "$_J_C" in
                [0-9]) _J_V="$_J_V$_J_C";;
                "E"|"e") _J_STATE="number-sci" _J_V="$_J_V$_J_C";;
                *)
                    _J_STATE="$_J_STATE_DEFAULT"
                    echo "$_J_PATHNAME number $_J_V" >&$_J_FD
                    _json_char;;
            esac;;

        # Entry point for numbers whose first digit is non-zero.  From
        # here a number may continue as an integer, become floating-point
        # by consuming a '.', or become scientific-notation by consuming an
        # 'E' or 'e'.  Any other character ends the number.
        "number-leading-nonzero")
            case "$_J_C" in
                ".") _J_STATE="number-float" _J_V="$_J_V$_J_C";;
                [0-9]) _J_V="$_J_V$_J_C";;
                "E"|"e") _J_STATE="number-sci" _J_V="$_J_V$_J_C";;
                *)
                    _J_STATE="$_J_STATE_DEFAULT"
                    echo "$_J_PATHNAME number $_J_V" >&$_J_FD
                    _json_char;;
            esac;;

        # Entry point for numbers whose first digit is zero.  A further
        # digit is a syntax error; otherwise the rules match those for a
        # non-zero leading digit.
        "number-leading-zero")
            case "$_J_C" in
                ".") _J_STATE="number-float" _J_V="$_J_V$_J_C";;
                [0-9]) _json_die "syntax: $_J_PATHNAME number $_J_V$_J_C";;
                "E"|"e") _J_STATE="number-sci" _J_V="$_J_V$_J_C";;
                *)
                    _J_STATE="$_J_STATE_DEFAULT"
                    echo "$_J_PATHNAME number $_J_V" >&$_J_FD
                    _json_char;;
            esac;;

        # Entry point for numbers that start with '-'.  A digit must
        # follow; a lone '-' is not a number and is a syntax error.
        "number-negative")
            case "$_J_C" in
                0) _J_STATE="number-leading-zero" _J_V="$_J_V$_J_C";;
                [1-9])
                    _J_STATE="number-leading-nonzero"
                    _J_V="$_J_V$_J_C";;
                *) _json_die "syntax: $_J_PATHNAME number $_J_V$_J_C";;
            esac;;

        # Numbers that encounter an 'E' or 'e' become scientific-notation
        # and consume digits, optionally prefixed by a '+' or '-'.  Any
        # other character immediately following the 'E' or 'e' is a syntax
        # error.
        "number-sci")
            case "$_J_C" in
                "+") _J_STATE="number-sci-pos" _J_V="$_J_V$_J_C";;
                "-") _J_STATE="number-sci-neg" _J_V="$_J_V$_J_C";;
                [0-9]) _J_STATE="number-sci-pos" _J_V="$_J_V$_J_C";;
                *) _json_die "syntax: $_J_PATHNAME number $_J_V$_J_C";;
            esac;;

        # Once in these states, numbers may consume digits forever.  Any
        # other character ends the number and is reconsumed in the default
        # state.
        "number-sci-neg"|"number-sci-pos")
            case "$_J_C" in
                [0-9]) _J_V="$_J_V$_J_C";;
                *)
                    _J_STATE="$_J_STATE_DEFAULT"
                    echo "$_J_PATHNAME number $_J_V" >&$_J_FD
                    _json_char;;
            esac;;

        # Inside a string, a backslash switches to the "string-escape"
        # state so that exactly one following character is treated as
        # escaped.  An unescaped double quote ends the string.
        #
        # Escape sequences are not decoded (that would break the
        # line-oriented output format).
        # NOTE(review): how `echo` prints backslashes in the value differs
        # between shells; confirm against the shell this runs under.
        "string")
            case "$_J_C" in
                "\\") _J_STATE="string-escape";;
                "\"")
                    _J_STATE="$_J_STATE_DEFAULT"
                    echo "$_J_PATHNAME string $_J_V" >&$_J_FD;;
                *) _J_V="$_J_V$_J_C";;
            esac;;

        # The character after a backslash.  Control and Unicode escapes
        # are kept in escaped form; every other escaped character (quote,
        # slash, backslash, ...) is stored as itself.  Either way the
        # escape is now complete, so the next character is ordinary again.
        "string-escape")
            case "$_J_C" in
                "b"|"f"|"n"|"r"|"t"|"u") _J_V="$_J_V\\\\$_J_C";;
                *) _J_V="$_J_V$_J_C";;
            esac
            _J_STATE="string";;

    esac
}

# Print an error message to standard error and exit with status 1.  The
# message is the concatenation of all the arguments to this function.
_json_die() {
    echo "json.sh: $*" >&2
    exit 1
}