From d69c6e1dcfd96adb9dfe012b80d3922b89be6dbf Mon Sep 17 00:00:00 2001 From: Mark Dilger Date: Wed, 22 Jan 2020 13:28:52 -0800 Subject: [PATCH 09/11] Making json parsing work without throwing exceptions. This is largely based on Robert Haas's patch v2-0004-WIP-Return-errors-rather-than-using-ereport.patch --- src/backend/utils/adt/json.c | 473 ++++++++---------------------- src/backend/utils/adt/jsonb.c | 5 +- src/backend/utils/adt/jsonfuncs.c | 254 +++++++++++++++- src/include/common/jsonapi.h | 30 +- src/include/utils/jsonfuncs.h | 34 +++ 5 files changed, 432 insertions(+), 364 deletions(-) diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index ff0764dbc5..54075d07e3 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -34,6 +34,13 @@ #include "utils/syscache.h" #include "utils/typcache.h" +#define INSIST(x) \ +do { \ + JsonParseErrorType parse_result; \ + if((parse_result = (x)) != JSON_SUCCESS) \ + return parse_result; \ +} while (0) + /* * The context of the parser is maintained by the recursive descent * mechanism, but is passed explicitly to the error reporting routine @@ -76,19 +83,17 @@ typedef struct JsonAggState Oid val_output_func; } JsonAggState; -static inline void json_lex(JsonLexContext *lex); -static inline void json_lex_string(JsonLexContext *lex); -static inline void json_lex_number(JsonLexContext *lex, char *s, - bool *num_err, int *total_len); -static inline void parse_scalar(JsonLexContext *lex, const JsonSemAction *sem); -static void parse_object_field(JsonLexContext *lex, const JsonSemAction *sem); -static void parse_object(JsonLexContext *lex, const JsonSemAction *sem); -static void parse_array_element(JsonLexContext *lex, const JsonSemAction *sem); -static void parse_array(JsonLexContext *lex, const JsonSemAction *sem); -static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex) pg_attribute_noreturn(); -static void report_invalid_token(JsonLexContext *lex) 
pg_attribute_noreturn(); -static int report_json_context(JsonLexContext *lex); -static char *extract_mb_char(char *s); +static inline JsonTokenType lex_peek(JsonLexContext *lex) __attribute__((warn_unused_result)); +static inline JsonParseErrorType lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token) __attribute__((warn_unused_result)); +static inline JsonParseErrorType json_lex_string(JsonLexContext *lex) __attribute__((warn_unused_result)); +static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s, + bool *num_err, int *total_len) __attribute__((warn_unused_result)); +static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, const JsonSemAction *sem) __attribute__((warn_unused_result)); +static JsonParseErrorType parse_object_field(JsonLexContext *lex, const JsonSemAction *sem) __attribute__((warn_unused_result)); +static JsonParseErrorType parse_object(JsonLexContext *lex, const JsonSemAction *sem) __attribute__((warn_unused_result)); +static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSemAction *sem) __attribute__((warn_unused_result)); +static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem) __attribute__((warn_unused_result)); +static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex) __attribute__((warn_unused_result)); static void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds); static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, @@ -126,13 +131,14 @@ lex_peek(JsonLexContext *lex) * move the lexer to the next token if the current look_ahead token matches * the parameter token. Otherwise, report an error. 
*/ -static inline void +static inline JsonParseErrorType lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token) { if (lex_peek(lex) == token) - json_lex(lex); + INSIST(json_lex(lex)); else - report_parse_error(ctx, lex); + return report_parse_error(ctx, lex); + return JSON_SUCCESS; } /* chars to consider as part of an alphanumeric token */ @@ -175,7 +181,8 @@ IsValidJsonNumber(const char *str, int len) dummy_lex.input_length = len; } - json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len); + if (JSON_SUCCESS != json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len)) + return false; return (!numeric_error) && (total_len == dummy_lex.input_length); } @@ -192,7 +199,7 @@ json_in(PG_FUNCTION_ARGS) /* validate it */ lex = makeJsonLexContext(result, false); - pg_parse_json(lex, &nullSemAction); + pg_parse_json_or_throw(lex, &nullSemAction); /* Internal representation is the same as text, for now */ PG_RETURN_TEXT_P(result); @@ -239,7 +246,7 @@ json_recv(PG_FUNCTION_ARGS) /* Validate it. */ lex = makeJsonLexContextCstringLen(str, nbytes, false); - pg_parse_json(lex, &nullSemAction); + pg_parse_json_or_throw(lex, &nullSemAction); PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes)); } @@ -254,13 +261,13 @@ json_recv(PG_FUNCTION_ARGS) * action routines to be called at appropriate spots during parsing, and a * pointer to a state object to be passed to those routines. 
*/ -void +JsonParseErrorType pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem) { JsonTokenType tok; /* get the initial token */ - json_lex(lex); + INSIST(json_lex(lex)); tok = lex_peek(lex); @@ -268,17 +275,17 @@ pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem) switch (tok) { case JSON_TOKEN_OBJECT_START: - parse_object(lex, sem); + INSIST(parse_object(lex, sem)); break; case JSON_TOKEN_ARRAY_START: - parse_array(lex, sem); + INSIST(parse_array(lex, sem)); break; default: - parse_scalar(lex, sem); /* json can be a bare scalar */ + INSIST(parse_scalar(lex, sem)); /* json can be a bare scalar */ } - lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END); - + INSIST(lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END)); + return JSON_SUCCESS; } /* @@ -305,19 +312,20 @@ json_count_array_elements(JsonLexContext *lex) copylex.lex_level++; count = 0; - lex_expect(JSON_PARSE_ARRAY_START, &copylex, JSON_TOKEN_ARRAY_START); + INSIST(lex_expect(JSON_PARSE_ARRAY_START, &copylex, JSON_TOKEN_ARRAY_START)); if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END) { while (1) { count++; - parse_array_element(&copylex, &nullSemAction); + if (JSON_SUCCESS != parse_array_element(&copylex, &nullSemAction)) + break; if (copylex.token_type != JSON_TOKEN_COMMA) break; - json_lex(&copylex); + INSIST(json_lex(&copylex)); } } - lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex, JSON_TOKEN_ARRAY_END); + INSIST(lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex, JSON_TOKEN_ARRAY_END)); return count; } @@ -331,7 +339,7 @@ json_count_array_elements(JsonLexContext *lex) * - object ( { } ) * - object field */ -static inline void +static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, const JsonSemAction *sem) { char *val = NULL; @@ -348,14 +356,14 @@ parse_scalar(JsonLexContext *lex, const JsonSemAction *sem) case JSON_TOKEN_NULL: break; default: - report_parse_error(JSON_PARSE_VALUE, lex); + return report_parse_error(JSON_PARSE_VALUE, lex); } /* if no semantic function, just consume the token */ if (sfunc == NULL) { - 
json_lex(lex); - return; + INSIST(json_lex(lex)); + return JSON_SUCCESS; } /* extract the de-escaped string value, or the raw lexeme */ @@ -374,13 +382,14 @@ parse_scalar(JsonLexContext *lex, const JsonSemAction *sem) } /* consume the token */ - json_lex(lex); + INSIST(json_lex(lex)); /* invoke the callback */ (*sfunc) (sem->semstate, val, tok); + return JSON_SUCCESS; } -static void +static JsonParseErrorType parse_object_field(JsonLexContext *lex, const JsonSemAction *sem) { /* @@ -396,12 +405,12 @@ parse_object_field(JsonLexContext *lex, const JsonSemAction *sem) JsonTokenType tok; if (lex_peek(lex) != JSON_TOKEN_STRING) - report_parse_error(JSON_PARSE_STRING, lex); + return report_parse_error(JSON_PARSE_STRING, lex); if ((ostart != NULL || oend != NULL) && lex->strval != NULL) fname = pstrdup(lex->strval->data); - json_lex(lex); + INSIST(json_lex(lex)); - lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON); + INSIST(lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON)); tok = lex_peek(lex); isnull = tok == JSON_TOKEN_NULL; @@ -412,20 +421,21 @@ parse_object_field(JsonLexContext *lex, const JsonSemAction *sem) switch (tok) { case JSON_TOKEN_OBJECT_START: - parse_object(lex, sem); + INSIST(parse_object(lex, sem)); break; case JSON_TOKEN_ARRAY_START: - parse_array(lex, sem); + INSIST(parse_array(lex, sem)); break; default: - parse_scalar(lex, sem); + INSIST(parse_scalar(lex, sem)); } if (oend != NULL) (*oend) (sem->semstate, fname, isnull); + return JSON_SUCCESS; } -static void +static JsonParseErrorType parse_object(JsonLexContext *lex, const JsonSemAction *sem) { /* @@ -450,35 +460,36 @@ parse_object(JsonLexContext *lex, const JsonSemAction *sem) lex->lex_level++; Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START); - json_lex(lex); + INSIST(json_lex(lex)); tok = lex_peek(lex); switch (tok) { case JSON_TOKEN_STRING: - parse_object_field(lex, sem); + INSIST(parse_object_field(lex, sem)); while (lex_peek(lex) == JSON_TOKEN_COMMA) { - json_lex(lex); - 
parse_object_field(lex, sem); + INSIST(json_lex(lex)); + INSIST(parse_object_field(lex, sem)); } break; case JSON_TOKEN_OBJECT_END: break; default: /* case of an invalid initial token inside the object */ - report_parse_error(JSON_PARSE_OBJECT_START, lex); + return report_parse_error(JSON_PARSE_OBJECT_START, lex); } - lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END); + INSIST(lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END)); lex->lex_level--; if (oend != NULL) (*oend) (sem->semstate); + return JSON_SUCCESS; } -static void +static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSemAction *sem) { json_aelem_action astart = sem->array_element_start; @@ -496,20 +507,21 @@ parse_array_element(JsonLexContext *lex, const JsonSemAction *sem) switch (tok) { case JSON_TOKEN_OBJECT_START: - parse_object(lex, sem); + INSIST(parse_object(lex, sem)); break; case JSON_TOKEN_ARRAY_START: - parse_array(lex, sem); + INSIST(parse_array(lex, sem)); break; default: - parse_scalar(lex, sem); + INSIST(parse_scalar(lex, sem)); } if (aend != NULL) (*aend) (sem->semstate, isnull); + return JSON_SUCCESS; } -static void +static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem) { /* @@ -532,31 +544,32 @@ parse_array(JsonLexContext *lex, const JsonSemAction *sem) */ lex->lex_level++; - lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START); + INSIST(lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START)); if (lex_peek(lex) != JSON_TOKEN_ARRAY_END) { - parse_array_element(lex, sem); + INSIST(parse_array_element(lex, sem)); while (lex_peek(lex) == JSON_TOKEN_COMMA) { - json_lex(lex); - parse_array_element(lex, sem); + INSIST(json_lex(lex)); + INSIST(parse_array_element(lex, sem)); } } - lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END); + INSIST(lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END)); lex->lex_level--; if (aend != NULL) (*aend) (sem->semstate); + return JSON_SUCCESS; 
} /* * Lex one token from the input stream. */ -static inline void +JsonParseErrorType json_lex(JsonLexContext *lex) { char *s; @@ -619,12 +632,12 @@ json_lex(JsonLexContext *lex) break; case '"': /* string */ - json_lex_string(lex); + INSIST(json_lex_string(lex)); lex->token_type = JSON_TOKEN_STRING; break; case '-': /* Negative number. */ - json_lex_number(lex, s + 1, NULL, NULL); + INSIST(json_lex_number(lex, s + 1, NULL, NULL)); lex->token_type = JSON_TOKEN_NUMBER; break; case '0': @@ -638,7 +651,7 @@ json_lex(JsonLexContext *lex) case '8': case '9': /* Positive number. */ - json_lex_number(lex, s, NULL, NULL); + INSIST(json_lex_number(lex, s, NULL, NULL)); lex->token_type = JSON_TOKEN_NUMBER; break; default: @@ -666,7 +679,7 @@ json_lex(JsonLexContext *lex) { lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; - report_invalid_token(lex); + return JSON_INVALID_TOKEN; } /* @@ -683,21 +696,22 @@ json_lex(JsonLexContext *lex) else if (memcmp(s, "null", 4) == 0) lex->token_type = JSON_TOKEN_NULL; else - report_invalid_token(lex); + return JSON_INVALID_TOKEN; } else if (p - s == 5 && memcmp(s, "false", 5) == 0) lex->token_type = JSON_TOKEN_FALSE; else - report_invalid_token(lex); + return JSON_INVALID_TOKEN; } } /* end of switch */ + return JSON_SUCCESS; } /* * The next token in the input stream is known to be a string; lex it. */ -static inline void +static inline JsonParseErrorType json_lex_string(JsonLexContext *lex) { char *s; @@ -718,7 +732,7 @@ json_lex_string(JsonLexContext *lex) if (len >= lex->input_length) { lex->token_terminator = s; - report_invalid_token(lex); + return JSON_INVALID_TOKEN; } else if (*s == '"') break; @@ -727,12 +741,7 @@ json_lex_string(JsonLexContext *lex) /* Per RFC4627, these characters MUST be escaped. 
*/ /* Since *s isn't printable, exclude it from the context string */ lex->token_terminator = s; - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Character with value 0x%02x must be escaped.", - (unsigned char) *s), - report_json_context(lex))); + return JSON_ESCAPING_REQUIRED; } else if (*s == '\\') { @@ -742,7 +751,7 @@ json_lex_string(JsonLexContext *lex) if (len >= lex->input_length) { lex->token_terminator = s; - report_invalid_token(lex); + return JSON_INVALID_TOKEN; } else if (*s == 'u') { @@ -756,7 +765,7 @@ json_lex_string(JsonLexContext *lex) if (len >= lex->input_length) { lex->token_terminator = s; - report_invalid_token(lex); + return JSON_INVALID_TOKEN; } else if (*s >= '0' && *s <= '9') ch = (ch * 16) + (*s - '0'); @@ -767,12 +776,7 @@ json_lex_string(JsonLexContext *lex) else { lex->token_terminator = s + pg_mblen(s); - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", - "json"), - errdetail("\"\\u\" must be followed by four hexadecimal digits."), - report_json_context(lex))); + return JSON_UNICODE_ESCAPE_FORMAT; } } if (lex->strval != NULL) @@ -783,33 +787,20 @@ json_lex_string(JsonLexContext *lex) if (ch >= 0xd800 && ch <= 0xdbff) { if (hi_surrogate != -1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", - "json"), - errdetail("Unicode high surrogate must not follow a high surrogate."), - report_json_context(lex))); + return JSON_UNICODE_HIGH_SURROGATE; hi_surrogate = (ch & 0x3ff) << 10; continue; } else if (ch >= 0xdc00 && ch <= 0xdfff) { if (hi_surrogate == -1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Unicode low surrogate must follow a high surrogate."), - report_json_context(lex))); + return JSON_UNICODE_LOW_SURROGATE; ch = 0x10000 + hi_surrogate + (ch 
& 0x3ff); hi_surrogate = -1; } if (hi_surrogate != -1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Unicode low surrogate must follow a high surrogate."), - report_json_context(lex))); + return JSON_UNICODE_LOW_SURROGATE; /* * For UTF8, replace the escape sequence by the actual @@ -821,11 +812,7 @@ json_lex_string(JsonLexContext *lex) if (ch == 0) { /* We can't allow this, since our TEXT type doesn't */ - ereport(ERROR, - (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), - errmsg("unsupported Unicode escape sequence"), - errdetail("\\u0000 cannot be converted to text."), - report_json_context(lex))); + return JSON_UNICODE_CODE_POINT_ZERO; } else if (GetDatabaseEncoding() == PG_UTF8) { @@ -843,25 +830,14 @@ json_lex_string(JsonLexContext *lex) appendStringInfoChar(lex->strval, (char) ch); } else - { - ereport(ERROR, - (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), - errmsg("unsupported Unicode escape sequence"), - errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."), - report_json_context(lex))); - } + return JSON_UNICODE_HIGH_ESCAPE; } } else if (lex->strval != NULL) { if (hi_surrogate != -1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", - "json"), - errdetail("Unicode low surrogate must follow a high surrogate."), - report_json_context(lex))); + return JSON_UNICODE_LOW_SURROGATE; switch (*s) { @@ -888,13 +864,7 @@ json_lex_string(JsonLexContext *lex) default: /* Not a valid string escape, so error out. 
*/ lex->token_terminator = s + pg_mblen(s); - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", - "json"), - errdetail("Escape sequence \"\\%s\" is invalid.", - extract_mb_char(s)), - report_json_context(lex))); + return JSON_ESCAPING_INVALID; } } else if (strchr("\"\\/bfnrt", *s) == NULL) @@ -907,39 +877,26 @@ json_lex_string(JsonLexContext *lex) * shown it's not a performance win. */ lex->token_terminator = s + pg_mblen(s); - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Escape sequence \"\\%s\" is invalid.", - extract_mb_char(s)), - report_json_context(lex))); + return JSON_ESCAPING_INVALID; } } else if (lex->strval != NULL) { if (hi_surrogate != -1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Unicode low surrogate must follow a high surrogate."), - report_json_context(lex))); - + return JSON_UNICODE_LOW_SURROGATE; appendStringInfoChar(lex->strval, *s); } } if (hi_surrogate != -1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Unicode low surrogate must follow a high surrogate."), - report_json_context(lex))); + return JSON_UNICODE_LOW_SURROGATE; /* Hooray, we found the end of the string! */ lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; + return JSON_SUCCESS; } /* @@ -970,7 +927,7 @@ json_lex_string(JsonLexContext *lex) * raising an error for a badly-formed number. Also, if total_len is not NULL * the distance from lex->input to the token end+1 is returned to *total_len. 
*/ -static inline void +static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s, bool *num_err, int *total_len) { @@ -1059,8 +1016,9 @@ json_lex_number(JsonLexContext *lex, char *s, lex->token_terminator = s; /* handle error if any */ if (error) - report_invalid_token(lex); + return JSON_INVALID_TOKEN; } + return JSON_SUCCESS; } /* @@ -1068,219 +1026,36 @@ json_lex_number(JsonLexContext *lex, char *s, * * lex->token_start and lex->token_terminator must identify the current token. */ -static void +static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex) { - char *token; - int toklen; - /* Handle case where the input ended prematurely. */ if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("The input string ended unexpectedly."), - report_json_context(lex))); - - /* Separate out the current token. */ - toklen = lex->token_terminator - lex->token_start; - token = palloc(toklen + 1); - memcpy(token, lex->token_start, toklen); - token[toklen] = '\0'; - - /* Complain, with the appropriate detail message. 
*/ - if (ctx == JSON_PARSE_END) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Expected end of input, but found \"%s\".", - token), - report_json_context(lex))); - else - { - switch (ctx) - { - case JSON_PARSE_VALUE: - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Expected JSON value, but found \"%s\".", - token), - report_json_context(lex))); - break; - case JSON_PARSE_STRING: - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Expected string, but found \"%s\".", - token), - report_json_context(lex))); - break; - case JSON_PARSE_ARRAY_START: - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Expected array element or \"]\", but found \"%s\".", - token), - report_json_context(lex))); - break; - case JSON_PARSE_ARRAY_NEXT: - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Expected \",\" or \"]\", but found \"%s\".", - token), - report_json_context(lex))); - break; - case JSON_PARSE_OBJECT_START: - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Expected string or \"}\", but found \"%s\".", - token), - report_json_context(lex))); - break; - case JSON_PARSE_OBJECT_LABEL: - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Expected \":\", but found \"%s\".", - token), - report_json_context(lex))); - break; - case JSON_PARSE_OBJECT_NEXT: - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Expected \",\" or \"}\", but 
found \"%s\".", - token), - report_json_context(lex))); - break; - case JSON_PARSE_OBJECT_COMMA: - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Expected string, but found \"%s\".", - token), - report_json_context(lex))); - break; - default: - elog(ERROR, "unexpected json parse state: %d", ctx); - } - } -} - -/* - * Report an invalid input token. - * - * lex->token_start and lex->token_terminator must identify the token. - */ -static void -report_invalid_token(JsonLexContext *lex) -{ - char *token; - int toklen; - - /* Separate out the offending token. */ - toklen = lex->token_terminator - lex->token_start; - token = palloc(toklen + 1); - memcpy(token, lex->token_start, toklen); - token[toklen] = '\0'; - - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "json"), - errdetail("Token \"%s\" is invalid.", token), - report_json_context(lex))); -} + return JSON_EXPECTED_MORE; -/* - * Report a CONTEXT line for bogus JSON input. - * - * lex->token_terminator must be set to identify the spot where we detected - * the error. Note that lex->token_start might be NULL, in case we recognized - * error at EOF. - * - * The return value isn't meaningful, but we make it non-void so that this - * can be invoked inside ereport(). 
- */ -static int -report_json_context(JsonLexContext *lex) -{ - const char *context_start; - const char *context_end; - const char *line_start; - int line_number; - char *ctxt; - int ctxtlen; - const char *prefix; - const char *suffix; - - /* Choose boundaries for the part of the input we will display */ - context_start = lex->input; - context_end = lex->token_terminator; - line_start = context_start; - line_number = 1; - for (;;) + switch (ctx) { - /* Always advance over newlines */ - if (context_start < context_end && *context_start == '\n') - { - context_start++; - line_start = context_start; - line_number++; - continue; - } - /* Otherwise, done as soon as we are close enough to context_end */ - if (context_end - context_start < 50) - break; - /* Advance to next multibyte character */ - if (IS_HIGHBIT_SET(*context_start)) - context_start += pg_mblen(context_start); - else - context_start++; + case JSON_PARSE_END: + return JSON_EXPECTED_END; + case JSON_PARSE_VALUE: + return JSON_EXPECTED_JSON; + case JSON_PARSE_STRING: + return JSON_EXPECTED_STRING; + case JSON_PARSE_ARRAY_START: + return JSON_EXPECTED_ARRAY_FIRST; + case JSON_PARSE_ARRAY_NEXT: + return JSON_EXPECTED_ARRAY_NEXT; + case JSON_PARSE_OBJECT_START: + return JSON_EXPECTED_OBJECT_FIRST; + case JSON_PARSE_OBJECT_LABEL: + return JSON_EXPECTED_COLON; + case JSON_PARSE_OBJECT_NEXT: + return JSON_EXPECTED_OBJECT_NEXT; + case JSON_PARSE_OBJECT_COMMA: + return JSON_EXPECTED_STRING; + default: + return JSON_BAD_PARSER_STATE; } - - /* - * We add "..." to indicate that the excerpt doesn't start at the - * beginning of the line ... but if we're within 3 characters of the - * beginning of the line, we might as well just show the whole line. 
- */ - if (context_start - line_start <= 3) - context_start = line_start; - - /* Get a null-terminated copy of the data to present */ - ctxtlen = context_end - context_start; - ctxt = palloc(ctxtlen + 1); - memcpy(ctxt, context_start, ctxtlen); - ctxt[ctxtlen] = '\0'; - - /* - * Show the context, prefixing "..." if not starting at start of line, and - * suffixing "..." if not ending at end of line. - */ - prefix = (context_start > line_start) ? "..." : ""; - suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : ""; - - return errcontext("JSON data, line %d: %s%s%s", - line_number, prefix, ctxt, suffix); -} - -/* - * Extract a single, possibly multi-byte char from the input string. - */ -static char * -extract_mb_char(char *s) -{ - char *res; - int len; - - len = pg_mblen(s); - res = palloc(len + 1); - memcpy(res, s, len); - res[len] = '\0'; - - return res; } /* @@ -2492,7 +2267,7 @@ json_typeof(PG_FUNCTION_ARGS) lex = makeJsonLexContext(json, false); /* Lex exactly one token from the input and check its type. 
*/ - json_lex(lex); + json_lex_or_throw(lex); tok = lex_peek(lex); switch (tok) { diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c index c95e112184..63072f616e 100644 --- a/src/backend/utils/adt/jsonb.c +++ b/src/backend/utils/adt/jsonb.c @@ -272,7 +272,7 @@ jsonb_from_cstring(char *json, int len) sem.scalar = jsonb_in_scalar; sem.object_field_start = jsonb_in_object_field_start; - pg_parse_json(lex, &sem); + pg_parse_json_or_throw(lex, &sem); /* after parsing, the item member has the composed jsonb structure */ PG_RETURN_POINTER(JsonbValueToJsonb(state.res)); @@ -860,8 +860,7 @@ datum_to_jsonb(Datum val, bool is_null, JsonbInState *result, sem.scalar = jsonb_in_scalar; sem.object_field_start = jsonb_in_object_field_start; - pg_parse_json(lex, &sem); - + pg_parse_json_or_throw(lex, &sem); } break; case JSONBTYPE_JSONB: diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 3979145ecc..be5d30239d 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -30,6 +30,7 @@ #include "utils/json.h" #include "utils/jsonb.h" #include "utils/lsyscache.h" +#include "utils/mbutils.h" #include "utils/memutils.h" #include "utils/syscache.h" #include "utils/typcache.h" @@ -328,6 +329,9 @@ typedef struct JsObject hash_destroy((jso)->val.json_hash); \ } while (0) +/* functions for json parsing */ +static char *extract_mb_char(char *s); + /* semantic action functions for json_object_keys */ static void okeys_object_field_start(void *state, char *fname, bool isnull); static void okeys_array_start(void *state); @@ -483,6 +487,23 @@ static void transform_string_values_object_field_start(void *state, char *fname, static void transform_string_values_array_element_start(void *state, bool isnull); static void transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype); +/* + * Extract a single, possibly multi-byte char from the input string. 
+ */ +static char * +extract_mb_char(char *s) +{ + char *res; + int len; + + len = pg_mblen(s); + res = palloc(len + 1); + memcpy(res, s, len); + res[len] = '\0'; + + return res; +} + /* * makeJsonLexContext * @@ -625,7 +646,7 @@ json_object_keys(PG_FUNCTION_ARGS) sem->object_field_start = okeys_object_field_start; /* remainder are all NULL, courtesy of palloc0 above */ - pg_parse_json(lex, sem); + pg_parse_json_or_throw(lex, sem); /* keys are now in state->result */ pfree(lex->strval->data); @@ -656,6 +677,78 @@ json_object_keys(PG_FUNCTION_ARGS) SRF_RETURN_DONE(funcctx); } +/* + * Report a CONTEXT line for bogus JSON input. + * + * lex->token_terminator must be set to identify the spot where we detected + * the error. Note that lex->token_start might be NULL, in case we recognized + * error at EOF. + * + * The return value isn't meaningful, but we make it non-void so that this + * can be invoked inside ereport(). + */ +int +report_json_context(JsonLexContext *lex) +{ + const char *context_start; + const char *context_end; + const char *line_start; + int line_number; + char *ctxt; + int ctxtlen; + const char *prefix; + const char *suffix; + + /* Choose boundaries for the part of the input we will display */ + context_start = lex->input; + context_end = lex->token_terminator; + line_start = context_start; + line_number = 1; + for (;;) + { + /* Always advance over newlines */ + if (context_start < context_end && *context_start == '\n') + { + context_start++; + line_start = context_start; + line_number++; + continue; + } + /* Otherwise, done as soon as we are close enough to context_end */ + if (context_end - context_start < 50) + break; + /* Advance to next multibyte character */ + if (IS_HIGHBIT_SET(*context_start)) + context_start += pg_mblen(context_start); + else + context_start++; + } + + /* + * We add "..." to indicate that the excerpt doesn't start at the + * beginning of the line ... 
but if we're within 3 characters of the + * beginning of the line, we might as well just show the whole line. + */ + if (context_start - line_start <= 3) + context_start = line_start; + + /* Get a null-terminated copy of the data to present */ + ctxtlen = context_end - context_start; + ctxt = palloc(ctxtlen + 1); + memcpy(ctxt, context_start, ctxtlen); + ctxt[ctxtlen] = '\0'; + + /* + * Show the context, prefixing "..." if not starting at start of line, and + * suffixing "..." if not ending at end of line. + */ + prefix = (context_start > line_start) ? "..." : ""; + suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : ""; + + return errcontext("JSON data, line %d: %s%s%s", + line_number, prefix, ctxt, suffix); +} + static void okeys_object_field_start(void *state, char *fname, bool isnull) { @@ -1019,7 +1112,7 @@ get_worker(text *json, sem->array_element_end = get_array_element_end; } - pg_parse_json(lex, sem); + pg_parse_json_or_throw(lex, sem); return state->tresult; } @@ -1567,7 +1660,7 @@ json_array_length(PG_FUNCTION_ARGS) sem->scalar = alen_scalar; sem->array_element_start = alen_array_element_start; - pg_parse_json(lex, sem); + pg_parse_json_or_throw(lex, sem); PG_RETURN_INT32(state->count); } @@ -1662,6 +1755,145 @@ jsonb_each_text(PG_FUNCTION_ARGS) return each_worker_jsonb(fcinfo, "jsonb_each_text", true); } +static char * +extract_token(JsonLexContext *lex) +{ + int toklen = lex->token_terminator - lex->token_start; + char *token = palloc(toklen + 1); + + memcpy(token, lex->token_start, toklen); + token[toklen] = '\0'; + return token; +} + +void +throw_json_parse_error(JsonParseErrorType error, JsonLexContext *lex) +{ + switch (error) + { + case JSON_SUCCESS: + elog(ERROR, "internal error in json parser"); + break; + case JSON_ESCAPING_INVALID: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", 
"json"), + errdetail("Escape sequence \"\\%s\" is invalid.", + extract_mb_char(lex->token_terminator - 1)), // XXX WRONG AND BUSTED + report_json_context(lex))); + case JSON_ESCAPING_REQUIRED: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Character with value 0x%02x must be escaped.", + (unsigned char) *(lex->token_terminator)), + report_json_context(lex))); + case JSON_EXPECTED_END: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Expected end of input, but found \"%s\".", + extract_token(lex)), + report_json_context(lex))); + case JSON_EXPECTED_ARRAY_FIRST: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Expected array element or \"]\", but found \"%s\".", + extract_token(lex)), + report_json_context(lex))); + case JSON_EXPECTED_ARRAY_NEXT: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Expected \",\" or \"]\", but found \"%s\".", + extract_token(lex)), + report_json_context(lex))); + case JSON_EXPECTED_COLON: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Expected \":\", but found \"%s\".", + extract_token(lex)), + report_json_context(lex))); + case JSON_EXPECTED_JSON: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Expected JSON value, but found \"%s\".", + extract_token(lex)), + report_json_context(lex))); + case JSON_EXPECTED_MORE: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("The input string ended unexpectedly."), + report_json_context(lex))); + case 
JSON_EXPECTED_OBJECT_FIRST: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Expected string or \"}\", but found \"%s\".", + extract_token(lex)), + report_json_context(lex))); + case JSON_EXPECTED_OBJECT_NEXT: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Expected \",\" or \"}\", but found \"%s\".", + extract_token(lex)), + report_json_context(lex))); + case JSON_EXPECTED_STRING: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Expected string, but found \"%s\".", + extract_token(lex)), + report_json_context(lex))); + case JSON_INVALID_TOKEN: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Token \"%s\" is invalid.", extract_token(lex)), + report_json_context(lex))); + case JSON_UNICODE_CODE_POINT_ZERO: + ereport(ERROR, + (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), + errmsg("unsupported Unicode escape sequence"), + errdetail("\\u0000 cannot be converted to text."), + report_json_context(lex))); + case JSON_UNICODE_ESCAPE_FORMAT: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("\"\\u\" must be followed by four hexadecimal digits."), + report_json_context(lex))); + case JSON_UNICODE_HIGH_ESCAPE: + ereport(ERROR, + (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), + errmsg("unsupported Unicode escape sequence"), + errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."), + report_json_context(lex))); + case JSON_UNICODE_HIGH_SURROGATE: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Unicode high surrogate must 
not follow a high surrogate."), + report_json_context(lex))); + case JSON_UNICODE_LOW_SURROGATE: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "json"), + errdetail("Unicode low surrogate must follow a high surrogate."), + report_json_context(lex))); + case JSON_BAD_PARSER_STATE: + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("json parser encountered an internally inconsistent state"), + report_json_context(lex))); /* TODO: Is it safe to call report_json_context here? Perhaps the bad parser state causes problems? */ + } +} + static Datum each_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname, bool as_text) { @@ -1833,7 +2065,7 @@ each_worker(FunctionCallInfo fcinfo, bool as_text) "json_each temporary cxt", ALLOCSET_DEFAULT_SIZES); - pg_parse_json(lex, sem); + pg_parse_json_or_throw(lex, sem); MemoryContextDelete(state->tmp_cxt); @@ -2132,7 +2364,7 @@ elements_worker(FunctionCallInfo fcinfo, const char *funcname, bool as_text) "json_array_elements temporary cxt", ALLOCSET_DEFAULT_SIZES); - pg_parse_json(lex, sem); + pg_parse_json_or_throw(lex, sem); MemoryContextDelete(state->tmp_cxt); @@ -2504,7 +2736,7 @@ populate_array_json(PopulateArrayContext *ctx, char *json, int len) sem.array_element_end = populate_array_element_end; sem.scalar = populate_array_scalar; - pg_parse_json(state.lex, &sem); + pg_parse_json_or_throw(state.lex, &sem); /* number of dimensions should be already known */ Assert(ctx->ndims > 0 && ctx->dims); @@ -3361,7 +3593,7 @@ get_json_object_as_hash(char *json, int len, const char *funcname) sem->object_field_start = hash_object_field_start; sem->object_field_end = hash_object_field_end; - pg_parse_json(lex, sem); + pg_parse_json_or_throw(lex, sem); return tab; } @@ -3660,7 +3892,7 @@ populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname, state->lex = lex; - pg_parse_json(lex, sem); + pg_parse_json_or_throw(lex, sem); } else { @@ -3990,7 
+4222,7 @@ json_strip_nulls(PG_FUNCTION_ARGS) sem->array_element_start = sn_array_element_start; sem->object_field_start = sn_object_field_start; - pg_parse_json(lex, sem); + pg_parse_json_or_throw(lex, sem); PG_RETURN_TEXT_P(cstring_to_text_with_len(state->strval->data, state->strval->len)); @@ -5129,7 +5361,7 @@ iterate_json_values(text *json, uint32 flags, void *action_state, sem->scalar = iterate_values_scalar; sem->object_field_start = iterate_values_object_field_start; - pg_parse_json(lex, sem); + pg_parse_json_or_throw(lex, sem); } /* @@ -5249,7 +5481,7 @@ transform_json_string_values(text *json, void *action_state, sem->array_element_start = transform_string_values_array_element_start; sem->object_field_start = transform_string_values_object_field_start; - pg_parse_json(lex, sem); + pg_parse_json_or_throw(lex, sem); return cstring_to_text_with_len(state->strval->data, state->strval->len); } diff --git a/src/include/common/jsonapi.h b/src/include/common/jsonapi.h index 581fd48036..162437193a 100644 --- a/src/include/common/jsonapi.h +++ b/src/include/common/jsonapi.h @@ -33,6 +33,28 @@ typedef enum JSON_TOKEN_END } JsonTokenType; +typedef enum +{ + JSON_SUCCESS = 0, + JSON_ESCAPING_INVALID, + JSON_ESCAPING_REQUIRED, + JSON_EXPECTED_ARRAY_FIRST, + JSON_EXPECTED_ARRAY_NEXT, + JSON_EXPECTED_COLON, + JSON_EXPECTED_END, + JSON_EXPECTED_JSON, + JSON_EXPECTED_MORE, + JSON_EXPECTED_OBJECT_FIRST, + JSON_EXPECTED_OBJECT_NEXT, + JSON_EXPECTED_STRING, + JSON_INVALID_TOKEN, + JSON_UNICODE_CODE_POINT_ZERO, + JSON_UNICODE_ESCAPE_FORMAT, + JSON_UNICODE_HIGH_ESCAPE, + JSON_UNICODE_HIGH_SURROGATE, + JSON_UNICODE_LOW_SURROGATE, + JSON_BAD_PARSER_STATE +} JsonParseErrorType; /* * All the fields in this structure should be treated as read-only. @@ -104,7 +126,13 @@ extern const JsonSemAction nullSemAction; * points to. If the action pointers are NULL the parser * does nothing and just continues. 
*/ -extern void pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem); +extern JsonParseErrorType pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem) __attribute__((warn_unused_result)); + +/* + * Lex one token from the input stream. + */ +extern JsonParseErrorType json_lex(JsonLexContext *lex) __attribute__((warn_unused_result)); + /* * json_count_array_elements performs a fast secondary parse to determine the diff --git a/src/include/utils/jsonfuncs.h b/src/include/utils/jsonfuncs.h index bade7248f9..82a56eaf06 100644 --- a/src/include/utils/jsonfuncs.h +++ b/src/include/utils/jsonfuncs.h @@ -25,5 +25,39 @@ extern char *JsonEncodeDateTime(char *buf, Datum value, Oid typid, extern text *transform_json_string_values(text *json, void *action_state, JsonTransformStringValuesAction transform_action); extern JsonLexContext *makeJsonLexContext(text *json, bool need_escapes); +extern int report_json_context(JsonLexContext *lex); + +extern void throw_json_parse_error(JsonParseErrorType error, JsonLexContext *lex) pg_attribute_noreturn(); + +static inline void pg_parse_json_or_throw(JsonLexContext *lex, const JsonSemAction *sem); +static inline void json_lex_or_throw(JsonLexContext *lex); + +#define PARSE_OR_THROW(x, lex) \ +do { \ + JsonParseErrorType parse_result; \ + if ((parse_result = (x)) != JSON_SUCCESS) \ + throw_json_parse_error(parse_result, (lex)); \ +} while (0) + +/* + * pg_parse_json will parse the string in the lex calling the + * action functions in sem at the appropriate points. It is + * up to them to keep what state they need in semstate. If they + * need access to the state of the lexer, then its pointer + * should be passed to them as a member of whatever semstate + * points to. If the action pointers are NULL the parser + * does nothing and just continues. 
+ */ +static inline void +pg_parse_json_or_throw(JsonLexContext *lex, const JsonSemAction *sem) +{ + PARSE_OR_THROW(pg_parse_json(lex, sem), lex); +} + +static inline void +json_lex_or_throw(JsonLexContext *lex) +{ + PARSE_OR_THROW(json_lex(lex), lex); +} #endif /* JSONFUNCS_H */ -- 2.21.1 (Apple Git-122.3)