diff --git a/examples/simple_parse.c b/examples/simple_parse.c
index 044337b..905068e 100644
--- a/examples/simple_parse.c
+++ b/examples/simple_parse.c
@@ -158,7 +158,11 @@ json_t *load_json(const char *text) {
     if (root) {
         return root;
     } else {
-        fprintf(stderr, "json error on line %d: %s\n", error.line, error.text);
+        char *detailed = json_error_get_detailed(&error, text,
+            JSON_ERROR_COLOR | JSON_ERROR_ARROW_LEN(4));
+        /* json_error_get_detailed() returns NULL on failure; fall back to the plain message. */
+        fprintf(stderr, "Error:\n%s\n", detailed ? detailed : error.text);
+        free(detailed);
         return (json_t *)0;
     }
 }
diff --git a/src/error.c b/src/error.c
index 58c8379..fee1c12 100644
--- a/src/error.c
+++ b/src/error.c
@@ -1,5 +1,6 @@
 #include <string.h>
 #include "jansson_private.h"
+#include "utf.h"
 
 void jsonp_error_init(json_error_t *error, const char *source)
 {
@@ -61,3 +62,264 @@ void jsonp_error_vset(json_error_t *error, int line, int column,
     vsnprintf(error->text, JSON_ERROR_TEXT_LENGTH, msg, ap);
     error->text[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
 }
+
+// Locate the first byte of the line on which the error was reported.
+//
+// The line start is taken to be position - column, which assumes that both
+// fields count bytes.
+// TODO: Pick start properly so we don't split a UTF-8 code point.
+//
+// Returns NULL when the recorded location cannot lie inside src.
+static const char *json_error_get_line_start(const json_error_t *error,
+                                             const char *src)
+{
+    if (error->column < 0 || error->position < error->column) {
+        return NULL;
+    }
+
+    if ((size_t)error->position > strlen(src)) {
+        return NULL;
+    }
+
+    return src + (error->position - error->column);
+}
+
+// Count the code points between the start of the error line and the error
+// column, so that the arrow lines up even when the line holds multi-byte
+// UTF-8 sequences. Falls back to the raw column when the location is
+// unusable or the bytes are not valid UTF-8.
+static size_t json_error_get_utf8_column(json_error_t *error, const char *src)
+{
+    size_t count = 0;
+    size_t fallback = error->column > 0 ? (size_t)error->column : 0;
+    const char *s = json_error_get_line_start(error, src);
+    const char *colend;
+
+    if (!s) {
+        return fallback;
+    }
+
+    // Measure the error line itself, not the first bytes of the document.
+    colend = s + error->column;
+
+    while (s < colend) {
+        if (!(s = utf8_iterate(s, colend - s, NULL))) {
+            return fallback;
+        }
+        count++;
+    }
+
+    return count;
+}
+
+// Copy the line that contains the error into a newly allocated string,
+// without its trailing newline. Returns NULL if the location is unusable or
+// the allocation fails.
+static char *json_error_get_source_text(json_error_t *error, const char *src)
+{
+    const char *start = json_error_get_line_start(error, src);
+    const char *end;
+    size_t len;
+    char *s;
+
+    if (!start) {
+        return NULL;
+    }
+
+    end = strchr(start, '\n');
+    len = end ? (size_t)(end - start) : strlen(start);
+
+    if (!(s = jsonp_malloc(len + 1))) {
+        return NULL;
+    }
+
+    memcpy(s, start, len);
+    s[len] = '\0';
+
+    return s;
+}
+
+// Decide whether the message goes to the left of the arrow. That happens when
+// "<indent>^<tail> (<text>)" would be wider than JSON_ERROR_SOURCE_LENGTH.
+// Both the arrow and the final layout use this one test so they always agree.
+static int json_error_is_flipped(const json_error_t *error,
+                                 size_t utf8_column, size_t flags)
+{
+    size_t taillen = flags & JSON_ERROR_ARROW_MAXLEN;
+    // '^', ' ', '(' and ')' account for the 4.
+    size_t width = utf8_column + taillen + strlen(error->text) + 4;
+
+    return width > JSON_ERROR_SOURCE_LENGTH;
+}
+
+// Build the marker that goes under the source line: a '^' with a tail of
+// (flags & JSON_ERROR_ARROW_MAXLEN) '~' characters, optionally wrapped in ANSI
+// color codes. In the normal layout the '^' is indented to the error column
+// and the tail follows it; in the flipped layout the tail comes first and the
+// caller supplies the indentation. Returns a newly allocated string, or NULL
+// if the allocation fails.
+static char *json_error_get_arrow(json_error_t *error,
+                                  const char *src, size_t flags)
+{
+    static const char color_on[] = "\x1b[01;32m";
+    static const char color_off[] = "\x1b[0m\x1b[0m";
+    size_t taillen = flags & JSON_ERROR_ARROW_MAXLEN;
+    size_t utf8_column;
+    size_t indent;
+    size_t size;
+    int flipped;
+    int color = 0;
+    char *msg;
+    char *p;
+
+    if (strlen(src) < 2) {
+        return jsonp_strdup("");
+    }
+
+#ifndef _WIN32
+    color = (flags & JSON_ERROR_COLOR) != 0;
+#endif // _WIN32
+
+    // TODO: Make sure this works on Windows.
+    // TODO: How to check if console supports UTF-8? If not we want normal column value.
+    // Get the error column based on UTF-8 code points
+    // so that the arrow points in the correct position.
+    utf8_column = json_error_get_utf8_column(error, src);
+    flipped = json_error_is_flipped(error, utf8_column, flags);
+    indent = flipped ? 0 : utf8_column;
+
+    // Size the buffer from what is actually written so nothing is truncated:
+    // indentation, '^', tail and the terminating NUL, plus the color codes.
+    size = indent + 1 + taillen + 1;
+    if (color) {
+        size += (sizeof(color_on) - 1) + (sizeof(color_off) - 1);
+    }
+
+    if (!(msg = jsonp_malloc(size))) {
+        return NULL;
+    }
+
+    p = msg;
+
+    if (color) {
+        memcpy(p, color_on, sizeof(color_on) - 1);
+        p += sizeof(color_on) - 1;
+    }
+
+    if (flipped) {
+        // Flip the arrow if the column is too far to the right.
+        memset(p, '~', taillen);
+        p += taillen;
+        *p++ = '^';
+    } else {
+        // Print the arrow.
+        memset(p, ' ', indent);
+        p += indent;
+        *p++ = '^';
+        memset(p, '~', taillen);
+        p += taillen;
+    }
+
+    if (color) {
+        memcpy(p, color_off, sizeof(color_off) - 1);
+        p += sizeof(color_off) - 1;
+    }
+
+    *p = '\0';
+
+    return msg;
+}
+
+// Render the error as two lines: the offending source line, then an arrow
+// under the error column together with the error text.
+//
+// flags is a combination of JSON_ERROR_ARROW_LEN(n) and JSON_ERROR_COLOR.
+// Returns a string allocated with jsonp_malloc() that the caller owns, or
+// NULL if an argument is NULL, the error has no usable location, or memory
+// runs out.
+char *json_error_get_detailed(json_error_t *error, const char *src, size_t flags)
+{
+    char *problem_src = NULL;
+    char *arrow = NULL;
+    char *s = NULL;
+    size_t taillen = flags & JSON_ERROR_ARROW_MAXLEN;
+    size_t textlen;
+    size_t utf8_column;
+    size_t pad = 0;
+    size_t size;
+    int flipped;
+    int written;
+
+    if (!error || !src) {
+        return NULL;
+    }
+
+    if (!(problem_src = json_error_get_source_text(error, src))) {
+        return NULL;
+    }
+
+    if (!(arrow = json_error_get_arrow(error, src, flags))) {
+        goto fail;
+    }
+
+    textlen = strlen(error->text);
+    utf8_column = json_error_get_utf8_column(error, src);
+    flipped = json_error_is_flipped(error, utf8_column, flags);
+
+    // When flipped, "(text) " and the tail sit left of the '^'; pad so the '^'
+    // still lands on the error column, and never hand printf a negative width.
+    if (flipped && utf8_column > textlen + 3 + taillen) {
+        pad = utf8_column - (textlen + 3 + taillen);
+    }
+
+    // Two '\n', the parentheses, one space and the terminating NUL make up the 6.
+    size = strlen(problem_src) + pad + strlen(arrow) + textlen + 6;
+
+    if (!(s = jsonp_malloc(size))) {
+        goto fail;
+    }
+
+    // If the error message goes outside of the console width, flip it!
+    if (flipped) {
+        written = snprintf(s, size, "%s\n%*s(%s) %s\n",
+                           problem_src, (int)pad, "", error->text, arrow);
+    } else {
+        written = snprintf(s, size, "%s\n%s (%s)\n",
+                           problem_src, arrow, error->text);
+    }
+
+    if (written < 0 || (size_t)written >= size) {
+        goto fail;
+    }
+
+    jsonp_free(problem_src);
+    jsonp_free(arrow);
+
+    return s;
+fail:
+    if (problem_src) jsonp_free(problem_src);
+    if (arrow) jsonp_free(arrow);
+    if (s) jsonp_free(s);
+    return NULL;
+}
+
+// Write the detailed report for error to fd. If the report cannot be built,
+// the bare error text is written instead.
+void json_error_print_detailed(FILE *fd, json_error_t *error, const char *src, size_t flags)
+{
+    char *d;
+
+    if (!fd || !error) {
+        return;
+    }
+
+    if (!(d = json_error_get_detailed(error, src, flags))) {
+        // Since we're reporting an error, at least report something!
+        fprintf(fd, "%s\n", error->text);
+        return;
+    }
+
+    fprintf(fd, "%s", d);
+    jsonp_free(d);
+}
+
diff --git a/src/jansson.def b/src/jansson.def
index da4cfd4..85c779a 100644
--- a/src/jansson.def
+++ b/src/jansson.def
@@ -66,4 +66,6 @@ EXPORTS
     json_unpack_ex
     json_vunpack_ex
     json_set_alloc_funcs
+    json_error_get_detailed
+    json_error_print_detailed
 
diff --git a/src/jansson.h b/src/jansson.h
index 6f7fd07..30eda0b 100644
--- a/src/jansson.h
+++ b/src/jansson.h
@@ -126,6 +126,15 @@ typedef struct {
     char text[JSON_ERROR_TEXT_LENGTH];
 } json_error_t;
 
+/* Flags for json_error_get_detailed() and json_error_print_detailed(). */
+#define JSON_ERROR_ARROW_MAXLEN 0x1f
+#define JSON_ERROR_ARROW_LEN(n) ((n) & JSON_ERROR_ARROW_MAXLEN)
+#define JSON_ERROR_COLOR 0x20
+#define JSON_ERROR_FLIP 0x40
+
+/* The returned string is owned by the caller; NULL is returned on failure. */
+char *json_error_get_detailed(json_error_t *error, const char *src, size_t flags);
+void json_error_print_detailed(FILE *fd, json_error_t *error, const char *src, size_t flags);
 
 /* getters, setters, manipulation */
 
diff --git a/src/utf.c b/src/utf.c
index b56e125..d0ad389 100644
--- a/src/utf.c
+++ b/src/utf.c
@@ -185,3 +185,73 @@ int utf8_check_string(const char *string, size_t length)
 
     return 1;
 }
+
+//
+// This code is from:
+// http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+//
+// Copyright (c) 2008-2009 Bjoern Hoehrmann
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+#define UTF8_ACCEPT 0 // decoder state: ready to start a new code point
+#define UTF8_REJECT 1 // decoder state after invalid input
+
+static const uint8_t utf8d[] = { // byte -> character class, then state transition rows
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+    8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+    0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+    0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+    0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+    1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+    1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+    1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+
+static uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) // feeds one byte; returns the new *state
+{
+    uint32_t type = utf8d[byte]; // character class of this byte
+
+    *codep = (*state != UTF8_ACCEPT) ?
+        (byte & 0x3fu) | (*codep << 6) : // mid-sequence: append the low 6 bits
+        (0xff >> type) & (byte); // first byte: mask selected by its class
+
+    *state = utf8d[256 + (*state) * 16 + type]; // transition rows start at index 256, 16 entries each
+    return *state;
+}
+
+int utf8_strlen(const char *s, size_t *count) // stores the code point count in *count
+{
+    uint32_t codepoint;
+    uint32_t state = UTF8_ACCEPT;
+
+    for (*count = 0; *s; ++s) {
+        if (!decode(&state, &codepoint, (uint8_t)*s)) { // back in UTF8_ACCEPT: one code point completed
+            (*count)++;
+        }
+    }
+
+    return state != UTF8_ACCEPT; // 0 only if the string ended on a code point boundary
+}
diff --git a/src/utf.h b/src/utf.h
index 2cebea0..9ffabae 100644
--- a/src/utf.h
+++ b/src/utf.h
@@ -23,5 +23,6 @@ size_t utf8_check_full(const char *buffer, size_t size, int32_t *codepoint);
 const char *utf8_iterate(const char *buffer, size_t size, int32_t *codepoint);
 
 int utf8_check_string(const char *string, size_t length);
+int utf8_strlen(const char *str, size_t *count);
 
 #endif