From bb77165f704b413a531e6cd4c29ce66b33675e09 Mon Sep 17 00:00:00 2001 From: Krystian Chmielewski Date: Fri, 29 Oct 2021 11:18:09 +0000 Subject: [PATCH] remove regex from yaml parser Signed-off-by: Krystian Chmielewski --- .../device_binary_format/yaml/yaml_parser.cpp | 39 ++++++++++++++++++- .../device_binary_format/yaml/yaml_parser.h | 3 +- .../yaml/yaml_parser_tests.cpp | 14 ++++++- 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/shared/source/device_binary_format/yaml/yaml_parser.cpp b/shared/source/device_binary_format/yaml/yaml_parser.cpp index 69eba7f34d..3c389fd0dd 100644 --- a/shared/source/device_binary_format/yaml/yaml_parser.cpp +++ b/shared/source/device_binary_format/yaml/yaml_parser.cpp @@ -108,6 +108,43 @@ bool tokenizeEndLine(ConstStringRef text, LinesCache &outLines, TokensCache &out return true; } +bool isValidInlineCollectionFormat(const char *context, const char *contextEnd) { + auto consumeAlphaNum = [](const char *&text) { + while (isAlphaNumeric(*text)) { + text++; + } + }; + + bool endNum = false; + bool endCollection = false; + context++; // skip '[' + while (context < contextEnd && *context != '\n') { + if (isWhitespace(*context)) { + context++; + } else if (false == endNum) { + if (isAlphaNumeric(*context)) { + consumeAlphaNum(context); + endNum = true; + } else { + return false; + } + } else if (false == endCollection) { + if (*context == ',') { + context++; + endNum = false; + } else if (*context == ']') { + context++; + endCollection = true; + } else { + return false; + } + } else { + return false; + } + } + return endCollection; +} + bool tokenize(ConstStringRef text, LinesCache &outLines, TokensCache &outTokens, std::string &outErrReason, std::string &outWarning) { if (text.empty()) { outWarning.append("NEO::Yaml : input text is empty\n"); @@ -200,7 +237,7 @@ bool tokenize(ConstStringRef text, LinesCache &outLines, TokensCache &outTokens, break; } case '[': - if (false == std::regex_search(context.pos, inlineCollectionRegex)) { + if (false == isValidInlineCollectionFormat(context.pos, text.end())) { outErrReason = constructYamlError(outLines.size(), context.lineBeginPos, context.pos, inlineCollectionYamlErrorMsg.data()); return false; } diff --git a/shared/source/device_binary_format/yaml/yaml_parser.h b/shared/source/device_binary_format/yaml/yaml_parser.h index d4856f5386..315540a00d 100644 --- a/shared/source/device_binary_format/yaml/yaml_parser.h +++ b/shared/source/device_binary_format/yaml/yaml_parser.h @@ -12,7 +12,6 @@ #include "shared/source/utilities/stackvec.h" #include -#include #include namespace NEO { @@ -256,7 +255,7 @@ using LinesCache = StackVec; std::string constructYamlError(size_t lineNumber, const char *lineBeg, const char *parsePos, const char *reason = nullptr); -static std::regex inlineCollectionRegex(R"regex(^\[(\s*(\d|\w)+,?)+\s*\]\s*\n)regex"); +bool isValidInlineCollectionFormat(const char *context, const char *contextEnd); constexpr ConstStringRef inlineCollectionYamlErrorMsg = "NEO::Yaml : Inline collection is not in valid regex format - ^\\[(\\s*(\\d|\\w)+,?)+\\s*\\]\\s*\\n"; bool tokenize(ConstStringRef text, LinesCache &outLines, TokensCache &outTokens, std::string &outErrReason, std::string &outWarning); diff --git a/shared/test/unit_test/device_binary_format/yaml/yaml_parser_tests.cpp b/shared/test/unit_test/device_binary_format/yaml/yaml_parser_tests.cpp index ad4f7463c5..8633290587 100644 --- a/shared/test/unit_test/device_binary_format/yaml/yaml_parser_tests.cpp +++ b/shared/test/unit_test/device_binary_format/yaml/yaml_parser_tests.cpp @@ -166,6 +166,11 @@ TEST(YamlIsMatched, WhenTextIsTooShortThenReturnFalse) { EXPECT_FALSE(NEO::Yaml::isMatched(text, text.begin(), "bcd")); } +TEST(YamlIsValidInlineCollectionFormat, WhenEndIsReachedThenReturnFalse) { + const char coll[8] = "[ 1, 2 "; + EXPECT_FALSE(NEO::Yaml::isValidInlineCollectionFormat(coll, coll + 7)); +} + TEST(YamlConsumeNumberOrSign, GivenValidNumberOrSignThenReturnProperEndingPosition) { ConstStringRef plus5 = "a+5"; ConstStringRef minus7 = "b -7 ["; @@ -479,7 +484,12 @@ TEST(YamlTokenize, GivenInvalidInlineCollectionThenEmitsError) { warnings.clear(); errors.clear(); - success = NEO::Yaml::tokenize("[[1,2,3,4]]\n", lines, tokens, errors, warnings); + success = NEO::Yaml::tokenize("[[1,2,3,4]\n", lines, tokens, errors, warnings); + EXPECT_FALSE(success); + EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [[] <-- parser position on error. Reason : NEO::Yaml : Inline collection is not in valid regex format - ^\\[(\\s*(\\d|\\w)+,?)+\\s*\\]\\s*\\n\n", errors.c_str()); + EXPECT_TRUE(warnings.empty()) << warnings; + + success = NEO::Yaml::tokenize("[1 2 3 4]\n", lines, tokens, errors, warnings); EXPECT_FALSE(success); EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [[] <-- parser position on error. Reason : NEO::Yaml : Inline collection is not in valid regex format - ^\\[(\\s*(\\d|\\w)+,?)+\\s*\\]\\s*\\n\n", errors.c_str()); EXPECT_TRUE(warnings.empty()) << warnings; @@ -1317,7 +1327,7 @@ TEST(YamlBuildTree, GivenInlineCollectionThenProperlyCreatesChildNodes) { R"===( banana : yellow kiwi : green - apple : [red, green, blue] + apple : [ red, green, blue ] pear : pearish )===";