Add inline collection support in yaml parser

Added support for inline collections using the following syntax: [1, 2, 3]

Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
This commit is contained in:
Krystian Chmielewski
2021-10-25 15:50:14 +00:00
committed by Compute-Runtime-Automation
parent 6769df2f5d
commit 174c1dfe64
3 changed files with 173 additions and 64 deletions

View File

@@ -199,12 +199,34 @@ bool tokenize(ConstStringRef text, LinesCache &outLines, TokensCache &outTokens,
context.isParsingIdent = false;
break;
}
case '[':
if (false == std::regex_search(context.pos, inlineCollectionRegex)) {
outErrReason = constructYamlError(outLines.size(), context.lineBeginPos, context.pos, inlineCollectionYamlErrorMsg.data());
return false;
}
context.lineTraits.hasInlineDataMarkers = true;
outTokens.push_back(Token(ConstStringRef(context.pos, 1), Token::CollectionBeg));
++context.pos;
break;
case ']':
if (false == context.lineTraits.hasInlineDataMarkers) {
outErrReason = constructYamlError(outLines.size(), context.lineBeginPos, context.pos, inlineCollectionYamlErrorMsg.data());
return false;
}
outTokens.push_back(Token(ConstStringRef(context.pos, 1), Token::CollectionEnd));
++context.pos;
break;
case ',':
if (false == context.lineTraits.hasInlineDataMarkers) {
outErrReason = constructYamlError(outLines.size(), context.lineBeginPos, context.pos, inlineCollectionYamlErrorMsg.data());
return false;
}
outTokens.push_back(Token(ConstStringRef(context.pos, 1), Token::SingleCharacter));
++context.pos;
break;
case '{':
case '}':
case '[':
case ']':
case ',':
outErrReason = constructYamlError(outLines.size(), context.lineBeginPos, context.pos, "NEO::Yaml : Inline collections are not supported yet");
outErrReason = constructYamlError(outLines.size(), context.lineBeginPos, context.pos, "NEO::Yaml : Inline dictionaries are not supported");
return false;
case ':':
context.lineTraits.hasDictionaryEntry = true;
@@ -329,29 +351,45 @@ bool buildTree(const LinesCache &lines, const TokensCache &tokens, NodesCache &o
}
}
if (lines[lineId].traits.hasInlineDataMarkers) {
outErrReason = "Inline collections are not supported yet\n";
return false;
} else {
if (Line::LineType::DictionaryEntry == lines[lineId].lineType) {
auto numTokensInLine = lines[lineId].last - lines[lineId].first + 1;
outNodes.rbegin()->key = lines[lineId].first;
UNRECOVERABLE_IF(numTokensInLine < 3); // at least key, : and \n
if (('#' != tokens[lines[lineId].first + 2]) && ('\n' != tokens[lines[lineId].first + 2])) {
outNodes.rbegin()->value = lines[lineId].first + 2;
}
} else {
auto numTokensInLine = lines[lineId].last - lines[lineId].first + 1;
(void)numTokensInLine;
UNRECOVERABLE_IF(numTokensInLine < 2); // at least : - and \n
UNRECOVERABLE_IF(Line::LineType::ListEntry != lines[lineId].lineType);
UNRECOVERABLE_IF('-' != tokens[lines[lineId].first]);
if (('#' != tokens[lines[lineId].first + 1]) && ('\n' != tokens[lines[lineId].first + 1])) {
outNodes.rbegin()->value = lines[lineId].first + 1;
if (Line::LineType::DictionaryEntry == lines[lineId].lineType) {
auto numTokensInLine = lines[lineId].last - lines[lineId].first + 1;
outNodes.rbegin()->key = lines[lineId].first;
UNRECOVERABLE_IF(numTokensInLine < 3); // at least key, : and \n
if (lines[lineId].traits.hasInlineDataMarkers) {
auto collectionBeg = lines[lineId].first + 2;
auto collectionEnd = lines[lineId].last - 1;
UNRECOVERABLE_IF(tokens[collectionBeg].traits.type != Token::Type::CollectionBeg || tokens[collectionEnd].traits.type != Token::Type::CollectionEnd);
auto &parentNode = *outNodes.rbegin();
Node *lastAddedNode = nullptr;
for (auto currTokenId = collectionBeg + 1; currTokenId < collectionEnd; currTokenId += 2) {
auto tokenType = tokens[currTokenId].traits.type;
UNRECOVERABLE_IF(tokenType != Token::Type::LiteralNumber && tokenType != Token::Type::LiteralString);
if (lastAddedNode == nullptr) {
lastAddedNode = &addNode(outNodes, parentNode);
} else {
lastAddedNode = &addNode(outNodes, *lastAddedNode, parentNode);
}
lastAddedNode->indent = currLineIndent + 1;
lastAddedNode->value = currTokenId;
}
nesting.push_back(parentNode.id);
} else if (('#' != tokens[lines[lineId].first + 2]) && ('\n' != tokens[lines[lineId].first + 2])) {
outNodes.rbegin()->value = lines[lineId].first + 2;
}
} else {
auto numTokensInLine = lines[lineId].last - lines[lineId].first + 1;
(void)numTokensInLine;
UNRECOVERABLE_IF(numTokensInLine < 2); // at least : - and \n
UNRECOVERABLE_IF(Line::LineType::ListEntry != lines[lineId].lineType);
UNRECOVERABLE_IF('-' != tokens[lines[lineId].first]);
if (('#' != tokens[lines[lineId].first + 1]) && ('\n' != tokens[lines[lineId].first + 1])) {
outNodes.rbegin()->value = lines[lineId].first + 1;
}
++lineId;
}
++lineId;
}
while (false == nesting.empty()) {

View File

@@ -12,6 +12,7 @@
#include "shared/source/utilities/stackvec.h"
#include <iterator>
#include <regex>
#include <string>
namespace NEO {
@@ -148,7 +149,9 @@ struct Token {
SingleCharacter,
Comment,
FileSectionBeg,
FileSectionEnd };
FileSectionEnd,
CollectionBeg,
CollectionEnd };
constexpr Token(ConstStringRef tokData, Type tokType) {
pos = tokData.begin();
@@ -253,6 +256,9 @@ using LinesCache = StackVec<Line, 512>;
std::string constructYamlError(size_t lineNumber, const char *lineBeg, const char *parsePos, const char *reason = nullptr);
// Pattern an inline collection has to match, starting at its opening '[': one or more elements, each
// made of optional leading whitespace and one or more word characters (\w already covers \d) with an
// optional trailing ',', followed by ']', optional whitespace and a newline.
// The ',' is optional in the pattern, so elements separated only by whitespace match as well.
// NOTE(review): `static` at namespace scope gives every translation unit that sees this declaration
// its own std::regex instance - confirm that is intended.
static std::regex inlineCollectionRegex(R"regex(^\[(\s*(\d|\w)+,?)+\]\s*\n)regex");
// Reason text handed to constructYamlError by tokenize when '[' does not match the pattern above, or
// when ']' / ',' show up on a line whose hasInlineDataMarkers trait is not set.
// The literal repeats the pattern in escaped form, so the two have to be kept in sync by hand.
constexpr ConstStringRef inlineCollectionYamlErrorMsg = "NEO::Yaml : Inline collection is not in valid regex format - ^\\[(\\s*(\\d|\\w)+,?)+\\]\\s*\\n";
bool tokenize(ConstStringRef text, LinesCache &outLines, TokensCache &outTokens, std::string &outErrReason, std::string &outWarning);
using NodeId = uint32_t;

View File

@@ -411,50 +411,77 @@ TEST(YamlTokenize, WhenTextIsNotPartOfACollectionThenEmitsError) {
EXPECT_TRUE(warnings.empty()) << warnings;
}
TEST(YamlTokenize, GivenInlineCollectionThenReturnErrorAsUnsupported) {
TEST(YamlTokenize, GivenInlineDictionariesThenEmitsError) {
NEO::Yaml::LinesCache lines;
NEO::Yaml::TokensCache tokens;
std::string warnings;
std::string errors;
bool success = NEO::Yaml::tokenize("[\n", lines, tokens, errors, warnings);
bool success = NEO::Yaml::tokenize("{\n", lines, tokens, errors, warnings);
EXPECT_FALSE(success);
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [[] <-- parser position on error. Reason : NEO::Yaml : Inline collections are not supported yet\n", errors.c_str());
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [{] <-- parser position on error. Reason : NEO::Yaml : Inline dictionaries are not supported\n", errors.c_str());
EXPECT_TRUE(warnings.empty()) << warnings;
lines.clear();
tokens.clear();
warnings.clear();
errors.clear();
success = NEO::Yaml::tokenize("]\n", lines, tokens, errors, warnings);
EXPECT_FALSE(success);
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : []] <-- parser position on error. Reason : NEO::Yaml : Inline collections are not supported yet\n", errors.c_str());
EXPECT_TRUE(warnings.empty()) << warnings;
lines.clear();
tokens.clear();
warnings.clear();
errors.clear();
success = NEO::Yaml::tokenize("{\n", lines, tokens, errors, warnings);
EXPECT_FALSE(success);
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [{] <-- parser position on error. Reason : NEO::Yaml : Inline collections are not supported yet\n", errors.c_str());
EXPECT_TRUE(warnings.empty()) << warnings;
lines.clear();
tokens.clear();
warnings.clear();
errors.clear();
success = NEO::Yaml::tokenize("}\n", lines, tokens, errors, warnings);
EXPECT_FALSE(success);
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [}] <-- parser position on error. Reason : NEO::Yaml : Inline collections are not supported yet\n", errors.c_str());
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [}] <-- parser position on error. Reason : NEO::Yaml : Inline dictionaries are not supported\n", errors.c_str());
EXPECT_TRUE(warnings.empty()) << warnings;
}
TEST(YamlTokenize, GivenInvalidInlineCollectionThenEmitsError) {
NEO::Yaml::LinesCache lines;
NEO::Yaml::TokensCache tokens;
std::string warnings;
std::string errors;
bool success = NEO::Yaml::tokenize("]\n", lines, tokens, errors, warnings);
EXPECT_FALSE(success);
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : []] <-- parser position on error. Reason : NEO::Yaml : Inline collection is not in valid regex format - ^\\[(\\s*(\\d|\\w)+,?)+\\]\\s*\\n\n", errors.c_str());
EXPECT_TRUE(warnings.empty()) << warnings;
lines.clear();
tokens.clear();
warnings.clear();
errors.clear();
success = NEO::Yaml::tokenize("a , b\n", lines, tokens, errors, warnings);
success = NEO::Yaml::tokenize(",\n", lines, tokens, errors, warnings);
EXPECT_FALSE(success);
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [a ,] <-- parser position on error. Reason : NEO::Yaml : Inline collections are not supported yet\n", errors.c_str());
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [,] <-- parser position on error. Reason : NEO::Yaml : Inline collection is not in valid regex format - ^\\[(\\s*(\\d|\\w)+,?)+\\]\\s*\\n\n", errors.c_str());
EXPECT_TRUE(warnings.empty()) << warnings;
lines.clear();
tokens.clear();
warnings.clear();
errors.clear();
success = NEO::Yaml::tokenize("[123,32,,]\n", lines, tokens, errors, warnings);
EXPECT_FALSE(success);
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [[] <-- parser position on error. Reason : NEO::Yaml : Inline collection is not in valid regex format - ^\\[(\\s*(\\d|\\w)+,?)+\\]\\s*\\n\n", errors.c_str());
EXPECT_TRUE(warnings.empty()) << warnings;
lines.clear();
tokens.clear();
warnings.clear();
errors.clear();
success = NEO::Yaml::tokenize("[1,2,3,4]]\n", lines, tokens, errors, warnings);
EXPECT_FALSE(success);
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [[] <-- parser position on error. Reason : NEO::Yaml : Inline collection is not in valid regex format - ^\\[(\\s*(\\d|\\w)+,?)+\\]\\s*\\n\n", errors.c_str());
EXPECT_TRUE(warnings.empty()) << warnings;
lines.clear();
tokens.clear();
warnings.clear();
errors.clear();
success = NEO::Yaml::tokenize("[[1,2,3,4]]\n", lines, tokens, errors, warnings);
EXPECT_FALSE(success);
EXPECT_STREQ("NEO::Yaml : Could not parse line : [0] : [[] <-- parser position on error. Reason : NEO::Yaml : Inline collection is not in valid regex format - ^\\[(\\s*(\\d|\\w)+,?)+\\]\\s*\\n\n", errors.c_str());
EXPECT_TRUE(warnings.empty()) << warnings;
}
@@ -1061,22 +1088,6 @@ TEST(YamlBuildTree, GivenEmptyLinesThenSkipsThem) {
EXPECT_STREQ("NEO::Yaml : Text has no data\n", warnings.c_str());
}
// Checks that buildTree fails with "Inline collections are not supported yet" when a line is flagged
// as carrying inline data markers.
// NOTE(review): the enclosing hunk shrinks from 22 to 6 lines, so this test appears to be the one
// deleted by the commit - confirm against the raw diff.
TEST(YamlBuildTree, GivenInlineCollectionsThenReturnsFalseAsUnsupported) {
// Hand-built line cache: an Empty line, a ListEntry line and a FileSection line, all other fields zeroed.
NEO::Yaml::LinesCache lines = {
Line{NEO::Yaml::Line::LineType::Empty, 0, 0, 0, {}},
Line{NEO::Yaml::Line::LineType::ListEntry, 0, 0, 0, {}},
Line{NEO::Yaml::Line::LineType::FileSection, 0, 0, 0, {}},
};
// Only the list entry is marked as containing inline data.
lines[1].traits.hasInlineDataMarkers = true;
// The token cache is left empty on purpose; the expected failure must not depend on any token.
NEO::Yaml::TokensCache tokens;
NEO::Yaml::NodesCache tree;
std::string errors, warnings;
bool success = NEO::Yaml::buildTree(lines, tokens, tree, errors, warnings);
// Failure is reported through the return value and the error string only; no warning is expected.
EXPECT_FALSE(success);
EXPECT_TRUE(warnings.empty()) << warnings;
EXPECT_STREQ("Inline collections are not supported yet\n", errors.c_str());
}
template <typename ContainerT, typename IndexT>
auto at(ContainerT &container, IndexT index) -> decltype(std::declval<ContainerT>()[0]) & {
if (index >= container.size()) {
@@ -1301,6 +1312,60 @@ TEST(YamlBuildTree, GivenNestedCollectionsThenProperlyCreatesChildNodes) {
EXPECT_STREQ("bitter", at(tokens, appleFlavorsBitter.value).cstrref().str().c_str());
}
// End-to-end check (tokenize + buildTree) that a dictionary entry whose value is an inline collection
// ("apple : [red, green, blue]") gets one child node per collection element, and that the entries
// before and after it are still parsed as plain key/value pairs.
TEST(YamlBuildTree, GivenInlineCollectionThenProperlyCreatesChildNodes) {
// Four top-level dictionary entries; only "apple" uses the inline collection syntax.
ConstStringRef yaml =
R"===(
banana : yellow
kiwi : green
apple : [red, green, blue]
pear : pearish
)===";
NEO::Yaml::LinesCache lines;
NEO::Yaml::TokensCache tokens;
std::string warnings;
std::string errors;
// Stage 1: tokenizing must succeed without warnings or errors; ASSERT stops the test otherwise.
bool success = NEO::Yaml::tokenize(yaml, lines, tokens, errors, warnings);
ASSERT_TRUE(success);
EXPECT_TRUE(warnings.empty()) << warnings;
EXPECT_TRUE(errors.empty()) << errors;
// Stage 2: build the node tree from the tokenized lines.
NEO::Yaml::NodesCache treeNodes;
success = NEO::Yaml::buildTree(lines, tokens, treeNodes, errors, warnings);
EXPECT_TRUE(success);
EXPECT_TRUE(warnings.empty()) << warnings;
EXPECT_TRUE(errors.empty()) << errors;
// 8 nodes in total - consistent with the root, its four entries and the three inline elements.
ASSERT_EQ(8U, treeNodes.size());
auto &rootNode = *treeNodes.begin();
ASSERT_EQ(4U, rootNode.numChildren);
// Walk the tree through the firstChildId / nextSiblingId links, in document order.
auto &banana = at(treeNodes, rootNode.firstChildId);
auto &kiwi = at(treeNodes, banana.nextSiblingId);
auto &apple = at(treeNodes, kiwi.nextSiblingId);
auto &appleRed = at(treeNodes, apple.firstChildId);
auto &appleGreen = at(treeNodes, appleRed.nextSiblingId);
auto &appleBlue = at(treeNodes, appleGreen.nextSiblingId);
auto &pear = at(treeNodes, apple.nextSiblingId);
// Plain entries carry both a key token and a value token.
EXPECT_STREQ("banana", at(tokens, banana.key).cstrref().str().c_str());
EXPECT_STREQ("yellow", at(tokens, banana.value).cstrref().str().c_str());
EXPECT_STREQ("kiwi", at(tokens, kiwi.key).cstrref().str().c_str());
EXPECT_STREQ("green", at(tokens, kiwi.value).cstrref().str().c_str());
// The collection owner has a key but no value of its own.
EXPECT_STREQ("apple", at(tokens, apple.key).cstrref().str().c_str());
EXPECT_EQ(NEO::Yaml::invalidTokenId, apple.value);
// Each inline element is a value-only node (its key stays invalidTokenId).
EXPECT_STREQ("red", at(tokens, appleRed.value).cstrref().str().c_str());
EXPECT_EQ(NEO::Yaml::invalidTokenId, appleRed.key);
EXPECT_STREQ("green", at(tokens, appleGreen.value).cstrref().str().c_str());
EXPECT_EQ(NEO::Yaml::invalidTokenId, appleGreen.key);
EXPECT_STREQ("blue", at(tokens, appleBlue.value).cstrref().str().c_str());
EXPECT_EQ(NEO::Yaml::invalidTokenId, appleBlue.key);
// The entry following the inline collection is again a sibling of "apple", not one of its children.
EXPECT_STREQ("pear", at(tokens, pear.key).cstrref().str().c_str());
EXPECT_STREQ("pearish", at(tokens, pear.value).cstrref().str().c_str());
}
TEST(YamlBuildTree, WhenTabsAreUsedAfterIndentWasParsedThenTreatThemAsSeparators) {
ConstStringRef yaml =
R"===(