mirror of
https://github.com/intel/llvm.git
synced 2026-01-16 05:32:28 +08:00
[NFC][GlobPattern] Add GlobPattern::longest_substr() (#164512)
Finds the longest (almost) plain substring in the pattern. The implementation is conservative to avoid false positives. The result is not used to optimize `GlobPattern::match()`, so it is calculated on request. For https://github.com/llvm/llvm-project/pull/164545. Co-authored-by: Luke Lau <luke@igalia.com>
This commit is contained in:
@@ -79,6 +79,9 @@ public:
|
||||
// Returns the first PrefixSize characters of Pattern.
StringRef prefix() const { return Pattern.take_front(PrefixSize); }
|
||||
// Returns the plain suffix of the pattern, i.e. the last SuffixSize
// characters of Pattern.
|
||||
StringRef suffix() const { return Pattern.take_back(SuffixSize); }
|
||||
// Returns the longest plain substring of the pattern between prefix and
|
||||
// suffix. The value is computed on every call rather than cached. The
// search is conservative: it stops at the first '{' and returns the best
// substring found before it.
|
||||
StringRef longest_substr() const;
|
||||
|
||||
private:
|
||||
StringRef Pattern;
|
||||
|
||||
@@ -132,6 +132,49 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
|
||||
return std::move(SubPatterns);
|
||||
}
|
||||
|
||||
// Returns the longest run of characters in S that contains none of the glob
// special characters '?', '*', '[', '{' and '\\', and that lies outside every
// bracket expression and escape sequence. When several runs share the maximum
// length, the earliest one is returned. An empty StringRef is returned when
// no such run exists.
//
// The scan is conservative: brace expansions are not analysed, so it stops at
// the first '{' and reports the best run seen before it.
//
// S is expected to be the part of an already validated pattern that sits
// between its plain prefix and its plain suffix.
static StringRef maxPlainSubstring(StringRef S) {
  StringRef Best;
  while (!S.empty()) {
    // Length of the plain run at the front of S (may be zero).
    size_t PlainSize = S.find_first_of("?*[{\\");
    if (PlainSize == std::string::npos)
      PlainSize = S.size();

    // Strict comparison keeps the earliest run among equally long ones.
    if (Best.size() < PlainSize)
      Best = S.take_front(PlainSize);

    S = S.drop_front(PlainSize);

    // If the plain run reached the end of the input there is nothing left to
    // skip. Callers are expected to strip the plain suffix first, but
    // characters such as ']' and '}' are not in the search set above, so a
    // trailing run is still possible; calling front() on an empty string
    // would be undefined.
    if (S.empty())
      break;

    switch (S.front()) {
    case '\\':
      // Skip the backslash together with the character it escapes. A stray
      // trailing backslash should have been rejected when the pattern was
      // created; substr() clamps, so release builds stay in bounds anyway.
      assert(S.size() >= 2 && "stray '\\' at the end of the pattern");
      S = S.substr(2);
      break;
    case '[': {
      // Drop '[' and the first character which can be ']'.
      assert(S.size() >= 2 && "unterminated '[' in the pattern");
      S = S.substr(2);
      size_t EndBracket = S.find_first_of("]");
      // Should not be possible, SubGlobPattern::create should fail on invalid
      // pattern before we get here.
      assert(EndBracket != std::string::npos);
      // Without asserts, npos + 1 would wrap around to 0 and the rest of the
      // bracket expression would be scanned as plain text. Give up instead.
      if (EndBracket == std::string::npos)
        return Best;
      S = S.drop_front(EndBracket + 1);
      break;
    }
    case '{':
      // TODO: implement.
      // Fallback to whatever is best for now.
      return Best;
    default:
      // '?' or '*': a single special character.
      S = S.drop_front(1);
    }
  }

  return Best;
}
|
||||
|
||||
Expected<GlobPattern>
|
||||
GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
|
||||
GlobPattern Pat;
|
||||
@@ -202,6 +245,11 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
|
||||
return Pat;
|
||||
}
|
||||
|
||||
// Computes, on request, the longest plain substring located between the plain
// prefix and the plain suffix of the pattern.
StringRef GlobPattern::longest_substr() const {
  // Only the middle part, with prefix and suffix removed, is searched.
  StringRef Middle = Pattern.drop_front(PrefixSize);
  Middle = Middle.drop_back(SuffixSize);
  return maxPlainSubstring(Middle);
}
|
||||
|
||||
bool GlobPattern::match(StringRef S) const {
|
||||
if (!S.consume_front(prefix()))
|
||||
return false;
|
||||
|
||||
@@ -329,6 +329,72 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
|
||||
EXPECT_EQ("cd", Pat->suffix());
|
||||
}
|
||||
|
||||
// Checks GlobPattern::longest_substr() against a table of patterns. Each row
// pairs a glob with the substring expected from the part of the pattern that
// lies between its plain prefix and plain suffix.
TEST_F(GlobPatternTest, Substr) {
  const struct {
    StringRef Glob;
    StringRef Longest;
  } Cases[] = {
      // Nothing is left between prefix and suffix.
      {"", ""},
      {"abcd", ""},
      {"a*bcd", ""},
      {"*abcd", ""},
      {"abcd*", ""},
      // Runs separated by '*'; the earliest of equally long runs wins.
      {"a*bc*d", "bc"},
      {"a*bc*def*g", "def"},
      {"a*bcd*ef*g", "bcd"},
      {"a*bcd*efg*h", "bcd"},
      // Bracket expressions split runs, including a leading ']' member.
      {"a*bcd[ef]g*h", "bcd"},
      {"a*bc[d]efg*h", "efg"},
      {"a*bc[]]efg*h", "efg"},
      // Escapes and '?' split runs as well.
      {"a*bcde\\fg*h", "bcde"},
      {"a*bcde\\[fg*h", "bcde"},
      {"a*bcde?fg*h", "bcde"},
      // The search gives up at '{' and keeps the best run found so far.
      {"a*bcdef{g}*h", "bcdef"},
  };

  for (const auto &Case : Cases) {
    auto Pat = GlobPattern::create(Case.Glob);
    ASSERT_TRUE((bool)Pat);
    EXPECT_EQ(Case.Longest, Pat->longest_substr());
  }
}
|
||||
|
||||
TEST_F(GlobPatternTest, Pathological) {
|
||||
std::string P, S(40, 'a');
|
||||
StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};
|
||||
|
||||
Reference in New Issue
Block a user