commit 629ea1cf2db4ff349911063996ce523ff46602af
parent 731ef31b053faaf69fe63c327f778d85dd4bfc1b
Author: cfillion <cfillion@users.noreply.github.com>
Date: Tue, 18 Feb 2020 13:59:21 -0500
Merge branch 'filter-improvements'
Diffstat:
3 files changed, 97 insertions(+), 62 deletions(-)
diff --git a/src/filter.cpp b/src/filter.cpp
@@ -27,35 +27,53 @@ Filter::Filter(const std::string &input)
void Filter::set(const std::string &input)
{
- enum State { Default, DoubleQuote, SingleQuote };
-
m_input = input;
m_root.clear();
std::string buf;
+ char quote = 0;
int flags = 0;
- State state = Default;
Group *group = &m_root;
- for(const char c : input) {
- if(c == '"' && state != SingleQuote) {
- state = state == Default ? DoubleQuote : Default;
- flags |= Node::QuotedFlag;
- continue;
- }
- else if(c == '\'' && state != DoubleQuote) {
- state = state == Default ? SingleQuote : Default;
- flags |= Node::QuotedFlag;
+ for(size_t i = 0; i < input.size(); ++i) {
+ const char c = input[i];
+
+ const bool isStart = buf.empty(),
+ isEnd = i+1 == input.size() || input[i+1] == '\x20';
+
+ if((c == '"' || c == '\'') && ((!quote && isStart) || quote == c)) {
+ if(quote)
+ quote = 0;
+ else {
+ flags |= Node::LiteralFlag | Node::FullWordFlag;
+ quote = c;
+ }
continue;
}
else if(c == '\x20') {
- if(state == Default) {
+ if(quote)
+ flags &= ~Node::FullWordFlag;
+ else {
group = group->push(buf, &flags);
buf.clear();
continue;
}
- else
- flags |= Node::PhraseFlag;
+ }
+ else if(!quote) {
+ if(c == '^' && isStart) {
+ flags |= Node::StartAnchorFlag;
+ continue;
+ }
+ else if(c == '$' && isEnd) {
+ flags |= Node::EndAnchorFlag;
+ continue;
+ }
+ else if(flags & Node::LiteralFlag) {
+ // a closing quote was already parsed and this character is not a trailing
+ // anchor: force-close the quoted token so this character starts a new one
+ group = group->push(buf, &flags);
+ buf.clear();
+ }
}
buf += c;
@@ -77,12 +95,12 @@ Filter::Group::Group(Type type, int flags, Group *parent)
{
}
-Filter::Group *Filter::Group::push(std::string buf, int *flags)
+Filter::Group *Filter::Group::push(const std::string &buf, int *flags)
{
if(buf.empty())
return this;
- if((*flags & QuotedFlag) == 0) {
+ if(!(*flags & LiteralFlag)) {
if(buf == "NOT") {
*flags ^= Token::NotFlag;
return this;
@@ -117,15 +135,6 @@ Filter::Group *Filter::Group::push(std::string buf, int *flags)
}
}
- if(buf.size() > 1 && buf.front() == '^') {
- *flags |= Node::StartAnchorFlag;
- buf.erase(0, 1); // we need to recheck the size() below, for '$'
- }
- if(buf.size() > 1 && buf.back() == '$') {
- *flags |= Node::EndAnchorFlag;
- buf.pop_back();
- }
-
Group *group = m_open ? this : m_parent;
group->m_nodes.push_back(std::make_unique<Token>(buf, *flags));
*flags = 0;
@@ -193,7 +202,7 @@ bool Filter::Token::matchRow(const std::string &str) const
return false;
if(test(EndAnchorFlag) && !isEnd)
return false;
- if(test(QuotedFlag) && !test(PhraseFlag)) {
+ if(test(FullWordFlag)) {
return
(isStart || !isalnum(str[pos - 1])) &&
(isEnd || !isalnum(str[pos + m_buf.size()]));
diff --git a/src/filter.hpp b/src/filter.hpp
@@ -41,9 +41,9 @@ private:
enum Flag {
StartAnchorFlag = 1<<0,
EndAnchorFlag = 1<<1,
- QuotedFlag = 1<<2,
+ LiteralFlag = 1<<2,
NotFlag = 1<<3,
- PhraseFlag = 1<<4,
+ FullWordFlag = 1<<4,
};
Node(int flags) : m_flags(flags) {}
@@ -65,7 +65,7 @@ private:
Group(Type type, int flags = 0, Group *parent = nullptr);
void clear() { m_nodes.clear(); }
- Group *push(std::string, int *flags);
+ Group *push(const std::string &, int *flags);
bool match(const std::vector<std::string> &) const override;
diff --git a/test/filter.cpp b/test/filter.cpp
@@ -61,29 +61,46 @@ TEST_CASE("quote phrase matching", M) {
Filter f;
SECTION("double quotes")
- f.set("\"hello world\"");
+ f.set("\"foo bar\" baz");
SECTION("single quotes")
- f.set("'hello world'");
+ f.set("'foo bar' baz");
- REQUIRE(f.match({"hello world"}));
- REQUIRE(f.match({"BEFOREhello worldAFTER"}));
- REQUIRE_FALSE(f.match({"helloworld"}));
- REQUIRE_FALSE(f.match({"hello test world"}));
+ REQUIRE(f.match({"baz foo bar"}));
+ REQUIRE(f.match({"BEFOREfoo barAFTER baz"}));
+ REQUIRE_FALSE(f.match({"foobarbaz"}));
+ REQUIRE_FALSE(f.match({"foo test bar baz"}));
}
-TEST_CASE("quote word matching", M) {
+TEST_CASE("full word matching", M) {
Filter f;
SECTION("double quotes")
- f.set("\"word\"");
+ f.set("\"hello\" world");
SECTION("single quotes")
- f.set("'word'");
+ f.set("'hello' world");
+
+ REQUIRE(f.match({"BEFORE hello AFTER world"}));
+ REQUIRE(f.match({"_hello_ world"}));
+ REQUIRE_FALSE(f.match({"BEFOREhello world"}));
+ REQUIRE_FALSE(f.match({"helloAFTER world"}));
+ REQUIRE_FALSE(f.match({"BEFOREhelloAFTER world"}));
+}
+
+TEST_CASE("late opening quote", M) {
+ Filter f;
+ f.set("foo'bar'");
+
+ REQUIRE(f.match({"foo'bar'"}));
+ REQUIRE_FALSE(f.match({"foo bar"}));
+}
- REQUIRE(f.match({"BEFORE word AFTER"}));
- REQUIRE(f.match({"_word_"}));
- REQUIRE_FALSE(f.match({"BEFOREword"}));
- REQUIRE_FALSE(f.match({"wordAFTER"}));
- REQUIRE_FALSE(f.match({"BEFOREwordAFTER"}));
+TEST_CASE("early closing quote", M) {
+ Filter f;
+ f.set("'foo'bar");
+
+ REQUIRE(f.match({"foo bar"}));
+ REQUIRE_FALSE(f.match({"foobar"}));
+ REQUIRE_FALSE(f.match({"foo ar"}));
}
TEST_CASE("mixing quotes", M) {
@@ -123,17 +140,16 @@ TEST_CASE("start of string", M) {
SECTION("single") {
f.set("^");
- REQUIRE(f.match({"hel^lo world"}));
- REQUIRE_FALSE(f.match({"hello world"}));
+ REQUIRE(f.match({"hello world"}));
}
- SECTION("quote before") {
+ SECTION("literal ^") {
f.set("'^hello'");
- REQUIRE(f.match({"hello world"}));
+ REQUIRE(f.match({"^hello world"}));
REQUIRE_FALSE(f.match({"world hello"}));
}
- SECTION("quote after") {
+ SECTION("full word") {
f.set("^'hello");
REQUIRE(f.match({"hello world"}));
REQUIRE_FALSE(f.match({"world hello"}));
@@ -159,19 +175,18 @@ TEST_CASE("end of string", M) {
SECTION("single") {
f.set("$");
- REQUIRE(f.match({"hel$lo world"}));
- REQUIRE_FALSE(f.match({"hello world"}));
+ REQUIRE(f.match({"hello world"}));
}
- SECTION("quote before") {
+ SECTION("full word") {
f.set("'hello'$");
REQUIRE(f.match({"hello"}));
REQUIRE_FALSE(f.match({"hello world"}));
}
- SECTION("quote after") {
+ SECTION("literal $") {
f.set("'hello$'");
- REQUIRE(f.match({"hello"}));
+ REQUIRE(f.match({"hello$"}));
REQUIRE_FALSE(f.match({"hello world"}));
}
}
@@ -214,13 +229,6 @@ TEST_CASE("OR operator", M) {
REQUIRE(f.match({"bacon"}));
}
- SECTION("quoted") {
- f.set("hello 'OR' bacon");
-
- REQUIRE_FALSE(f.match({"hello world"}));
- REQUIRE(f.match({"hello OR bacon"}));
- }
-
SECTION("reset") {
f.set("hello OR bacon world");
@@ -233,6 +241,12 @@ TEST_CASE("OR operator", M) {
f.set("OR");
REQUIRE(f.match({"anything"}));
}
+
+ SECTION("literal OR") {
+ f.set("'OR'");
+ REQUIRE(f.match({"OR"}));
+ REQUIRE_FALSE(f.match({"foo"}));
+ }
}
TEST_CASE("NOT operator", M) {
@@ -265,7 +279,7 @@ TEST_CASE("NOT operator", M) {
REQUIRE(f.match({"hello", "bacon"}));
}
- SECTION("quote word matching") {
+ SECTION("full word matching") {
f.set("NOT 'hello'");
REQUIRE(f.match({"hellobacon"}));
}
@@ -275,6 +289,12 @@ TEST_CASE("NOT operator", M) {
REQUIRE(f.match({"hello"}));
REQUIRE_FALSE(f.match({"world"}));
}
+
+ SECTION("literal NOT") {
+ f.set("'NOT'");
+ REQUIRE(f.match({"NOT"}));
+ REQUIRE_FALSE(f.match({"foo"}));
+ }
}
TEST_CASE("AND grouping", M) {
@@ -320,4 +340,10 @@ TEST_CASE("AND grouping", M) {
REQUIRE_FALSE(f.match({"apple bacon"}));
REQUIRE_FALSE(f.match({"bacon"}));
}
+
+ SECTION("literal parentheses") {
+ f.set("'('");
+ REQUIRE(f.match({"("}));
+ REQUIRE_FALSE(f.match({"foo"}));
+ }
}