Merge branch 'filter-improvements' - reapack

commit 629ea1cf2db4ff349911063996ce523ff46602af
parent 731ef31b053faaf69fe63c327f778d85dd4bfc1b
Author: cfillion <cfillion@users.noreply.github.com>
Date:   Tue, 18 Feb 2020 13:59:21 -0500

Merge branch 'filter-improvements'

Diffstat:
M src/filter.cpp  | 63 ++++++++++++++++++++++++++++++++++++---------------------------
M src/filter.hpp  | 6 +++---
M test/filter.cpp  | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------

3 files changed, 97 insertions(+), 62 deletions(-)
diff --git a/src/filter.cpp b/src/filter.cpp
@@ -27,35 +27,53 @@ Filter::Filter(const std::string &input)
 
 void Filter::set(const std::string &input)
 {
-  enum State { Default, DoubleQuote, SingleQuote };
-
   m_input = input;
   m_root.clear();
 
   std::string buf;
+  char quote = 0;
   int flags = 0;
-  State state = Default;
   Group *group = &m_root;
 
-  for(const char c : input) {
-    if(c == '"' && state != SingleQuote) {
-      state = state == Default ? DoubleQuote : Default;
-      flags |= Node::QuotedFlag;
-      continue;
-    }
-    else if(c == '\'' && state != DoubleQuote) {
-      state = state == Default ? SingleQuote : Default;
-      flags |= Node::QuotedFlag;
+  for(size_t i = 0; i < input.size(); ++i) {
+    const char c = input[i];
+
+    const bool isStart = buf.empty(),
+               isEnd = i+1 == input.size() || input[i+1] == '\x20';
+
+    if((c == '"' || c == '\'') && ((!quote && isStart) || quote == c)) {
+      if(quote)
+        quote = 0;
+      else {
+        flags |= Node::LiteralFlag | Node::FullWordFlag;
+        quote = c;
+      }
       continue;
     }
     else if(c == '\x20') {
-      if(state == Default) {
+      if(quote)
+        flags &= ~Node::FullWordFlag;
+      else {
         group = group->push(buf, &flags);
         buf.clear();
         continue;
       }
-      else
-        flags |= Node::PhraseFlag;
+    }
+    else if(!quote) {
+      if(c == '^' && isStart) {
+        flags |= Node::StartAnchorFlag;
+        continue;
+      }
+      else if(c == '$' && isEnd) {
+        flags |= Node::EndAnchorFlag;
+        continue;
+      }
+      else if(flags & Node::LiteralFlag) {
+        // force-close the token after having parsed a closing quote
+        // and only after having parsed all trailing anchors
+        group = group->push(buf, &flags);
+        buf.clear();
+      }
     }
 
     buf += c;
@@ -77,12 +95,12 @@ Filter::Group::Group(Type type, int flags, Group *parent)
 {
 }
 
-Filter::Group *Filter::Group::push(std::string buf, int *flags)
+Filter::Group *Filter::Group::push(const std::string &buf, int *flags)
 {
   if(buf.empty())
     return this;
 
-  if((*flags & QuotedFlag) == 0) {
+  if(!(*flags & LiteralFlag)) {
     if(buf == "NOT") {
       *flags ^= Token::NotFlag;
       return this;
@@ -117,15 +135,6 @@ Filter::Group *Filter::Group::push(std::string buf, int *flags)
     }
   }
 
-  if(buf.size() > 1 && buf.front() == '^') {
-    *flags |= Node::StartAnchorFlag;
-    buf.erase(0, 1); // we need to recheck the size() below, for '$'
-  }
-  if(buf.size() > 1 && buf.back() == '$') {
-    *flags |= Node::EndAnchorFlag;
-    buf.pop_back();
-  }
-
   Group *group = m_open ? this : m_parent;
   group->m_nodes.push_back(std::make_unique<Token>(buf, *flags));
   *flags = 0;
@@ -193,7 +202,7 @@ bool Filter::Token::matchRow(const std::string &str) const
     return false;
   if(test(EndAnchorFlag) && !isEnd)
     return false;
-  if(test(QuotedFlag) && !test(PhraseFlag)) {
+  if(test(FullWordFlag)) {
     return
       (isStart || !isalnum(str[pos - 1])) &&
       (isEnd || !isalnum(str[pos + m_buf.size()]));
diff --git a/src/filter.hpp b/src/filter.hpp
@@ -41,9 +41,9 @@ private:
     enum Flag {
       StartAnchorFlag = 1<<0,
       EndAnchorFlag   = 1<<1,
-      QuotedFlag      = 1<<2,
+      LiteralFlag     = 1<<2,
       NotFlag         = 1<<3,
-      PhraseFlag      = 1<<4,
+      FullWordFlag    = 1<<4,
     };
 
     Node(int flags) : m_flags(flags) {}
@@ -65,7 +65,7 @@ private:
 
     Group(Type type, int flags = 0, Group *parent = nullptr);
     void clear() { m_nodes.clear(); }
-    Group *push(std::string, int *flags);
+    Group *push(const std::string &, int *flags);
 
     bool match(const std::vector<std::string> &) const override;
 
diff --git a/test/filter.cpp b/test/filter.cpp
@@ -61,29 +61,46 @@ TEST_CASE("quote phrase matching", M) {
   Filter f;
 
   SECTION("double quotes")
-    f.set("\"hello world\"");
+    f.set("\"foo bar\" baz");
   SECTION("single quotes")
-    f.set("'hello world'");
+    f.set("'foo bar' baz");
 
-  REQUIRE(f.match({"hello world"}));
-  REQUIRE(f.match({"BEFOREhello worldAFTER"}));
-  REQUIRE_FALSE(f.match({"helloworld"}));
-  REQUIRE_FALSE(f.match({"hello test world"}));
+  REQUIRE(f.match({"baz foo bar"}));
+  REQUIRE(f.match({"BEFOREfoo barAFTER baz"}));
+  REQUIRE_FALSE(f.match({"foobarbaz"}));
+  REQUIRE_FALSE(f.match({"foo test bar baz"}));
 }
 
-TEST_CASE("quote word matching", M) {
+TEST_CASE("full word matching", M) {
   Filter f;
 
   SECTION("double quotes")
-    f.set("\"word\"");
+    f.set("\"hello\" world");
   SECTION("single quotes")
-    f.set("'word'");
+    f.set("'hello' world");
+
+  REQUIRE(f.match({"BEFORE hello AFTER world"}));
+  REQUIRE(f.match({"_hello_ world"}));
+  REQUIRE_FALSE(f.match({"BEFOREhello world"}));
+  REQUIRE_FALSE(f.match({"helloAFTER world"}));
+  REQUIRE_FALSE(f.match({"BEFOREhelloAFTER world"}));
+}
+
+TEST_CASE("late opening quote", M) {
+  Filter f;
+  f.set("foo'bar'");
+
+  REQUIRE(f.match({"foo'bar'"}));
+  REQUIRE_FALSE(f.match({"foo bar"}));
+}
 
-  REQUIRE(f.match({"BEFORE word AFTER"}));
-  REQUIRE(f.match({"_word_"}));
-  REQUIRE_FALSE(f.match({"BEFOREword"}));
-  REQUIRE_FALSE(f.match({"wordAFTER"}));
-  REQUIRE_FALSE(f.match({"BEFOREwordAFTER"}));
+TEST_CASE("early closing quote", M) {
+  Filter f;
+  f.set("'foo'bar");
+
+  REQUIRE(f.match({"foo bar"}));
+  REQUIRE_FALSE(f.match({"foobar"}));
+  REQUIRE_FALSE(f.match({"foo ar"}));
 }
 
 TEST_CASE("mixing quotes", M) {
@@ -123,17 +140,16 @@ TEST_CASE("start of string", M) {
 
   SECTION("single") {
     f.set("^");
-    REQUIRE(f.match({"hel^lo world"}));
-    REQUIRE_FALSE(f.match({"hello world"}));
+    REQUIRE(f.match({"hello world"}));
   }
 
-  SECTION("quote before") {
+  SECTION("literal ^") {
     f.set("'^hello'");
-    REQUIRE(f.match({"hello world"}));
+    REQUIRE(f.match({"^hello world"}));
     REQUIRE_FALSE(f.match({"world hello"}));
   }
 
-  SECTION("quote after") {
+  SECTION("full word") {
     f.set("^'hello");
     REQUIRE(f.match({"hello world"}));
     REQUIRE_FALSE(f.match({"world hello"}));
@@ -159,19 +175,18 @@ TEST_CASE("end of string", M) {
 
   SECTION("single") {
     f.set("$");
-    REQUIRE(f.match({"hel$lo world"}));
-    REQUIRE_FALSE(f.match({"hello world"}));
+    REQUIRE(f.match({"hello world"}));
   }
 
-  SECTION("quote before") {
+  SECTION("full word") {
     f.set("'hello'$");
     REQUIRE(f.match({"hello"}));
     REQUIRE_FALSE(f.match({"hello world"}));
   }
 
-  SECTION("quote after") {
+  SECTION("literal $") {
     f.set("'hello$'");
-    REQUIRE(f.match({"hello"}));
+    REQUIRE(f.match({"hello$"}));
     REQUIRE_FALSE(f.match({"hello world"}));
   }
 }
@@ -214,13 +229,6 @@ TEST_CASE("OR operator", M) {
     REQUIRE(f.match({"bacon"}));
   }
 
-  SECTION("quoted") {
-    f.set("hello 'OR' bacon");
-
-    REQUIRE_FALSE(f.match({"hello world"}));
-    REQUIRE(f.match({"hello OR bacon"}));
-  }
-
   SECTION("reset") {
     f.set("hello OR bacon world");
 
@@ -233,6 +241,12 @@ TEST_CASE("OR operator", M) {
     f.set("OR");
     REQUIRE(f.match({"anything"}));
   }
+
+  SECTION("literal OR") {
+    f.set("'OR'");
+    REQUIRE(f.match({"OR"}));
+    REQUIRE_FALSE(f.match({"foo"}));
+  }
 }
 
 TEST_CASE("NOT operator", M) {
@@ -265,7 +279,7 @@ TEST_CASE("NOT operator", M) {
     REQUIRE(f.match({"hello", "bacon"}));
   }
 
-  SECTION("quote word matching") {
+  SECTION("full word matching") {
     f.set("NOT 'hello'");
     REQUIRE(f.match({"hellobacon"}));
   }
@@ -275,6 +289,12 @@ TEST_CASE("NOT operator", M) {
     REQUIRE(f.match({"hello"}));
     REQUIRE_FALSE(f.match({"world"}));
   }
+
+  SECTION("literal NOT") {
+    f.set("'NOT'");
+    REQUIRE(f.match({"NOT"}));
+    REQUIRE_FALSE(f.match({"foo"}));
+  }
 }
 
 TEST_CASE("AND grouping", M) {
@@ -320,4 +340,10 @@ TEST_CASE("AND grouping", M) {
     REQUIRE_FALSE(f.match({"apple bacon"}));
     REQUIRE_FALSE(f.match({"bacon"}));
   }
+
+  SECTION("literal parentheses") {
+    f.set("'('");
+    REQUIRE(f.match({"("}));
+    REQUIRE_FALSE(f.match({"foo"}));
+  }
 }

	reapack Package manager for REAPER
	Log | Files | Refs | Submodules | README | LICENSE

M	src/filter.cpp	|	63	++++++++++++++++++++++++++++++++++++---------------------------
M	src/filter.hpp	|	6	+++---
M	test/filter.cpp	|	90	+++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------