commit 6b37b62dabd37a0604cbb1a1fa8ec8ff131895d5
parent b71e5f0c8123cc3d8c6a19e1761ef0e9c4a644cd
Author: cfillion <cfillion@users.noreply.github.com>
Date: Thu, 5 Jan 2017 03:03:33 -0500
filter: implement exact word matching [t=185894]
Diffstat:
3 files changed, 49 insertions(+), 9 deletions(-)
diff --git a/src/filter.cpp b/src/filter.cpp
@@ -47,6 +47,7 @@ void Filter::set(const string &input)
state = DoubleQuote;
flags |= Node::QuotedFlag;
+ continue;
}
else if(c == '\'' && state != DoubleQuote) {
if(state == SingleQuote)
@@ -55,13 +56,19 @@ void Filter::set(const string &input)
state = SingleQuote;
flags |= Node::QuotedFlag;
+ continue;
}
- else if(c == '\x20' && state == Default) {
- group = group->push(buf, &flags);
- buf.clear();
+ else if(c == '\x20') {
+ if(state == Default) {
+ group = group->push(buf, &flags);
+ buf.clear();
+ continue;
+ }
+ else
+ flags |= Node::PhraseFlag;
}
- else
- buf += c;
+
+ buf += c;
}
group->push(buf, &flags);
@@ -197,12 +204,23 @@ bool Filter::Token::match(const vector<string> &rows) const
+// Tests one row string against this token.
+//
+// The token text (m_buf) is searched for with string::find, so only its
+// first occurrence in str is considered. The result is inverted when NotFlag
+// is set:
+// - StartAnchorFlag requires that occurrence to begin at index 0;
+// - EndAnchorFlag requires it to end exactly at the end of str;
+// - QuotedFlag without PhraseFlag requires the occurrence to stand alone as
+//   a word: the characters right before and right after it, when present,
+//   must not be alphanumeric.
+//
+// NOTE(review): because only the first occurrence is examined, a row such as
+// "wordy word" looks like it would be rejected by the quoted filter 'word'
+// even though a later occurrence is a whole word -- confirm this is intended.
bool Filter::Token::matchRow(const string &str) const
{
const size_t pos = str.find(m_buf);
+ // When pos is string::npos, isStart is false and isEnd is computed from a
+ // wrapped-around sum; the explicit npos check below runs before pos is
+ // ever used as an index.
+ const bool isStart = pos == 0, isEnd = pos + m_buf.size() == str.size();
const bool fail = test(NotFlag);
- if(test(StartAnchorFlag) && pos != 0)
+ if(test(StartAnchorFlag) && !isStart)
return fail;
- if(test(EndAnchorFlag) && pos + m_buf.size() != str.size())
+ if(test(EndAnchorFlag) && !isEnd)
return fail;
- return (pos != string::npos) ^ fail;
+ if(pos == string::npos)
+ return fail;
+
+ if(test(QuotedFlag) && !test(PhraseFlag)) {
+ // isalnum() has undefined behaviour for negative arguments other than
+ // EOF. Plain char may be signed, so bytes >= 0x80 must be converted to
+ // unsigned char before classification.
+ return fail ^ (
+ (isStart || !isalnum(static_cast<unsigned char>(str[pos - 1]))) &&
+ (isEnd || !isalnum(static_cast<unsigned char>(str[pos + m_buf.size()])))
+ );
+ }
+
+ return !fail;
}
diff --git a/src/filter.hpp b/src/filter.hpp
@@ -43,6 +43,7 @@ private:
EndAnchorFlag = 1<<1,
QuotedFlag = 1<<2,
NotFlag = 1<<3,
+ PhraseFlag = 1<<4,
};
Node(int flags) : m_flags(flags) {}
diff --git a/test/filter.cpp b/test/filter.cpp
@@ -59,7 +59,7 @@ TEST_CASE("word matching", M) {
REQUIRE(f.match({"hello test world"}));
}
-TEST_CASE("quote matching", M) {
+TEST_CASE("quote phrase matching", M) {
Filter f;
SECTION("double quotes")
@@ -68,10 +68,26 @@ TEST_CASE("quote matching", M) {
f.set("'hello world'");
REQUIRE(f.match({"hello world"}));
+ REQUIRE(f.match({"BEFOREhello worldAFTER"}));
REQUIRE_FALSE(f.match({"helloworld"}));
REQUIRE_FALSE(f.match({"hello test world"}));
}
+TEST_CASE("quote word matching", M) { // a quoted filter with no space inside the quotes is expected to match whole words only
+ Filter f;
+
+ SECTION("double quotes") // each section sets the same word with a different quote character; the checks below run for both
+ f.set("\"word\"");
+ SECTION("single quotes")
+ f.set("'word'");
+
+ REQUIRE(f.match({"BEFORE word AFTER"})); // spaces on both sides of the word
+ REQUIRE(f.match({"_word_"})); // an underscore is not alphanumeric, so it does not glue onto the word
+ REQUIRE_FALSE(f.match({"BEFOREword"})); // a letter directly before the word
+ REQUIRE_FALSE(f.match({"wordAFTER"})); // a letter directly after the word
+ REQUIRE_FALSE(f.match({"BEFOREwordAFTER"})); // letters on both sides
+}
+
TEST_CASE("mixing quotes", M) {
Filter f;
@@ -251,6 +267,11 @@ TEST_CASE("NOT operator", M) {
REQUIRE(f.match({"hello", "bacon"}));
}
+ SECTION("quote word matching") { // NOT combined with a quoted single word
+ f.set("NOT 'hello'");
+ REQUIRE(f.match({"hellobacon"})); // "hello" is directly followed by a letter here, so the quoted word is expected not to match and the negated filter accepts the row
+ }
+
SECTION("NOT NOT") {
f.set("NOT NOT hello");
REQUIRE(f.match({"hello"}));