commit 7808ea3a5ffe8c2045dc76099f8968e3b8104360
parent 732741b62fe1cb9cf19ecca8b210474d076ba8b3
Author: Roberto Ierusalimschy <roberto@inf.puc-rio.br>
Date: Wed, 5 May 1999 16:22:49 -0300
new implementation for '*' in patterns + new option '+'
Diffstat:
M | liolib.c | | | 26 | ++++++++++++++------------ |
M | lstrlib.c | | | 238 | ++++++++++++++++++++++++++++++++++++++++++++++--------------------------------- |
M | lualib.h | | | 5 | +++-- |
3 files changed, 156 insertions(+), 113 deletions(-)
diff --git a/liolib.c b/liolib.c
@@ -1,5 +1,5 @@
/*
-** $Id: liolib.c,v 1.37 1999/04/05 19:47:05 roberto Exp roberto $
+** $Id: liolib.c,v 1.38 1999/04/14 20:40:32 roberto Exp $
** Standard I/O (and system) library
** See Copyright Notice in lua.h
*/
@@ -244,23 +244,25 @@ static int read_pattern (FILE *f, char *p) {
p++;
continue;
default: {
- char *ep; /* get what is next */
+ char *ep = luaI_classend(p); /* get what is next */
int m; /* match result */
if (c == NEED_OTHER) c = getc(f);
- if (c != EOF)
- m = luaI_singlematch(c, p, &ep);
- else {
- luaI_singlematch(0, p, &ep); /* to set "ep" */
- m = 0; /* EOF matches no pattern */
- }
+ m = (c==EOF) ? 0 : luaI_singlematch(c, p, ep);
if (m) {
if (!inskip) luaL_addchar(c);
c = NEED_OTHER;
}
switch (*ep) {
- case '*': /* repetition */
- if (!m) p = ep+1; /* else stay in (repeat) the same item */
- continue;
+ case '+': /* repetition (1 or more) */
+ if (!m) goto break_while; /* pattern fails? */
+ /* else go through */
+ case '*': /* repetition (0 or more) */
+ while (m) { /* reads the same item until it fails */
+ c = getc(f);
+ m = (c==EOF) ? 0 : luaI_singlematch(c, p, ep);
+ if (m && !inskip) luaL_addchar(c);
+ }
+ /* go through to continue reading the pattern */
case '?': /* optional */
p = ep+1; /* continues reading the pattern */
continue;
@@ -336,7 +338,7 @@ static void io_read (void) {
success = 1; /* always success */
break;
case 4: /* word */
- success = read_pattern(f, "{%s*}%S%S*");
+ success = read_pattern(f, "{%s*}%S+");
break;
default:
success = read_pattern(f, p);
diff --git a/lstrlib.c b/lstrlib.c
@@ -1,5 +1,5 @@
/*
-** $Id: lstrlib.c,v 1.28 1999/02/26 15:49:53 roberto Exp roberto $
+** $Id: lstrlib.c,v 1.29 1999/04/30 14:12:05 roberto Exp roberto $
** Standard library for strings and pattern-matching
** See Copyright Notice in lua.h
*/
@@ -130,7 +130,7 @@ struct Capture {
#define ESC '%'
-#define SPECIALS "^$*?.([%-"
+#define SPECIALS "^$*+?.([%-"
static void push_captures (struct Capture *cap) {
@@ -160,8 +160,21 @@ static int capture_to_close (struct Capture *cap) {
}
-static char *bracket_end (char *p) {
- return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']');
+char *luaI_classend (char *p) {
+ switch (*p++) {
+ case ESC:
+ if (*p == '\0')
+ luaL_verror("incorrect pattern (ends with `%c')", ESC);
+ return p+1;
+ case '[':
+ if (*p == '^') p++;
+ if (*p == ']') p++;
+ p = strchr(p, ']');
+ if (!p) lua_error("incorrect pattern (missing `]')");
+ return p+1;
+ default:
+ return p;
+ }
}
@@ -184,48 +197,55 @@ static int matchclass (int c, int cl) {
}
-int luaI_singlematch (int c, char *p, char **ep) {
+
+static int matchbracketclass (int c, char *p, char *end) {
+ int sig = 1;
+ if (*(p+1) == '^') {
+ sig = 0;
+ p++; /* skip the '^' */
+ }
+ while (++p < end) {
+ if (*p == ESC) {
+ p++;
+ if ((p < end) && matchclass(c, (unsigned char)*p))
+ return sig;
+ }
+ else if ((*(p+1) == '-') && (p+2 < end)) {
+ p+=2;
+ if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
+ return sig;
+ }
+ else if ((unsigned char)*p == c) return sig;
+ }
+ return !sig;
+}
+
+
+
+int luaI_singlematch (int c, char *p, char *ep) {
switch (*p) {
case '.': /* matches any char */
- *ep = p+1;
return 1;
- case '\0': /* end of pattern; matches nothing */
- *ep = p;
- return 0;
case ESC:
- if (*(++p) == '\0')
- luaL_verror("incorrect pattern (ends with `%c')", ESC);
- *ep = p+1;
- return matchclass(c, (unsigned char)*p);
- case '[': {
- char *end = bracket_end(p+1);
- int sig = *(p+1) == '^' ? (p++, 0) : 1;
- if (end == NULL) lua_error("incorrect pattern (missing `]')");
- *ep = end+1;
- while (++p < end) {
- if (*p == ESC) {
- if (((p+1) < end) && matchclass(c, (unsigned char)*++p))
- return sig;
- }
- else if ((*(p+1) == '-') && (p+2 < end)) {
- p+=2;
- if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
- return sig;
- }
- else if ((unsigned char)*p == c) return sig;
- }
- return !sig;
- }
+ return matchclass(c, (unsigned char)*(p+1));
+ case '[':
+ return matchbracketclass(c, p, ep-1);
default:
- *ep = p+1;
return ((unsigned char)*p == c);
}
}
-static char *matchbalance (char *s, int b, int e, struct Capture *cap) {
- if (*s != b) return NULL;
+static char *match (char *s, char *p, struct Capture *cap);
+
+
+static char *matchbalance (char *s, char *p, struct Capture *cap) {
+ if (*p == 0 || *(p+1) == 0)
+ lua_error("unbalanced pattern");
+ if (*s != *p) return NULL;
else {
+ int b = *p;
+ int e = *(p+1);
int cont = 1;
while (++s < cap->src_end) {
if (*s == e) {
@@ -238,89 +258,109 @@ static char *matchbalance (char *s, int b, int e, struct Capture *cap) {
}
-static char *matchitem (char *s, char *p, struct Capture *cap, char **ep) {
- if (*p == ESC) {
- p++;
- if (isdigit((unsigned char)*p)) { /* capture */
- int l = check_cap(*p, cap);
- int len = cap->capture[l].len;
- *ep = p+1;
- if (cap->src_end-s >= len && memcmp(cap->capture[l].init, s, len) == 0)
- return s+len;
- else return NULL;
- }
- else if (*p == 'b') { /* balanced string */
- p++;
- if (*p == 0 || *(p+1) == 0)
- lua_error("unbalanced pattern");
- *ep = p+2;
- return matchbalance(s, *p, *(p+1), cap);
- }
- else p--; /* and go through */
+static char *max_expand (char *s, char *p, char *ep, struct Capture *cap) {
+ int i = 0; /* counts maximum expand for item */
+ while ((s+i)<cap->src_end && luaI_singlematch((unsigned char)*(s+i), p, ep))
+ i++;
+ /* keeps trying to match with the maximum repetitions */
+ while (i>=0) {
+ char *res = match((s+i), ep+1, cap);
+ if (res) return res;
+ i--; /* else didn't match; reduce 1 repetition to try again */
}
- /* "luaI_singlematch" sets "ep" (so must be called even at the end of "s" */
- return (luaI_singlematch((unsigned char)*s, p, ep) && s<cap->src_end) ?
- s+1 : NULL;
+ return NULL;
+}
+
+
+static char *min_expand (char *s, char *p, char *ep, struct Capture *cap) {
+ for (;;) {
+ char *res = match(s, ep+1, cap);
+ if (res != NULL)
+ return res;
+ else if (s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep))
+ s++; /* try with one more repetition */
+ else return NULL;
+ }
+}
+
+
+static char *start_capt (char *s, char *p, struct Capture *cap) {
+ char *res;
+ int level = cap->level;
+ if (level >= MAX_CAPT) lua_error("too many captures");
+ cap->capture[level].init = s;
+ cap->capture[level].len = -1;
+ cap->level = level+1;
+ if ((res=match(s, p+1, cap)) == NULL) /* match failed? */
+ cap->level--; /* undo capture */
+ return res;
+}
+
+
+static char *end_capt (char *s, char *p, struct Capture *cap) {
+ int l = capture_to_close(cap);
+ char *res;
+ cap->capture[l].len = s - cap->capture[l].init; /* close capture */
+ if ((res = match(s, p+1, cap)) == NULL) /* match failed? */
+ cap->capture[l].len = -1; /* undo capture */
+ return res;
+}
+
+
+static char *match_capture (char *s, int level, struct Capture *cap) {
+ int l = check_cap(level, cap);
+ int len = cap->capture[l].len;
+ if (cap->src_end-s >= len &&
+ memcmp(cap->capture[l].init, s, len) == 0)
+ return s+len;
+ else return NULL;
}
static char *match (char *s, char *p, struct Capture *cap) {
init: /* using goto's to optimize tail recursion */
switch (*p) {
- case '(': { /* start capture */
- char *res;
- if (cap->level >= MAX_CAPT) lua_error("too many captures");
- cap->capture[cap->level].init = s;
- cap->capture[cap->level].len = -1;
- cap->level++;
- if ((res=match(s, p+1, cap)) == NULL) /* match failed? */
- cap->level--; /* undo capture */
- return res;
- }
- case ')': { /* end capture */
- int l = capture_to_close(cap);
- char *res;
- cap->capture[l].len = s - cap->capture[l].init; /* close capture */
- if ((res = match(s, p+1, cap)) == NULL) /* match failed? */
- cap->capture[l].len = -1; /* undo capture */
- return res;
- }
+ case '(': /* start capture */
+ return start_capt(s, p, cap);
+ case ')': /* end capture */
+ return end_capt(s, p, cap);
+ case ESC: /* may be %[0-9] or %b */
+ if (isdigit((unsigned char)(*(p+1)))) { /* capture? */
+ s = match_capture(s, *(p+1), cap);
+ if (s == NULL) return NULL;
+ p+=2; goto init; /* else return match(s, p+2, cap) */
+ }
+ else if (*(p+1) == 'b') { /* balanced string? */
+ s = matchbalance(s, p+2, cap);
+ if (s == NULL) return NULL;
+ p+=4; goto init; /* else return match(s, p+4, cap); */
+ }
+ else goto dflt; /* case default */
case '\0': /* end of pattern */
return s; /* match succeeded */
case '$':
if (*(p+1) == '\0') /* is the '$' the last char in pattern? */
return (s == cap->src_end) ? s : NULL; /* check end of string */
- /* else is a regular '$'; go through */
- default: { /* it is a pattern item */
- char *ep; /* will point to what is next */
- char *s1 = matchitem(s, p, cap, &ep);
+ else goto dflt;
+ default: dflt: { /* it is a pattern item */
+ char *ep = luaI_classend(p); /* points to what is next */
+ int m = s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep);
switch (*ep) {
- case '*': { /* repetition */
- char *res;
- if (s1 && s1>s && ((res=match(s1, p, cap)) != NULL))
- return res;
- p=ep+1; goto init; /* else return match(s, ep+1, cap); */
- }
case '?': { /* optional */
char *res;
- if (s1 && ((res=match(s1, ep+1, cap)) != NULL))
+ if (m && ((res=match(s+1, ep+1, cap)) != NULL))
return res;
p=ep+1; goto init; /* else return match(s, ep+1, cap); */
}
- case '-': { /* repetition */
- char *res;
- if ((res = match(s, ep+1, cap)) != NULL)
- return res;
- else if (s1 && s1>s) {
- s = s1;
- goto init; /* return match(s1, p, cap); */
- }
- else
- return NULL;
- }
+ case '*': /* 0 or more repetitions */
+ return max_expand(s, p, ep, cap);
+ case '+': /* 1 or more repetitions */
+ return (m ? max_expand(s+1, p, ep, cap) : NULL);
+ case '-': /* 0 or more repetitions (minimum) */
+ return min_expand(s, p, ep, cap);
default:
- if (s1) { s=s1; p=ep; goto init; } /* return match(s1, ep, cap); */
- else return NULL;
+ if (!m) return NULL;
+ s++; p=ep; goto init; /* else return match(s+1, ep, cap); */
}
}
}
diff --git a/lualib.h b/lualib.h
@@ -1,5 +1,5 @@
/*
-** $Id: lualib.h,v 1.4 1998/06/19 16:14:09 roberto Exp roberto $
+** $Id: lualib.h,v 1.5 1999/01/08 16:47:44 roberto Exp roberto $
** Lua standard libraries
** See Copyright Notice in lua.h
*/
@@ -29,7 +29,8 @@ void lua_userinit (void);
/* Auxiliary functions (private) */
-int luaI_singlematch (int c, char *p, char **ep);
+char *luaI_classend (char *p);
+int luaI_singlematch (int c, char *p, char *ep);
#endif