Skip to content

Commit

Permalink
regexp, regsub: add support for -expanded
Browse files Browse the repository at this point in the history
Fixes #311

Signed-off-by: Steve Bennett <steveb@workware.net.au>
  • Loading branch information
msteveb committed Sep 22, 2024
1 parent c4b5075 commit 8a438c3
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 20 deletions.
26 changes: 22 additions & 4 deletions jim-regexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,10 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
int eflags = 0;
int option;
enum {
OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_END
OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_EXPANDED, OPT_END
};
static const char * const options[] = {
"-indices", "-nocase", "-line", "-all", "-inline", "-start", "--", NULL
"-indices", "-nocase", "-line", "-all", "-inline", "-start", "-expanded", "--", NULL
};

for (i = 1; i < argc; i++) {
Expand Down Expand Up @@ -185,6 +185,15 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
return JIM_ERR;
}
break;

case OPT_EXPANDED:
#ifdef REG_EXPANDED
regcomp_flags |= REG_EXPANDED;
break;
#else
Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
return JIM_ERR;
#endif
}
}
if (argc - i < 2) {
Expand Down Expand Up @@ -361,10 +370,10 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
const char *pattern;
int option;
enum {
OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_END
OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_EXPANDED, OPT_END
};
static const char * const options[] = {
"-nocase", "-line", "-all", "-start", "-command", "--", NULL
"-nocase", "-line", "-all", "-start", "-command", "-expanded", "--", NULL
};

for (i = 1; i < argc; i++) {
Expand Down Expand Up @@ -405,6 +414,15 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
case OPT_COMMAND:
opt_command = 1;
break;

case OPT_EXPANDED:
#ifdef REG_EXPANDED
regcomp_flags |= REG_EXPANDED;
break;
#else
Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
return JIM_ERR;
#endif
}
}
if (argc - i != 3 && argc - i != 4) {
Expand Down
13 changes: 11 additions & 2 deletions jim_tcl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ Changes since 0.83
1. `aio` - support for configurable read and write buffering
2. `exec` TIP 424 - support safer +exec | + syntax (also +open "|| pipeline..."+) (see https://core.tcl-lang.org/tips/doc/trunk/tip/424.md)
3. New `lsubst` command to create lists using subst-style substitution
4. Add support for `regexp -expanded` and `regsub -expanded`

Changes between 0.82 and 0.83
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -3876,7 +3877,7 @@ See `aio read`

regexp
~~~~~~
+*regexp ?-nocase? ?-line? ?-indices? ?-start* 'offset'? *?-all? ?-inline? ?--?* 'exp string ?matchVar? ?subMatchVar subMatchVar \...?'+
+*regexp ?-nocase? ?-line? ?-indices? ?-start* 'offset'? *?-all? ?-inline? ?-expanded? ?--?* 'exp string ?matchVar? ?subMatchVar subMatchVar \...?'+

Determines whether the regular expression +'exp'+ matches part or
all of +'string'+ and returns 1 if it does, 0 if it doesn't.
Expand Down Expand Up @@ -3949,13 +3950,17 @@ The following switches modify the behaviour of +'regexp'+
data, plus one element for each subexpression in the regular
expression.

+*-expanded*+::
Enables use of the expanded regular expression syntax, where whitespace
and comments (from +#+ to the end of the line) in +'exp'+ are ignored.

+*--*+::
Marks the end of switches. The argument following this one will be
treated as +'exp'+ even if it starts with a +-+.

regsub
~~~~~~
+*regsub ?-nocase? ?-all? ?-line? ?-command? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+
+*regsub ?-nocase? ?-all? ?-line? ?-command? ?-expanded? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+

This command matches the regular expression +'exp'+ against
+'string'+ using the rules described in REGULAR EXPRESSIONS
Expand Down Expand Up @@ -4037,6 +4042,10 @@ The following switches modify the behaviour of +'regsub'+
start matching the regular expression. +'offset'+ will be
constrained to the bounds of the input string.

+*-expanded*+::
Enables use of the expanded regular expression syntax, where whitespace
and comments (from +#+ to the end of the line) in +'exp'+ are ignored.

+*--*+::
Marks the end of switches. The argument following this one will be
treated as +'exp'+ even if it starts with a +-+.
Expand Down
27 changes: 27 additions & 0 deletions jimregexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,27 @@ static int regatom(regex_t *preg, int *flagp)

ret = regnode(preg, EXACTLY);

if (preg->cflags & REG_EXPANDED) {
/* Skip leading white space */
while ((ch = *preg->regparse) != 0) {
if (strchr(" \t\r\n\f\v", ch)) {
preg->regparse++;
continue;
}
break;
}
if (ch == '#') {
/* And skip comments to end of line */
preg->regparse++;
while ((ch = *preg->regparse) != 0) {
preg->regparse++;
if (ch == '\n') {
break;
}
}
}
}

/* Note that a META operator such as ? or * consumes the
* preceding char.
* Thus we must be careful to look ahead by 2 and add the
Expand Down Expand Up @@ -993,6 +1014,12 @@ static int regatom(regex_t *preg, int *flagp)
break;
}

/* For REG_EXPANDED, if we hit white space, stop */
if ((preg->cflags & REG_EXPANDED) && n == 1 && strchr(" \t\r\n\f\v", ch)) {
preg->regparse += n;
break;
}

/* No, so just add this char normally */
regc(preg, ch);
added++;
Expand Down
1 change: 1 addition & 0 deletions jimregexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ typedef regexp regex_t;
#define REG_ICASE 2

#define REG_NOTBOL 16
#define REG_EXPANDED 32

enum {
REG_NOERROR, /* Success. */
Expand Down
6 changes: 3 additions & 3 deletions tests/regexp.test
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,9 @@ test regexp-6.1 {regexp errors} {
test regexp-6.2 {regexp errors} {
list [catch {regexp -nocase a} msg] $msg
} {1 {wrong # args: should be "regexp ?-switch ...? exp string ?matchVar? ?subMatchVar ...?"}}
test regexp-6.3 {regexp errors} jim {
test regexp-6.3 {regexp errors} -constraints jim -body {
list [catch {regexp -gorp a} msg] $msg
} {1 {bad switch "-gorp": must be --, -all, -indices, -inline, -line, -nocase, or -start}}
} -result {1 {bad switch "-gorp": must be --, -all, -expanded, -indices, -inline, -line, -nocase, or -start}}
test regexp-6.4 {regexp errors} {
catch {regexp a( b} msg
} 1
Expand Down Expand Up @@ -367,7 +367,7 @@ test regexp-11.4 {regsub errors} {
} {1 {wrong # args: should be "regsub ?-switch ...? exp string subSpec ?varName?"}}
test regexp-11.5 {regsub errors} -constraints jim -body {
list [catch {regsub -gorp a b c} msg] $msg
} -result {1 {bad switch "-gorp": must be --, -all, -command, -line, -nocase, or -start}}
} -result {1 {bad switch "-gorp": must be --, -all, -command, -expanded, -line, -nocase, or -start}}
test regexp-11.6 {regsub errors} {
catch {regsub -nocase a( b c d} msg
} 1
Expand Down
22 changes: 11 additions & 11 deletions tests/regexp2.test
Original file line number Diff line number Diff line change
Expand Up @@ -463,12 +463,12 @@ test regexpComp-9.6 {-all option to regsub} {
}
} {1 123xxx}

#test regexpComp-10.1 {expanded syntax in regsub} {
# evalInProc {
# set foo xxx
# list [regsub -expanded ". \#comment\n . \#comment2" abc def foo] $foo
# }
#} {1 defc}
# With -expanded, whitespace and "#" comments (up to end of line) in the
# pattern are ignored, so the pattern below reduces to "..". The first two
# characters of "abc" are replaced by "def", giving "defc", and regsub
# returns 1 as the number of substitutions made.
test regexpComp-10.1 {expanded syntax in regsub} {
evalInProc {
set foo xxx
list [regsub -expanded ". \#comment\n . \#comment2" abc def foo] $foo
}
} {1 defc}
test regexpComp-10.2 {newline sensitivity in regsub} {
evalInProc {
set foo xxx
Expand Down Expand Up @@ -523,11 +523,11 @@ test regexpComp-11.4 {regsub errors} {
list [catch {regsub a b c d e f} msg] $msg
}
} {1 {wrong # args: should be "regsub ?-switch ...? exp string subSpec ?varName?"}}
#test regexpComp-11.5 {regsub errors} {
# evalInProc {
# list [catch {regsub -gorp a b c} msg] $msg
# }
#} {1 {bad switch "-gorp": must be -all, -nocase, -expanded, -line, -linestop, -lineanchor, -start, or --}}
# An unrecognised switch must be rejected with an error message that lists
# every valid regsub switch, including -expanded.
test regexpComp-11.5 {regsub errors} {
evalInProc {
list [catch {regsub -gorp a b c} msg] $msg
}
} {1 {bad switch "-gorp": must be --, -all, -command, -expanded, -line, -nocase, or -start}}
test regexpComp-11.6 {regsub errors} {
evalInProc {
list [catch {regsub -nocase a( b c d} msg] $msg
Expand Down

0 comments on commit 8a438c3

Please sign in to comment.