Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 65 additions & 41 deletions jim-regexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
int opt_indices = 0;
int opt_all = 0;
int opt_inline = 0;
int opt_lineanchor = 0;
regex_t *regex;
int match, i, j;
int offset = 0;
Expand All @@ -137,10 +138,10 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
int eflags = 0;
int option;
enum {
OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_EXPANDED, OPT_END
OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_LINESTOP, OPT_LINEANCHOR, OPT_ALL, OPT_INLINE, OPT_START, OPT_EXPANDED, OPT_END
};
static const char * const options[] = {
"-indices", "-nocase", "-line", "-all", "-inline", "-start", "-expanded", "--", NULL
"-indices", "-nocase", "-line", "-linestop", "-lineanchor", "-all", "-inline", "-start", "-expanded", "--", NULL
};

for (i = 1; i < argc; i++) {
Expand All @@ -149,7 +150,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
if (*opt != '-') {
break;
}
if (Jim_GetEnum(interp, argv[i], options, &option, "switch", JIM_ERRMSG | JIM_ENUM_ABBREV) != JIM_OK) {
if (Jim_GetEnum(interp, argv[i], options, &option, "option", JIM_ERRMSG | JIM_ENUM_ABBREV) != JIM_OK) {
return JIM_ERR;
}
if (option == OPT_END) {
Expand All @@ -167,8 +168,20 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)

case OPT_LINE:
regcomp_flags |= REG_NEWLINE;
opt_lineanchor = 1;
break;

#ifdef REG_NEWLINE_STOP
case OPT_LINESTOP:
regcomp_flags |= REG_NEWLINE_STOP;
break;
#endif
#ifdef REG_NEWLINE_ANCHOR
case OPT_LINEANCHOR:
regcomp_flags |= REG_NEWLINE_ANCHOR;
opt_lineanchor = 1;
break;
#endif
case OPT_ALL:
opt_all = 1;
break;
Expand All @@ -186,14 +199,15 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
}
break;

case OPT_EXPANDED:
#ifdef REG_EXPANDED
case OPT_EXPANDED:
regcomp_flags |= REG_EXPANDED;
break;
#else
#endif
default:
/* Could get here if -linestop or -lineanchor or -expanded is not supported */
Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
return JIM_ERR;
#endif
}
}
if (argc - i < 2) {
Expand Down Expand Up @@ -313,7 +327,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
}
}

if (opt_all && (pattern[0] != '^' || (regcomp_flags & REG_NEWLINE)) && *source_str) {
if (opt_all && (pattern[0] != '^' || opt_lineanchor) && *source_str) {
if (pmatch[0].rm_eo) {
offset += utf8_strlen(source_str, pmatch[0].rm_eo);
source_str += pmatch[0].rm_eo;
Expand Down Expand Up @@ -369,10 +383,10 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
const char *pattern;
int option;
enum {
OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_EXPANDED, OPT_END
OPT_NOCASE, OPT_LINE, OPT_LINESTOP, OPT_LINEANCHOR, OPT_ALL, OPT_START, OPT_COMMAND, OPT_EXPANDED, OPT_END
};
static const char * const options[] = {
"-nocase", "-line", "-all", "-start", "-command", "-expanded", "--", NULL
"-nocase", "-line", "-linestop", "-lineanchor", "-all", "-start", "-command", "-expanded", "--", NULL
};

for (i = 1; i < argc; i++) {
Expand All @@ -381,7 +395,7 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
if (*opt != '-') {
break;
}
if (Jim_GetEnum(interp, argv[i], options, &option, "switch", JIM_ERRMSG | JIM_ENUM_ABBREV) != JIM_OK) {
if (Jim_GetEnum(interp, argv[i], options, &option, "option", JIM_ERRMSG | JIM_ENUM_ABBREV) != JIM_OK) {
return JIM_ERR;
}
if (option == OPT_END) {
Expand All @@ -397,6 +411,16 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
regcomp_flags |= REG_NEWLINE;
break;

#ifdef REG_NEWLINE_STOP
case OPT_LINESTOP:
regcomp_flags |= REG_NEWLINE_STOP;
break;
#endif
#ifdef REG_NEWLINE_ANCHOR
case OPT_LINEANCHOR:
regcomp_flags |= REG_NEWLINE_ANCHOR;
break;
#endif
case OPT_ALL:
opt_all = 1;
break;
Expand All @@ -414,26 +438,28 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
opt_command = 1;
break;

case OPT_EXPANDED:
#ifdef REG_EXPANDED
case OPT_EXPANDED:
regcomp_flags |= REG_EXPANDED;
break;
#else
#endif

default:
/* Could get here if -linestop or -lineanchor or -expanded is not supported */
Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
return JIM_ERR;
#endif
}
}
if (argc - i != 3 && argc - i != 4) {
return JIM_USAGE;
}

/* Need to ensure that this is unshared, so just duplicate it always */
/* Need to ensure that this is unshared, so just duplicate it always */
regcomp_obj = Jim_DuplicateObj(interp, argv[i]);
Jim_IncrRefCount(regcomp_obj);
Jim_IncrRefCount(regcomp_obj);
regex = SetRegexpFromAny(interp, regcomp_obj, regcomp_flags);
if (!regex) {
Jim_DecrRefCount(interp, regcomp_obj);
Jim_DecrRefCount(interp, regcomp_obj);
return JIM_ERR;
}
pattern = Jim_String(argv[i]);
Expand All @@ -443,7 +469,7 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
cmd_prefix = argv[i + 2];
if (Jim_ListLength(interp, cmd_prefix) == 0) {
Jim_SetResultString(interp, "command prefix must be a list of at least one element", -1);
Jim_DecrRefCount(interp, regcomp_obj);
Jim_DecrRefCount(interp, regcomp_obj);
return JIM_ERR;
}
Jim_IncrRefCount(cmd_prefix);
Expand Down Expand Up @@ -485,7 +511,11 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)

n = source_len - offset;
p = source_str + offset;
do {

/* To match Tcl, an empty pattern does not match at the end
* of the string.
*/
while (n || pattern[0]) {
int match = jim_regexec(regex, p, MAX_SUB_MATCHES, pmatch, regexec_flags);

if (match >= REG_BADPAT) {
Expand Down Expand Up @@ -579,28 +609,22 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
break;
}

/* An anchored pattern without -line must be done */
if ((regcomp_flags & REG_NEWLINE) == 0 && pattern[0] == '^') {
break;
}

/* If the pattern is empty, need to step forwards */
if (pattern[0] == '\0' && n) {
/* Need to copy the char we are moving over */
Jim_AppendString(interp, resultObj, p, 1);
p++;
n--;
}

regexec_flags = 0;
if (pmatch[0].rm_eo == pmatch[0].rm_so) {
/* The match did not advance the string, so set REG_NOTBOL to force the next match */
regexec_flags = REG_NOTBOL;
}
else {
regexec_flags = 0;
/* Matched a zero length string. Need to avoid matching the same position again */
if (pattern[0] == '^') {
/* An anchored search sets REG_NOTBOL so that ^ cannot match at this position again */
regexec_flags = REG_NOTBOL;
}
else {
/* A non-anchored search advances by one char */
int charlen = utf8_charlen(p[0]);
Jim_AppendString(interp, resultObj, p, charlen);
p += charlen;
n -= charlen;
}
}

} while (n);
}

/*
* Copy the portion of the string after the last match to the
Expand Down Expand Up @@ -631,15 +655,15 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
Jim_DecrRefCount(interp, cmd_prefix);
}

Jim_DecrRefCount(interp, regcomp_obj);
Jim_DecrRefCount(interp, regcomp_obj);

return result;
}

/*
 * Initialisation entry point for the "regexp" package.
 *
 * Registers the "regexp" command (implemented by Jim_RegexpCmd) and the
 * "regsub" command (implemented by Jim_RegsubCmd) together with their
 * usage strings, then returns JIM_OK.
 */
int Jim_regexpInit(Jim_Interp *interp)
{
/* NOTE(review): presumably bails out early if the package was already provided -- confirm against the macro/function definition */
Jim_PackageProvideCheck(interp, "regexp");
/*
 * NOTE(review): both commands are registered twice below, first with the
 * "?-switch ...?" usage text and then with the "?-option ...?" usage text.
 * This looks like the before/after lines of a change shown together;
 * confirm that only the "?-option ...?" pair is meant to remain.
 * The numeric arguments (2, -1 and 3, -1) are presumably the minimum and
 * maximum argument counts -- TODO confirm.
 */
Jim_RegisterSimpleCmd(interp, "regexp", "?-switch ...? exp string ?matchVar? ?subMatchVar ...?", 2, -1, Jim_RegexpCmd);
Jim_RegisterSimpleCmd(interp, "regsub", "?-switch ...? exp string subSpec ?varName?", 3, -1, Jim_RegsubCmd);
Jim_RegisterSimpleCmd(interp, "regexp", "?-option ...? exp string ?matchVar? ?subMatchVar ...?", 2, -1, Jim_RegexpCmd);
Jim_RegisterSimpleCmd(interp, "regsub", "?-option ...? exp string subSpec ?varName?", 3, -1, Jim_RegsubCmd);
return JIM_OK;
}
10 changes: 10 additions & 0 deletions jim.c
Original file line number Diff line number Diff line change
Expand Up @@ -3142,11 +3142,21 @@ int Jim_CompareStringImmediate(Jim_Interp *interp, Jim_Obj *objPtr, const char *
}
}

/*
 * qsort() comparator for an array of C string pointers.
 *
 * Strings are ordered with strcmp(), except that "--" is explicitly sorted
 * after every other string (to match Tcl 9.0).
 *
 * a, b: pointers to the array elements (each element is a char *).
 * Returns <0, 0 or >0 as *a sorts before, equal to, or after *b.
 */
static int qsortCompareStringPointers(const void *a, const void *b)
{
    const char *sa = *(char *const *)a;
    const char *sb = *(char *const *)b;
    int a_is_end = (strcmp(sa, "--") == 0);
    int b_is_end = (strcmp(sb, "--") == 0);

    if (a_is_end || b_is_end) {
        /* "--" sorts last. Two "--" entries (or an element compared with
         * itself) must compare equal, otherwise the ordering handed to
         * qsort() is inconsistent.
         */
        return a_is_end - b_is_end;
    }

    return strcmp(sa, sb);
}

Expand Down
6 changes: 3 additions & 3 deletions jimregexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1234,7 +1234,7 @@ int jim_regexec(regex_t *preg, const char *string, size_t nmatch, regmatch_t
}
if (*string) {
nextline:
if (preg->cflags & REG_NEWLINE) {
if (preg->cflags & REG_NEWLINE_ANCHOR) {
/* Try the next anchor? */
string = strchr(string, '\n');
if (string) {
Expand Down Expand Up @@ -1369,12 +1369,12 @@ static const char *str_find(const char *string, int c, int nocase)
/**
* Returns true if 'ch' is an end-of-line char.
*
* In REG_NEWLINE mode, \n is considered EOL in
* In REG_NEWLINE_STOP mode, \n is considered EOL in
* addition to \0
*/
static int reg_iseol(regex_t *preg, int ch)
{
if (preg->cflags & REG_NEWLINE) {
if (preg->cflags & REG_NEWLINE_STOP) {
return ch == '\0' || ch == '\n';
}
else {
Expand Down
5 changes: 4 additions & 1 deletion jimregexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,11 @@ typedef struct regexp {
typedef regexp regex_t;

#define REG_EXTENDED 0
#define REG_NEWLINE 1
#define REG_ICASE 2
#define REG_NEWLINE_ANCHOR 4
#define REG_NEWLINE_STOP 8
/* REG_NEWLINE is POSIX */
#define REG_NEWLINE (REG_NEWLINE_ANCHOR | REG_NEWLINE_STOP)

#define REG_NOTBOL 16
#define REG_EXPANDED 32
Expand Down
26 changes: 23 additions & 3 deletions make-bootstrap-jim
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,26 @@
# which can be compiled to provide a bootstrap version of jimsh.
# e.g. cc -o jimsh0 jimsh0.c

# Header, source file and feature macro for the builtin regexp extension.
# --no-regexp clears all three so that regexp support is left out.
JIMREGEXP_H=jimregexp.h
JIMREGEXP_C=jimregexp.c
JIM_REGEXP=JIM_REGEXP

# Process command line options
while [ $# -gt 0 ]; do
	case "$1" in
	--no-regexp)
		# don't include builtin regexp extension
		JIMREGEXP_H=""
		JIMREGEXP_C=""
		JIM_REGEXP=""
		;;
	*)
		# Report on stderr: stdout carries the generated C source and is
		# normally redirected to a file, where the message would be lost.
		echo "Unknown option: $1" >&2
		exit 1
		;;
	esac
	shift
done

makeext()
{
source="$1"
Expand Down Expand Up @@ -49,7 +69,7 @@ allexts="bootstrap aio readdir regexp file glob exec clock array stdlib tclcompa
echo "/* This is single source file, bootstrap version of Jim Tcl. See http://jim.tcl.tk/ */"

# define some core features
for i in JIM_COMPAT JIM_ANSIC JIM_REGEXP HAVE_NO_AUTOCONF JIM_TINY _JIMAUTOCONF_H; do
for i in JIM_COMPAT JIM_ANSIC $JIM_REGEXP HAVE_NO_AUTOCONF JIM_TINY _JIMAUTOCONF_H; do
echo "#define $i"
done
echo '#define TCL_LIBRARY "."'
Expand Down Expand Up @@ -121,7 +141,7 @@ outputsource()
}

# Now output header files, removing references to jim header files
for i in jim-win32compat.h utf8.h jim.h jim-subcmd.h jimregexp.h jim-signal.h jimiocompat.h; do
for i in jim-win32compat.h utf8.h jim.h jim-subcmd.h $JIMREGEXP_H jim-signal.h jimiocompat.h; do
outputsource $i
done

Expand All @@ -135,7 +155,7 @@ done
makeloadexts $allexts

# And finally the core source code
for i in jim.c jim-subcmd.c utf8.c jim-format.c jimregexp.c jimiocompat.c jim-win32compat.c jim-nosignal.c; do
for i in jim.c jim-subcmd.c utf8.c jim-format.c $JIMREGEXP_C jimiocompat.c jim-win32compat.c jim-nosignal.c; do
outputsource $i
done
echo "#ifndef JIM_BOOTSTRAP_LIB_ONLY"
Expand Down
2 changes: 1 addition & 1 deletion test-bootstrap-jim
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

set -e
echo "Building bootstrap jimsh"
./make-bootstrap-jim >jimsh_bootstrap.c
./make-bootstrap-jim "$@" >jimsh_bootstrap.c
${CC:-cc} -o jimsh_bootstrap jimsh_bootstrap.c
echo "Testing bootstrap jimsh"
( cd tests; ../jimsh_bootstrap runall.tcl )
Expand Down
Loading