Skip to content

Commit 1411505

Browse files
committed
Made regex configuration work the same way as in Nginx.
Implement searching in chunked TfwStr.
1 parent 60c5b2a commit 1411505

File tree

13 files changed

+504
-43
lines changed

13 files changed

+504
-43
lines changed

fw/cfg.c

Lines changed: 123 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ entry_set_name(TfwCfgEntry *e)
243243

244244
BUG_ON(!e);
245245
BUG_ON(e->name);
246+
//pr_notice("rule=%i",rule);
246247

247248
if (!rule) {
248249
name = e->ftoken;
@@ -253,6 +254,7 @@ entry_set_name(TfwCfgEntry *e)
253254
}
254255

255256
T_DBG3("set name: %.*s\n", len, name);
257+
//pr_notice("set name: %.*s\n", len, name);
256258

257259
if (!check_identifier(name, len))
258260
return -EINVAL;
@@ -276,6 +278,7 @@ entry_set_first_token(TfwCfgEntry *e, const char *src, int len)
276278
BUG_ON(e->ftoken);
277279

278280
T_DBG3("set first token: %.*s\n", len, src);
281+
//pr_notice("set first token: %.*s\n", len, src);
279282

280283
if (!src || !len)
281284
return -EINVAL;
@@ -360,6 +363,7 @@ entry_add_rule_param(const char **param, const char *src, size_t len)
360363
if (!(dst = alloc_and_copy_literal(src, len)))
361364
return -ENOMEM;
362365
*param = dst;
366+
//pr_notice("entry_add_rule_param param=%s\n", dst);
363367
return 0;
364368
}
365369

@@ -400,6 +404,9 @@ typedef enum {
400404
TOKEN_SEMICOLON,
401405
TOKEN_LITERAL,
402406
TOKEN_ARROW,
407+
TOKEN_TILDA,
408+
TOKEN_REGEX,
409+
TOKEN_REGEX_CI,
403410
_TOKEN_COUNT,
404411
} token_t;
405412

@@ -591,9 +598,12 @@ read_next_token(TfwCfgParserState *ps)
591598
TOKEN_NEQSIGN);
592599
TFSM_COND_MOVE_EXIT(ps->c == '>' && ps->prev_c == '-',
593600
TOKEN_ARROW);
601+
TFSM_COND_MOVE_EXIT(ps->c == '*' && ps->prev_c == '~',
602+
TOKEN_REGEX_CI);
594603

595604
/* Special case to differ single equal sign from double one. */
596-
TFSM_COND_MOVE(ps->c == '=', TS_EQSIGN);
605+
TFSM_COND_MOVE(ps->c == '=', TS_EQSIGN);
606+
TFSM_COND_MOVE(ps->c == '~', TS_TILDA);
597607

598608
/* Everything else is not a special character and therefore
599609
* it starts a literal. */
@@ -622,6 +632,14 @@ read_next_token(TfwCfgParserState *ps)
622632
TFSM_JMP_EXIT(TOKEN_EQSIGN);
623633
}
624634

635+
FSM_STATE(TS_TILDA) {
636+
TFSM_COND_JMP_EXIT(!ps->c, TOKEN_REGEX);
637+
638+
/* If '~' is followed by '*', eat the asterisk and exit with the case-insensitive regex token. */
639+
TFSM_COND_MOVE_EXIT(ps->c == '*', TOKEN_REGEX_CI);
640+
TFSM_JMP_EXIT(TOKEN_REGEX);
641+
}
642+
625643
FSM_STATE(TS_COMMENT) {
626644
TFSM_COND_JMP_EXIT(!ps->c, TOKEN_NA);
627645

@@ -716,6 +734,7 @@ entry_set_cond(TfwCfgEntry *e, token_t cond_type, const char *src, int len)
716734
int name_len = sizeof(TFW_CFG_RULE_NAME) - 1;
717735
TfwCfgRule *rule = &e->rule;
718736

737+
//pr_notice("entry_set_cond cond_type=%u\n", (unsigned int)cond_type);
719738
BUG_ON(!e->ftoken);
720739
BUG_ON(e->name);
721740

@@ -735,7 +754,23 @@ entry_set_cond(TfwCfgEntry *e, token_t cond_type, const char *src, int len)
735754
if (!(e->name = alloc_and_copy_literal(name, name_len)))
736755
return -ENOMEM;
737756

738-
rule->inv = cond_type == TOKEN_DEQSIGN ? false : true;
757+
758+
switch (cond_type) {
759+
case TOKEN_REGEX:
760+
rule->regex = TFW_REGEX_REGULAR;
761+
rule->inv = false;
762+
break;
763+
case TOKEN_REGEX_CI:
764+
rule->regex = TFW_REGEX_CI;
765+
rule->inv = false;
766+
break;
767+
default:
768+
rule->regex = TFW_REGEX_NO;
769+
rule->inv = cond_type == TOKEN_DEQSIGN ? false : true;
770+
break;
771+
}
772+
773+
739774
return 0;
740775
}
741776

@@ -795,22 +830,29 @@ parse_cfg_entry(TfwCfgParserState *ps)
795830
* condition key for rule; in last three cases predefined rule name is used.
796831
*/
797832
FSM_STATE(PS_START_NEW_ENTRY) {
833+
834+
//pr_notice("FSM_STATE(PS_START_NEW_ENTRY)\n");
798835
entry_reset(&ps->e);
799836
ps->e.line_no = ps->line_no;
800837
ps->e.line = ps->line;
801838

802839
PFSM_COND_MOVE(ps->t == TOKEN_ARROW, PS_RULE_PURE_ACTION);
803840

804841
ps->err = entry_set_first_token(&ps->e, ps->lit, ps->lit_len);
842+
//pr_notice("ps->e.ftoken=%s\n", ps->e.ftoken);
805843
FSM_COND_JMP(ps->err, PS_EXIT);
806844

807845
PFSM_MOVE(PS_PLAIN_OR_RULE);
808846
}
809847

810848
FSM_STATE(PS_PLAIN_OR_RULE) {
849+
//pr_notice("FSM_STATE(PS_PLAIN_OR_RULE)\n");
850+
//pr_notice("ps->e.ftoken=%s\n", ps->e.ftoken);
811851
PFSM_COND_MOVE(ps->t == TOKEN_DEQSIGN ||
812-
ps->t == TOKEN_NEQSIGN,
813-
PS_RULE_COND);
852+
ps->t == TOKEN_NEQSIGN ||
853+
ps->t == TOKEN_REGEX ||
854+
ps->t == TOKEN_REGEX_CI,
855+
PS_RULE_COND);
814856
PFSM_COND_MOVE(ps->t == TOKEN_LITERAL, PS_PLAIN_OR_LONG_RULE);
815857

816858
/* Jump to plain val/attr scheme to make remained checks
@@ -821,67 +863,82 @@ parse_cfg_entry(TfwCfgParserState *ps)
821863
}
822864

823865
FSM_STATE(PS_PLAIN_OR_LONG_RULE) {
866+
//pr_notice("FSM_STATE(PS_PLAIN_OR_LONG_RULE)\n");
824867
FSM_COND_JMP(ps->t == TOKEN_DEQSIGN ||
825-
ps->t == TOKEN_NEQSIGN,
826-
PS_LONG_RULE_COND);
868+
ps->t == TOKEN_NEQSIGN ||
869+
ps->t == TOKEN_REGEX ||
870+
ps->t == TOKEN_REGEX_CI,
871+
PS_LONG_RULE_COND);
827872

828873
/* This is not rule (simple or extended), so jump to
829874
* plain val/attr scheme. */
830875
ps->err = entry_set_name(&ps->e);
831876
FSM_COND_JMP(ps->err, PS_EXIT);
832877
FSM_COND_JMP(ps->t == TOKEN_EQSIGN, PS_STORE_ATTR_PREV);
833878
FSM_COND_JMP(ps->t == TOKEN_LITERAL ||
834-
ps->t == TOKEN_SEMICOLON ||
835-
ps->t == TOKEN_LBRACE,
836-
PS_STORE_VAL_PREV);
879+
ps->t == TOKEN_SEMICOLON ||
880+
ps->t == TOKEN_LBRACE,
881+
PS_STORE_VAL_PREV);
837882

838883
ps->err = -EINVAL;
839884
FSM_JMP(PS_EXIT);
840885
}
841886

842887
FSM_STATE(PS_LONG_RULE_COND) {
888+
//pr_notice("FSM_STATE(PS_LONG_RULE_COND)\n");
843889
ps->err = entry_add_rule_param(&ps->e.rule.fst_ext,
844-
ps->prev_lit,
845-
ps->prev_lit_len);
890+
ps->prev_lit,
891+
ps->prev_lit_len);
846892
FSM_COND_JMP(ps->err, PS_EXIT);
847893
PFSM_MOVE(PS_RULE_COND);
848894
}
849895

850896
FSM_STATE(PS_RULE_COND) {
897+
//pr_notice("FSM_STATE(PS_RULE_COND) ps->t=%u\n", (unsigned)ps->t);
898+
//pr_notice("ps->e.ftoken=%s\n", ps->e.ftoken);
899+
900+
FSM_COND_JMP(ps->prev_t == TOKEN_REGEX ||
901+
ps->prev_t == TOKEN_REGEX_CI,
902+
PS_STORE_VAL_PREV_REGEX);
903+
851904
PFSM_COND_JMP_EXIT_ERROR(ps->t != TOKEN_LITERAL);
852905
ps->err = entry_set_cond(&ps->e, ps->prev_t, ps->lit,
853-
ps->lit_len);
906+
ps->lit_len);
854907
FSM_COND_JMP(ps->err, PS_EXIT);
855908
PFSM_MOVE(PS_RULE_COND_END);
856909
}
857910

858911
FSM_STATE(PS_RULE_COND_END) {
912+
//pr_notice("FSM_STATE(PS_RULE_COND_END)\n");;
859913
PFSM_COND_JMP_EXIT_ERROR(ps->t != TOKEN_ARROW);
860914
PFSM_MOVE(PS_RULE_ACTION);
861915
}
862916

863917
FSM_STATE(PS_RULE_PURE_ACTION) {
918+
//pr_notice("FSM_STATE(PS_RULE_PURE_ACTION)\n");
864919
ps->err = entry_set_name(&ps->e);
865920
FSM_COND_JMP(ps->err, PS_EXIT);
866921
FSM_JMP(PS_RULE_ACTION);
867922
}
868923

869924
FSM_STATE(PS_RULE_ACTION) {
925+
//pr_notice("FSM_STATE(PS_RULE_ACTION)\n");
870926
PFSM_COND_JMP_EXIT_ERROR(ps->t != TOKEN_LITERAL);
871927
ps->err = entry_add_rule_param(&ps->e.rule.act, ps->lit,
872-
ps->lit_len);
928+
ps->lit_len);
873929
FSM_COND_JMP(ps->err, PS_EXIT);
874930
PFSM_MOVE(PS_RULE_ACTION_VAL);
875931
}
876932

877933
FSM_STATE(PS_RULE_ACTION_VAL) {
934+
//pr_notice("FSM_STATE(PS_RULE_ACTION_VAL)\n");
878935
FSM_COND_JMP(ps->t == TOKEN_SEMICOLON, PS_SEMICOLON);
879936
PFSM_COND_JMP_EXIT_ERROR(ps->t != TOKEN_EQSIGN);
880937
read_next_token(ps);
881938
PFSM_COND_JMP_EXIT_ERROR(ps->t != TOKEN_LITERAL);
882939

883940
ps->err = entry_add_rule_param(&ps->e.rule.val, ps->lit,
884-
ps->lit_len);
941+
ps->lit_len);
885942
FSM_COND_JMP(ps->err, PS_EXIT);
886943

887944
read_next_token(ps);
@@ -904,6 +961,7 @@ parse_cfg_entry(TfwCfgParserState *ps)
904961
*/
905962

906963
FSM_STATE(PS_VAL_OR_ATTR) {
964+
//pr_notice("FSM_STATE(PS_VAL_OR_ATTR)\n");
907965
PFSM_COND_MOVE(ps->t == TOKEN_LITERAL, PS_MAYBE_EQSIGN);
908966
FSM_COND_JMP(ps->t == TOKEN_SEMICOLON, PS_SEMICOLON);
909967
FSM_COND_JMP(ps->t == TOKEN_LBRACE, PS_LBRACE);
@@ -913,23 +971,65 @@ parse_cfg_entry(TfwCfgParserState *ps)
913971
}
914972

915973
FSM_STATE(PS_MAYBE_EQSIGN) {
974+
//pr_notice("FSM_STATE(PS_MAYBE_EQSIGN)\n");
916975
FSM_COND_JMP(ps->t == TOKEN_EQSIGN, PS_STORE_ATTR_PREV);
917976
FSM_JMP(PS_STORE_VAL_PREV);
918977
}
919978

979+
FSM_STATE(PS_STORE_VAL_PREV_REGEX) {
980+
//pr_notice("FSM_STATE(PS_STORE_VAL_PREV_REGEX)\n");
981+
/* name val1 val2;
982+
* ^
983+
* We are here (but still need to store val1)
984+
* and to set either the entry name (for "location") or the rule condition.
985+
*/
986+
T_DBG3("add value: %.*s\n", ps->prev_lit_len, ps->prev_lit);
987+
988+
//pr_notice("prev value: %.*s\n", ps->prev_lit_len, ps->prev_lit);
989+
//pr_notice("cur value: %.*s\n", ps->lit_len, ps->lit);
990+
991+
if (ps->e.ftoken && !strcmp(ps->e.ftoken, "location")) {
992+
ps->err = entry_set_name(&ps->e);
993+
994+
if (!ps->err) {
995+
if (ps->prev_t == TOKEN_REGEX)
996+
ps->err = entry_add_val(&ps->e, "regex",
997+
sizeof("regex"));
998+
if (ps->prev_t == TOKEN_REGEX_CI)
999+
ps->err = entry_add_val(&ps->e,
1000+
"regex_ci",
1001+
sizeof("regex_ci"));
1002+
}
1003+
FSM_COND_JMP(ps->err, PS_EXIT);
1004+
FSM_JMP(PS_VAL_OR_ATTR);
1005+
}
1006+
1007+
/* Not a "location" entry: treat the regex operator as a rule condition. */
1008+
//pr_notice("ps->e.ftoken=%s\n", ps->e.ftoken);
1009+
ps->err = entry_set_cond(&ps->e, ps->prev_t,
1010+
ps->lit, ps->lit_len);
1011+
FSM_COND_JMP(ps->err, PS_EXIT);
1012+
PFSM_MOVE(PS_RULE_COND_END);
1013+
1014+
}
1015+
9201016
FSM_STATE(PS_STORE_VAL_PREV) {
1017+
//pr_notice("FSM_STATE(PS_STORE_VAL_PREV)\n");
9211018
/* name val1 val2;
9221019
* ^
9231020
* We are here (but still need to store val1). */
9241021
T_DBG3("add value: %.*s\n", ps->prev_lit_len, ps->prev_lit);
9251022

1023+
//pr_notice("prev value: %.*s\n", ps->prev_lit_len, ps->prev_lit);
1024+
//pr_notice("cur value: %.*s\n", ps->lit_len, ps->lit);
9261025
ps->err = entry_add_val(&ps->e, ps->prev_lit, ps->prev_lit_len);
9271026
FSM_COND_JMP(ps->err, PS_EXIT);
9281027

9291028
FSM_JMP(PS_VAL_OR_ATTR);
9301029
}
9311030

9321031
FSM_STATE(PS_STORE_ATTR_PREV) {
1032+
//pr_notice("FSM_STATE(PS_STORE_ATTR_PREV)\n");
9331033
/* name key = val;
9341034
* ^
9351035
* We are here. */
@@ -951,6 +1051,7 @@ parse_cfg_entry(TfwCfgParserState *ps)
9511051
}
9521052

9531053
FSM_STATE(PS_LBRACE) {
1054+
//pr_notice("FSM_STATE(PS_LBRACE)\n");
9541055
/* Simply exit on '{' leaving nested nodes untouched and
9551056
* surrounded with braces. The caller should detect it and parse
9561057
* them in a loop. */
@@ -959,6 +1060,7 @@ parse_cfg_entry(TfwCfgParserState *ps)
9591060
}
9601061

9611062
FSM_STATE(PS_SEMICOLON) {
1063+
//pr_notice("FSM_STATE(PS_SEMICOLON)\n");
9621064
/* Simply eat ';'. Don't MOVE because the next character may be
9631065
* '\0' and that triggers an error (because we expect more input
9641066
* tokens when we do _PFSM_MOVE()). */
@@ -967,11 +1069,18 @@ parse_cfg_entry(TfwCfgParserState *ps)
9671069
}
9681070

9691071
FSM_STATE(PS_EXIT) {
1072+
//pr_notice("FSM_STATE(PS_EXIT)\n");
9701073
/* Cleanup of entry is done in tfw_cfg_parse_mods() */
1074+
1075+
//pr_notice("ps->e.name=%s\n", ps->e.name);
1076+
//pr_notice("ps->e.line=%s\n", ps->e.line);
1077+
1078+
9711079
T_DBG3("pfsm: exit\n");
9721080
}
9731081
}
9741082

1083+
9751084
/*
9761085
* ------------------------------------------------------------------------
9771086
* Configuration Parser - TfwCfgSpec helpers.

fw/cfg.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ typedef struct {
149149
const char *act;
150150
const char *val;
151151
bool inv;
152+
int regex;
152153
} TfwCfgRule;
153154

154155
typedef struct {
@@ -415,6 +416,12 @@ enum {
415416
TFW_CFG_B_KEEP, /* Keep an entry */
416417
};
417418

419+
enum {
420+
TFW_REGEX_NO = 0,
421+
TFW_REGEX_REGULAR,
422+
TFW_REGEX_CI,
423+
};
424+
418425
#define TFW_CFG_F_ADD (1 << TFW_CFG_B_ADD)
419426
#define TFW_CFG_F_DEL (1 << TFW_CFG_B_DEL)
420427
#define TFW_CFG_F_MOD (1 << TFW_CFG_B_MOD)

0 commit comments

Comments
 (0)