Skip to content

Commit

Permalink
Added support for JIT-compilation of patterns
Browse files Browse the repository at this point in the history
  • Loading branch information
mmottl committed Jul 22, 2021
1 parent 0c4ca03 commit efdf484
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 19 deletions.
8 changes: 8 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
### 7.5.0 (2021-07-22)

* Added support for JIT-compilation of patterns, which can greatly improve
matching performance for most patterns. Users need to explicitly add
the `jit_compile` flag to request JIT-compilation when creating regular
expressions.


### 7.4.6 (2020-08-04)

* Removed mistakenly kept base library configuration dependency.
Expand Down
16 changes: 10 additions & 6 deletions src/pcre.ml
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ type chtables
external maketables : unit -> chtables = "pcre_maketables_stub"

(* Internal use only! *)
external pcre_study : regexp -> unit = "pcre_study_stub"
external pcre_study : regexp -> jit_compile : bool -> unit = "pcre_study_stub"

external compile : (icflag [@untagged]) -> chtables option -> string -> regexp
= "pcre_compile_stub_bc" "pcre_compile_stub"
Expand All @@ -265,14 +265,16 @@ external set_imp_match_limit_recursion : regexp -> (int [@untagged]) -> regexp
[@@noalloc]

let regexp
?(study = true) ?limit ?limit_recursion
?(study = true)
?(jit_compile = false)
?limit ?limit_recursion
?(iflags = 0) ?flags ?chtables pat =
let rex =
match flags with
| Some flag_list -> compile (cflags flag_list) chtables pat
| _ -> compile iflags chtables pat
in
if study then pcre_study rex;
if study then pcre_study ~jit_compile rex;
let rex =
match limit with
| None -> rex
Expand All @@ -283,7 +285,8 @@ let regexp
| Some lim -> set_imp_match_limit_recursion rex lim

let regexp_or
?study ?limit ?limit_recursion ?(iflags = 0) ?flags ?chtables pats =
?study ?jit_compile ?limit ?limit_recursion ?(iflags = 0)
?flags ?chtables pats =
let check pat =
try ignore (regexp ~study:false ~iflags ?flags ?chtables pat)
with Error error -> raise (Regexp_or (pat, error))
Expand All @@ -293,7 +296,8 @@ let regexp_or
let cnv pat = "(?:" ^ pat ^ ")" in
String.concat "|" (List.rev (List.rev_map cnv pats))
in
regexp ?study ?limit ?limit_recursion ~iflags ?flags ?chtables big_pat
regexp ?study ?jit_compile ?limit ?limit_recursion ~iflags ?flags ?chtables
big_pat

let bytes_unsafe_blit_string str str_ofs bts bts_ofs len =
let str_bts = Bytes.unsafe_of_string str in
Expand Down Expand Up @@ -324,7 +328,7 @@ let quote s =
(* Matching of patterns and subpattern extraction *)

(* Default regular expression when none is provided by the user *)
let def_rex = regexp "\\s+"
let def_rex = regexp ~jit_compile:true "\\s+"

type substrings = string * int array

Expand Down
21 changes: 13 additions & 8 deletions src/pcre.mli
Original file line number Diff line number Diff line change
Expand Up @@ -231,21 +231,24 @@ val maketables : unit -> chtables

val regexp :
?study : bool ->
?jit_compile : bool ->
?limit : int ->
?limit_recursion : int ->
?iflags : icflag ->
?flags : cflag list ->
?chtables : chtables ->
string -> regexp
(** [regexp ?study ?limit ?limit_recursion ?iflags ?flags ?chtables pattern]
compiles [pattern] with [flags] when given, with [iflags] otherwise, and
with char tables [chtables]. If [study] is true, then the resulting regular
expression will be studied. If [limit] is specified, this sets a limit to
the amount of recursion and backtracking (only lower than the builtin
default!). If this limit is exceeded, [MatchLimit] will be raised during
matching.
(** [regexp ?study ?jit_compile ?limit ?limit_recursion ?iflags ?flags
?chtables pattern] compiles [pattern] with [flags] when given, with
[iflags] otherwise, and with char tables [chtables]. If [study] is true,
then the resulting regular expression will be studied. If [jit_compile]
is true, studying will also perform JIT-compilation of the pattern;
[jit_compile] has no effect when [study] is false.
If [limit] is specified, this sets a limit to the amount of recursion
and backtracking (only lower than the builtin default!). If this limit
is exceeded, [MatchLimit] will be raised during matching.
@param study default = true
@param jit_compile default = false
@param limit default = no extra limit other than default
@param limit_recursion default = no extra limit_recursion other than default
@param iflags default = no extra flags
Expand All @@ -261,6 +264,7 @@ val regexp :

val regexp_or :
?study : bool ->
?jit_compile : bool ->
?limit : int ->
?limit_recursion : int ->
?iflags : icflag ->
Expand All @@ -269,7 +273,8 @@ val regexp_or :
string list -> regexp
(** [regexp_or ?study ?jit_compile ?limit ?limit_recursion ?iflags ?flags
?chtables patterns]
like {!regexp}, but combines [patterns] as alternatives (or-patterns) into
one regular expression. *)
one regular expression.
*)

val quote : string -> string
(** [quote str] @return the quoted string of [str]. *)
Expand Down
16 changes: 11 additions & 5 deletions src/pcre_stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,18 @@

/* Error codes as defined for pcre 7.9, undefined in pcre 4.5 */
#ifndef PCRE_ERROR_PARTIAL
#define PCRE_ERROR_PARTIAL (-12)
# define PCRE_ERROR_PARTIAL (-12)
#endif
#ifndef PCRE_ERROR_BADPARTIAL
#define PCRE_ERROR_BADPARTIAL (-13)
# define PCRE_ERROR_BADPARTIAL (-13)
#endif
#ifndef PCRE_ERROR_RECURSIONLIMIT
#define PCRE_ERROR_RECURSIONLIMIT (-21)
# define PCRE_ERROR_RECURSIONLIMIT (-21)
#endif

/* Fall back to a no-op study flag (0) when the PCRE headers do not define
   PCRE_STUDY_JIT_COMPILE, so requesting JIT-compilation is silently ignored */
#ifndef PCRE_STUDY_JIT_COMPILE
# define PCRE_STUDY_JIT_COMPILE 0
#endif

typedef const unsigned char *chartables; /* Type of chartable sets */
Expand Down Expand Up @@ -351,12 +356,13 @@ CAMLprim value pcre_compile_stub_bc(value v_opt, value v_tables, value v_pat)


/* Studies a regexp */
CAMLprim value pcre_study_stub(value v_rex)
CAMLprim value pcre_study_stub(value v_rex, value v_jit_compile)
{
/* If it has not yet been studied */
if (! get_studied(v_rex)) {
const char *error = NULL;
pcre_extra *extra = pcre_study(get_rex(v_rex), 0, &error);
int flags = Bool_val(v_jit_compile) ? PCRE_STUDY_JIT_COMPILE : 0;
pcre_extra *extra = pcre_study(get_rex(v_rex), flags, &error);
if (error != NULL) caml_invalid_argument((char *) error);
set_extra(v_rex, extra);
set_studied(v_rex, 1);
Expand Down

0 comments on commit efdf484

Please sign in to comment.