[Pkg-ocaml-maint-commits] [ocaml-re] 03/15: Imported Upstream version 1.4.0
Stéphane Glondu
glondu at moszumanska.debian.org
Fri Aug 5 11:15:32 UTC 2016
This is an automated email from the git hooks/post-receive script.
glondu pushed a commit to branch master
in repository ocaml-re.
commit 83a2bd9be14678a3360523dc38dbd6ff6a783bd9
Author: Stephane Glondu <steph at glondu.net>
Date: Fri Aug 5 12:49:57 2016 +0200
Imported Upstream version 1.4.0
---
CHANGES | 9 +++-
_oasis | 2 +-
lib/META | 16 +++----
lib/re.ml | 118 ++++++++++++++++++++++++++++++--------------------
lib/re.mli | 17 ++++++++
lib/re_automata.ml | 81 +++++++++++++++++++++++++---------
lib/re_automata.mli | 19 +++++++-
lib_test/META | 4 +-
lib_test/fort_unit.ml | 2 +
lib_test/test_re.ml | 55 +++++++++++++++++++----
setup.ml | 6 +--
11 files changed, 236 insertions(+), 93 deletions(-)
diff --git a/CHANGES b/CHANGES
index f2f8938..ce32718 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,8 +1,13 @@
-1.3.2 (04-14-2015)
+1.4.0 (12-May-2015)
+
+* Add Re.{mark,marked,mark_set}. Regexps can now be "marked" to query post
+ execution if they matched.
+
+1.3.2 (14-Apr-2015)
* Fix replacing 0 length matches (#55)
-1.3.1 (03-13-2015):
+1.3.1 (13-Mar-2015):
* Rename {Cset, Automata} to {Re_cset, Re_automata}
diff --git a/_oasis b/_oasis
index 2868ecc..55bae28 100644
--- a/_oasis
+++ b/_oasis
@@ -1,6 +1,6 @@
OASISFormat: 0.4
Name: re
-Version: 1.3.2
+Version: 1.4.0
Synopsis: Pure OCaml regular expression library
Authors: Jerome Vouillon, Thomas Gazagnaire, Anil Madhavapeddy
License: LGPL-2.0 with OCaml linking exception
diff --git a/lib/META b/lib/META
index 7db7077..50a0f19 100644
--- a/lib/META
+++ b/lib/META
@@ -1,6 +1,6 @@
# OASIS_START
-# DO NOT EDIT (digest: e997602454e61eb029a2d6e192406664)
-version = "1.3.2"
+# DO NOT EDIT (digest: 0ec3f389164db7817d6321d87434419c)
+version = "1.4.0"
description = "Pure OCaml regular expression library"
requires = "bytes"
archive(byte) = "re.cma"
@@ -9,7 +9,7 @@ archive(native) = "re.cmxa"
archive(native, plugin) = "re.cmxs"
exists_if = "re.cma"
package "str" (
- version = "1.3.2"
+ version = "1.4.0"
description = "Str-compatible regexps"
requires = "re re.emacs"
archive(byte) = "re_str.cma"
@@ -20,7 +20,7 @@ package "str" (
)
package "posix" (
- version = "1.3.2"
+ version = "1.4.0"
description = "POSIX-compatible regexps"
requires = "re"
archive(byte) = "re_posix.cma"
@@ -31,7 +31,7 @@ package "posix" (
)
package "perl" (
- version = "1.3.2"
+ version = "1.4.0"
description = "Perl-compatible regexps"
requires = "re"
archive(byte) = "re_perl.cma"
@@ -42,7 +42,7 @@ package "perl" (
)
package "pcre" (
- version = "1.3.2"
+ version = "1.4.0"
description = "subset of PCRE using the Re engine"
requires = "re re.perl"
archive(byte) = "re_pcre.cma"
@@ -53,7 +53,7 @@ package "pcre" (
)
package "glob" (
- version = "1.3.2"
+ version = "1.4.0"
description = "Shell glob regexps"
requires = "re"
archive(byte) = "re_glob.cma"
@@ -64,7 +64,7 @@ package "glob" (
)
package "emacs" (
- version = "1.3.2"
+ version = "1.4.0"
description = "Emacs-compatible regexps"
requires = "re"
archive(byte) = "re_emacs.cma"
diff --git a/lib/re.ml b/lib/re.ml
index 4ac29df..d83264c 100644
--- a/lib/re.ml
+++ b/lib/re.ml
@@ -22,6 +22,7 @@
module Cset = Re_cset
module Automata = Re_automata
+module MarkSet = Automata.PmarkSet
let rec first f l =
match l with
@@ -37,10 +38,21 @@ let rec iter n f v = if n = 0 then v else iter (n - 1) f (f v)
let unknown = -2
let break = -3
-type 'a match_info =
- [ `Match of 'a
- | `Failed
- | `Running ]
+(* Result of a successful match. *)
+type substrings = {
+ s : string ;
+ marks : Automata.mark_infos ;
+ pmarks : MarkSet.t ;
+ gpos : int array ;
+ gcount : int
+}
+
+type match_info =
+ | Match of substrings
+ | Failed
+ | Running
+
+type markid = MarkSet.elt
type state =
{ idx : int;
@@ -55,7 +67,7 @@ type state =
(* Transition table, indexed by color *)
mutable final :
(Automata.category *
- (Automata.idx * Automata.mark_infos match_info)) list;
+ (Automata.idx * Automata.status)) list;
(* Mapping from the category of the next character to
- the index where the next position should be saved
- possibly, the list of marks (and the corresponding indices)
@@ -102,6 +114,7 @@ type info =
mutable last : int
(* Position where the match should stop *) }
+
(****)
let cat_inexistant = 1
@@ -138,8 +151,8 @@ let count = ref 0
let mk_state ncol ((idx, _, _, _, _) as desc) =
let break_state =
match Automata.status desc with
- `Running -> false
- | _ -> true
+ Automata.Running -> false
+ | _ -> true
in
{ idx = if break_state then break else idx;
real_idx = idx;
@@ -342,10 +355,10 @@ let match_str groups partial re s pos len =
res
in
match res with
- `Match m ->
- `Match (s, m, info.positions, re.group_count)
- | (`Failed | `Running) as res ->
- res
+ Automata.Match (marks, pmarks) ->
+ Match { s ; marks; pmarks ; gpos = info.positions; gcount = re.group_count}
+ | Automata.Failed -> Failed
+ | Automata.Running -> Running
let mk_re init cols col_repr ncol lnl group_count =
{ initial = init;
@@ -419,6 +432,7 @@ type regexp =
| Intersection of regexp list
| Complement of regexp list
| Difference of regexp * regexp
+ | Pmark of markid * regexp
let rec is_charset r =
match r with
@@ -433,7 +447,8 @@ let rec is_charset r =
is_charset r
| Sequence _ | Repeat _ | Beg_of_line | End_of_line
| Beg_of_word | End_of_word | Beg_of_str | End_of_str
- | Not_bound | Last_end_of_line | Start | Stop | Group _ | Nest _ ->
+ | Not_bound | Last_end_of_line | Start | Stop
+ | Group _ | Nest _ | Pmark (_,_)->
false
(**** Colormap ****)
@@ -472,7 +487,7 @@ let colorize c regexp =
| Sem (_, r)
| Sem_greedy (_, r)
| Group r | No_group r
- | Nest r -> colorize r
+ | Nest r | Pmark (_,r) -> colorize r
| Case _ | No_case _
| Intersection _
| Complement _
@@ -539,6 +554,8 @@ let rec equal x1 x2 =
eq_list l1 l2
| Difference (x1', x1''), Difference (x2', x2'') ->
equal x1' x2' && equal x1'' x2''
+ | Pmark (m1, r1), Pmark (m2, r2) ->
+ Automata.Pmark.equal m1 m2 && equal r1 r2
| _ ->
false
@@ -690,6 +707,10 @@ let rec translate ids kind ign_group ign_case greedy pos cache (c:Bytes.t) r =
(A.seq ids `First (A.erase ids b e) cr, kind')
| Difference _ | Complement _ | Intersection _ | No_case _ | Case _ ->
assert false
+ | Pmark (i, r') ->
+ let (cr, kind') =
+ translate ids kind ign_group ign_case greedy pos cache c r' in
+ (A.seq ids `First (A.pmark ids i) cr, kind')
and trans_seq ids kind ign_group ign_case greedy pos cache c l =
match l with
@@ -775,6 +796,7 @@ let rec handle_case ign_case r =
| Difference (r, r') ->
Set (Cset.inter (as_set (handle_case ign_case r))
(Cset.diff cany (as_set (handle_case ign_case r'))))
+ | Pmark (i,r) -> Pmark (i,handle_case ign_case r)
(****)
@@ -811,7 +833,7 @@ let rec anchored r =
| Beg_of_str | Start ->
true
| Sem (_, r) | Sem_greedy (_, r) | Group r | No_group r | Nest r
- | Case r | No_case r ->
+ | Case r | No_case r | Pmark (_, r) ->
anchored r
(****)
@@ -863,6 +885,7 @@ let non_greedy r = Sem_greedy (`Non_greedy, r)
let group r = Group r
let no_group r = No_group r
let nest r = Nest r
+let mark r = let i = Automata.Pmark.gen () in (i,Pmark (i,r))
let set str =
let s = ref [] in
@@ -914,8 +937,6 @@ let no_case r = No_case r
(****)
-type substrings = (string * Automata.mark_infos * int array * int)
-
let compile r =
compile_1 (if anchored r then group r else seq [shortest (rep any); group r])
@@ -923,23 +944,23 @@ let exec ?(pos = 0) ?(len = -1) re s =
if pos < 0 || len < -1 || pos + len > String.length s then
invalid_arg "Re.exec";
match match_str true false re s pos len with
- `Match substr -> substr
- | _ -> raise Not_found
+ Match substr -> substr
+ | _ -> raise Not_found
let execp ?(pos = 0) ?(len = -1) re s =
if pos < 0 || len < -1 || pos + len > String.length s then
invalid_arg "Re.execp";
match match_str false false re s pos len with
- `Match substr -> true
+ Match substr -> true
| _ -> false
let exec_partial ?(pos = 0) ?(len = -1) re s =
if pos < 0 || len < -1 || pos + len > String.length s then
invalid_arg "Re.exec_partial";
match match_str false true re s pos len with
- `Match _ -> `Full
- | `Running -> `Partial
- | `Failed -> `Mismatch
+ Match _ -> `Full
+ | Running -> `Partial
+ | Failed -> `Mismatch
let rec find_mark (i : int) l =
match l with
@@ -948,20 +969,20 @@ let rec find_mark (i : int) l =
| (j, idx) :: r ->
if i = j then idx else find_mark i r
-let get (s, marks, pos, _) i =
+let get {s ; marks ; gpos} i =
if 2 * i + 1 >= Array.length marks then raise Not_found;
let m1 = marks.(2 * i) in
if m1 = -1 then raise Not_found;
- let p1 = pos.(m1) - 1 in
- let p2 = pos.(marks.(2 * i + 1)) - 1 in
+ let p1 = gpos.(m1) - 1 in
+ let p2 = gpos.(marks.(2 * i + 1)) - 1 in
String.sub s p1 (p2 - p1)
-let get_ofs (s, marks, pos, _) i =
+let get_ofs {s ; marks ; gpos} i =
if 2 * i + 1 >= Array.length marks then raise Not_found;
let m1 = marks.(2 * i) in
if m1 = -1 then raise Not_found;
- let p1 = pos.(m1) - 1 in
- let p2 = pos.(marks.(2 * i + 1)) - 1 in
+ let p1 = gpos.(m1) - 1 in
+ let p2 = gpos.(marks.(2 * i + 1)) - 1 in
(p1, p2)
type 'a gen = unit -> 'a option
@@ -983,12 +1004,12 @@ let all_gen ?(pos=0) ?len re s =
if !pos >= limit
then None (* no more matches *)
else match match_str true false re s !pos (limit - !pos) with
- | `Match substr ->
+ | Match substr ->
let p1, p2 = get_ofs substr 0 in
pos := if p1=p2 then p2+1 else p2;
Some substr
- | `Running
- | `Failed -> None
+ | Running
+ | Failed -> None
let all ?pos ?len re s =
let l = ref [] in
@@ -1041,7 +1062,7 @@ let split_full_gen ?(pos=0) ?len re s =
) else None
| `Idle ->
begin match match_str true false re s !pos (limit - !pos) with
- | `Match substr ->
+ | Match substr ->
let p1, p2 = get_ofs substr 0 in
pos := if p1=p2 then p2+1 else p2;
let old_i = !i in
@@ -1052,8 +1073,8 @@ let split_full_gen ?(pos=0) ?len re s =
state := `Yield (`Delim substr);
Some (`Text text)
) else Some (`Delim substr)
- | `Running -> None
- | `Failed ->
+ | Running -> None
+ | Failed ->
if !i < limit
then (
let text = String.sub s !i (limit - !i) in
@@ -1105,7 +1126,7 @@ let replace ?(pos=0) ?len ?(all=true) re ~f s =
let rec iter pos =
if pos < limit
then match match_str true false re s pos (limit-pos) with
- | `Match substr ->
+ | Match substr ->
let p1, p2 = get_ofs substr 0 in
(* add string between previous match and current match *)
Buffer.add_substring buf s pos (p1-pos);
@@ -1122,8 +1143,8 @@ let replace ?(pos=0) ?len ?(all=true) re ~f s =
p2+1)
else p2)
else Buffer.add_substring buf s p2 (limit-p2)
- | `Running -> ()
- | `Failed ->
+ | Running -> ()
+ | Failed ->
Buffer.add_substring buf s pos (limit-pos)
in
iter pos;
@@ -1132,20 +1153,20 @@ let replace ?(pos=0) ?len ?(all=true) re ~f s =
let replace_string ?pos ?len ?all re ~by s =
replace ?pos ?len ?all re s ~f:(fun _ -> by)
-let test (s, marks, pos, _) i =
+let test { s ; marks } i =
if 2 * i >= Array.length marks then false else
let idx = marks.(2 * i) in
idx <> -1
let dummy_offset = (-1, -1)
-let get_all_ofs (s, marks, pos, count) =
- let res = Array.make count dummy_offset in
+let get_all_ofs {s ; marks ; gpos ; gcount } =
+ let res = Array.make gcount dummy_offset in
for i = 0 to Array.length marks / 2 - 1 do
let m1 = marks.(2 * i) in
if m1 <> -1 then begin
- let p1 = pos.(m1) in
- let p2 = pos.(marks.(2 * i + 1)) in
+ let p1 = gpos.(m1) in
+ let p2 = gpos.(marks.(2 * i + 1)) in
res.(i) <- (p1 - 1, p2 - 1)
end
done;
@@ -1153,18 +1174,23 @@ let get_all_ofs (s, marks, pos, count) =
let dummy_string = ""
-let get_all (s, marks, pos, count) =
- let res = Array.make count dummy_string in
+let get_all {s ; marks ; gpos ; gcount } =
+ let res = Array.make gcount dummy_string in
for i = 0 to Array.length marks / 2 - 1 do
let m1 = marks.(2 * i) in
if m1 <> -1 then begin
- let p1 = pos.(m1) in
- let p2 = pos.(marks.(2 * i + 1)) in
+ let p1 = gpos.(m1) in
+ let p2 = gpos.(marks.(2 * i + 1)) in
res.(i) <- String.sub s (p1 - 1) (p2 - p1)
end
done;
res
+let marked {pmarks} p =
+ Automata.PmarkSet.mem p pmarks
+
+let mark_set s = s.pmarks
+
(**********************************)
(*
diff --git a/lib/re.mli b/lib/re.mli
index 5a15e48..1648c1d 100644
--- a/lib/re.mli
+++ b/lib/re.mli
@@ -79,6 +79,18 @@ val get_all_ofs : substrings -> (int * int) array
val test : substrings -> int -> bool
(** Test whether a group matched *)
+(** {2 Marks} *)
+
+type markid
+(** Mark id *)
+
+module MarkSet : Set.S with type elt = markid
+
+val marked : substrings -> markid -> bool
+(** Tell if a mark was matched. *)
+
+val mark_set : substrings -> MarkSet.t
+
(** {2 High Level Operations} *)
type 'a gen = unit -> 'a option
@@ -262,6 +274,11 @@ val nest : t -> t
(** when matching against [nest e], only the group matching in the
last match of e will be considered as matching *)
+
+
+val mark : t -> markid * t
+(** Mark a regexp. the markid can then be used to know if this regexp was used. *)
+
(** {2 Character sets} *)
val set : string -> t
diff --git a/lib/re_automata.ml b/lib/re_automata.ml
index 7b8ae0c..9a808e6 100644
--- a/lib/re_automata.ml
+++ b/lib/re_automata.ml
@@ -30,6 +30,20 @@ type category = int
type mark = int
type idx = int
+module Pmark : sig
+ type t = private int
+ val equal : t -> t -> bool
+ val compare : t -> t -> int
+ val gen : unit -> t
+end
+= struct
+ type t = int
+ let equal (x : int) (y : int) = x = y
+ let compare (x : int) (y : int) = compare x y
+ let r = ref 0
+ let gen () = incr r ; !r
+end
+
type expr = { id : int; def : def }
and def =
@@ -42,16 +56,22 @@ and def =
| Erase of int * int
| Before of category
| After of category
+ | Pmark of Pmark.t
let def e = e.def
-type mark_offsets = (int * int) list
+module PmarkSet = Set.Make(Pmark)
+
+type mark_offsets = { marks : (int * int) list ; pmarks : PmarkSet.t }
+
+let empty_mark = { marks = [] ; pmarks = PmarkSet.empty }
type e =
TSeq of e list * expr * sem
| TExp of mark_offsets * expr
| TMatch of mark_offsets
+
(****)
let print_kind ch k =
@@ -78,6 +98,8 @@ let rec print_expr ch e =
Format.fprintf ch "@[<3>(rep@ %a %a)@]" print_kind k print_expr e
| Mark i ->
Format.fprintf ch "@[<3>(mark@ %d)@]" i
+ | Pmark i ->
+ Format.fprintf ch "@[<3>(pmark@ %d)@]" (i :> int)
| Erase (b, e) ->
Format.fprintf ch "@[<3>(erase@ %d %d)@]" b e
| Before c ->
@@ -85,8 +107,9 @@ let rec print_expr ch e =
| After c ->
Format.fprintf ch "@[<3>(after@ %d)@]" c
+
let print_marks ch l =
- match l with
+ match l.marks with
[] ->
()
| (a, i) :: r ->
@@ -170,6 +193,8 @@ let rep ids kind sem x = mk_expr ids (Rep (kind, sem, x))
let mark ids m = mk_expr ids (Mark m)
+let pmark ids i = mk_expr ids (Pmark i)
+
let erase ids m m' = mk_expr ids (Erase (m, m'))
let before ids c = mk_expr ids (Before c)
@@ -188,7 +213,7 @@ let tseq kind x y rem =
let rec rename ids x =
match x.def with
- Cst _ | Eps | Mark _ | Erase _ | Before _ | After _ ->
+ Cst _ | Eps | Mark _ | Pmark _ | Erase _ | Before _ | After _ ->
mk_expr ids x.def
| Alt l ->
mk_expr ids (Alt (List.map (rename ids) l))
@@ -201,17 +226,20 @@ let rec rename ids x =
type hash = int
type mark_infos = int array
-type status = [`Failed | `Match of mark_infos | `Running]
+type status = Failed | Match of mark_infos * PmarkSet.t | Running
type state = int * category * e list * status option ref * hash
let dummy_state = (-1, -1, [], ref None, -1)
let hash_combine h accu = accu * 65599 + h
-let rec hash_marks l accu =
+let rec hash_marks_offset l accu =
match l with
[] -> accu
- | (a, i) :: r -> hash_marks r (hash_combine a (hash_combine i accu))
+ | (a, i) :: r -> hash_marks_offset r (hash_combine a (hash_combine i accu))
+
+let hash_marks m accu =
+ hash_marks_offset m.marks (hash_combine (Hashtbl.hash m.pmarks) accu)
let rec hash_e l accu =
match l with
@@ -230,7 +258,7 @@ let hash_state idx cat desc =
let mk_state idx cat desc = (idx, cat, desc, ref None, hash_state idx cat desc)
-let create_state cat e = mk_state 0 cat [TExp ([], e)]
+let create_state cat e = mk_state 0 cat [TExp (empty_mark, e)]
let rec equal_e l1 l2 =
match l1, l2 with
@@ -281,9 +309,9 @@ let rec mark_used_indices tbl l =
TSeq (l, _, _) ->
mark_used_indices tbl l
| TExp (marks, _) ->
- List.iter (fun (_, i) -> if i >= 0 then tbl.(i) <- true) marks
+ List.iter (fun (_, i) -> if i >= 0 then tbl.(i) <- true) marks.marks
| TMatch marks ->
- List.iter (fun (_, i) -> if i >= 0 then tbl.(i) <- true) marks)
+ List.iter (fun (_, i) -> if i >= 0 then tbl.(i) <- true) marks.marks)
l
let rec find_free tbl idx len =
@@ -347,14 +375,14 @@ let rec set_idx used idx l =
[] ->
[]
| TMatch marks :: r ->
- TMatch (marks_set_idx used idx marks) :: set_idx used idx r
+ TMatch {marks with marks = marks_set_idx used idx marks.marks} :: set_idx used idx r
| TSeq (l', x, kind) :: r ->
TSeq (set_idx used idx l', x, kind) :: set_idx used idx r
| TExp (marks, x) :: r ->
- TExp (marks_set_idx used idx marks, x) :: set_idx used idx r
+ TExp ({marks with marks = marks_set_idx used idx marks.marks}, x) :: set_idx used idx r
let filter_marks b e marks =
- List.filter (fun (i, _) -> i < b || i > e) marks
+ {marks with marks = List.filter (fun (i, _) -> i < b || i > e) marks.marks }
let rec delta_1 marks c cat' cat x rem =
(*Format.eprintf "%d at ." x.id;*)
@@ -383,7 +411,11 @@ let rec delta_1 marks c cat' cat x rem =
| Eps ->
TMatch marks :: rem
| Mark i ->
- TMatch ((i, -1) :: List.remove_assq i marks) :: rem
+ let marks = { marks with marks = (i, -1) :: List.remove_assq i marks.marks } in
+ TMatch marks :: rem
+ | Pmark i ->
+ let marks = { marks with pmarks = PmarkSet.add i marks.pmarks } in
+ TMatch marks :: rem
| Erase (b, e) ->
TMatch (filter_marks b e marks) :: rem
| Before cat'' ->
@@ -493,17 +525,22 @@ let rec restrict s l =
let rec remove_marks b e rem =
if b > e then rem else remove_marks b (e - 1) ((e, -2) :: rem)
-let rec merge_marks old nw =
+let rec merge_marks_offset old nw =
match nw with
[] ->
old
| (i, v) :: rem ->
- let nw' = merge_marks (List.remove_assq i old) rem in
+ let nw' = merge_marks_offset (List.remove_assq i old) rem in
if v = -2 then
nw'
else
(i, v) :: nw'
+let merge_marks old nw =
+ { marks = merge_marks_offset old.marks nw.marks ;
+ pmarks = PmarkSet.union old.pmarks nw.pmarks }
+
+
let rec prepend_marks_expr m e =
match e with
TSeq (l, e', s) -> TSeq (prepend_marks_expr_lst m l, e', s)
@@ -547,10 +584,12 @@ let rec deriv_1 all_chars categories marks cat x rem =
| Eps ->
prepend all_chars [TMatch marks] rem
| Mark i ->
- prepend all_chars [TMatch ((i, -1) :: List.remove_assq i marks)] rem
+ prepend all_chars [TMatch {marks with marks = ((i, -1) :: List.remove_assq i marks.marks)}] rem
+ | Pmark _ ->
+ prepend all_chars [TMatch marks] rem
| Erase (b, e) ->
prepend all_chars
- [TMatch (remove_marks b e (filter_marks b e marks))] rem
+ [TMatch {marks with marks = (remove_marks b e (filter_marks b e marks).marks)}] rem
| Before cat' ->
prepend (List.assq cat' categories) [TMatch marks] rem
| After cat' ->
@@ -569,7 +608,7 @@ and deriv_seq all_chars categories cat kind y z rem =
List.exists (fun x -> match x with TMatch _ -> true | _ -> false) xl)
y
then
- let z' = deriv_1 all_chars categories [] cat z [(all_chars, [])] in
+ let z' = deriv_1 all_chars categories empty_mark cat z [(all_chars, [])] in
List.fold_right
(fun (s, y) rem ->
match
@@ -648,9 +687,9 @@ let status (_, _, desc, status, _) =
| None ->
let st =
match desc with
- [] -> `Failed
- | TMatch m :: _ -> `Match (flatten_match m)
- | _ -> `Running
+ [] -> Failed
+ | TMatch m :: _ -> Match (flatten_match m.marks, m.pmarks)
+ | _ -> Running
in
status := Some st;
st
diff --git a/lib/re_automata.mli b/lib/re_automata.mli
index 37c6892..6be3100 100644
--- a/lib/re_automata.mli
+++ b/lib/re_automata.mli
@@ -28,6 +28,13 @@ type mark = int
type sem = [ `Longest | `Shortest | `First ]
type rep_kind = [ `Greedy | `Non_greedy ]
+module Pmark : sig
+ type t = private int
+ val equal : t -> t -> bool
+ val compare : t -> t -> int
+ val gen : unit -> t
+end
+
type expr
type def =
Cst of Re_cset.t
@@ -39,6 +46,7 @@ type def =
| Erase of mark * mark
| Before of category
| After of category
+ | Pmark of Pmark.t
val def : expr -> def
val print_expr : Format.formatter -> expr -> unit
@@ -52,6 +60,7 @@ val seq : ids -> sem -> expr -> expr -> expr
val eps : ids -> expr
val rep : ids -> rep_kind -> sem -> expr -> expr
val mark : ids -> mark -> expr
+val pmark : ids -> Pmark.t -> expr
val erase : ids -> mark -> mark -> expr
val before : ids -> category -> expr
val after : ids -> category -> expr
@@ -60,10 +69,16 @@ val rename : ids -> expr -> expr
(****)
+module PmarkSet : Set.S with type elt = Pmark.t
+
(* States of the automata *)
type idx = int
-type mark_offsets = (mark * idx) list
+type mark_offsets = {
+ marks : (mark * idx) list ;
+ pmarks : PmarkSet.t
+}
+
type e =
TSeq of e list * expr * sem
| TExp of mark_offsets * expr
@@ -73,7 +88,7 @@ val print_state : Format.formatter -> e list -> unit
type hash
type mark_infos = int array
-type status = [`Failed | `Match of mark_infos | `Running]
+type status = Failed | Match of mark_infos * PmarkSet.t | Running
type state =
idx * category * e list * status option ref * hash
val dummy_state : state
diff --git a/lib_test/META b/lib_test/META
index 8a12489..5ee983b 100644
--- a/lib_test/META
+++ b/lib_test/META
@@ -1,6 +1,6 @@
# OASIS_START
-# DO NOT EDIT (digest: 2885ff6f6d78fedd8087ea393239740f)
-version = "1.3.2"
+# DO NOT EDIT (digest: 47d093767821af6123fc65ec99e824e0)
+version = "1.4.0"
description = "Pure OCaml regular expression library"
requires = "oUnit"
archive(byte) = "fort_unit.cma"
diff --git a/lib_test/fort_unit.ml b/lib_test/fort_unit.ml
index e82dc66..8ed5d06 100644
--- a/lib_test/fort_unit.ml
+++ b/lib_test/fort_unit.ml
@@ -21,6 +21,8 @@ let collected_tests = ref []
let id x = x
let not_found () = raise Not_found
+let bool_printer i = Printf.sprintf "%b" i
+let int_printer i = Printf.sprintf "%d" i
let str_printer s = "\"" ^ String.escaped s ^ "\""
let ofs_printer (i0,i1) = Printf.sprintf "(%d,%d)" i0 i1
let list_printer f l =
diff --git a/lib_test/test_re.ml b/lib_test/test_re.ml
index 4ccce5e..1d8174c 100644
--- a/lib_test/test_re.ml
+++ b/lib_test/test_re.ml
@@ -17,16 +17,28 @@ let re_fail ?pos ?len r s =
(fun () -> get_all_ofs (exec ?pos ?len (compile r) s)) ()
;;
+let correct_mark ?pos ?len r s il1 il2 =
+ expect_equal_app
+ ~msg:(str_printer s)
+ ~printer:bool_printer
+ id true
+ (fun () ->
+ let subs = exec ?pos ?len (compile r) s in
+ List.for_all (marked subs) il1 &&
+ List.for_all (fun x -> not @@ marked subs x) il2
+ ) ()
+;;
+
(* Substring Extraction *)
-let _ =
+let _ =
let r =
- seq [group (char 'a');
- opt (group (char 'a'));
+ seq [group (char 'a');
+ opt (group (char 'a'));
group (char 'b')]
in
let m = exec (compile r) "ab" in
-
+
expect_pass "get" (fun () ->
expect_eq_str id "ab" (get m) 0;
expect_eq_str id "a" (get m) 1;
@@ -310,8 +322,8 @@ let _ =
expect_pass "group" (fun () ->
let r =
- seq [group (char 'a');
- opt (group (char 'a'));
+ seq [group (char 'a');
+ opt (group (char 'a'));
group (char 'b')]
in
expect_eq_arr_ofs
@@ -322,8 +334,8 @@ let _ =
expect_pass "no_group" (fun () ->
let r =
no_group (
- seq [group (char 'a');
- opt (group (char 'a'));
+ seq [group (char 'a');
+ opt (group (char 'a'));
group (char 'b')]
)
in
@@ -340,6 +352,33 @@ let _ =
re_match r "ba" [|(0,2); (1, 2)|];
);
+ expect_pass "mark" (fun () ->
+ let i, r = mark digit in
+ correct_mark r "0" [i] [];
+ );
+
+ expect_pass "mark seq" (fun () ->
+ let i, r = mark digit in
+ let r = seq [r; r] in
+ correct_mark r "02" [i] [] ;
+ );
+
+ expect_pass "mark rep" (fun () ->
+ let i, r = mark digit in
+ let r = rep r in
+ correct_mark r "02" [i] [];
+ );
+
+ expect_pass "mark alt" (fun () ->
+ let ia, ra = mark @@ char 'a' in
+ let ib, rb = mark @@ char 'b' in
+ let r = alt [ra ; rb] in
+ correct_mark r "a" [ia] [ib];
+ correct_mark r "b" [ib] [ia];
+ let r = rep r in
+ correct_mark r "ab" [ia; ib] [] ;
+ );
+
(* Character set *)
expect_pass "set" (fun () ->
diff --git a/setup.ml b/setup.ml
index 0f4a5d0..22d86b9 100644
--- a/setup.ml
+++ b/setup.ml
@@ -1,7 +1,7 @@
(* setup.ml generated for the first time by OASIS v0.2.1~alpha1 *)
(* OASIS_START *)
-(* DO NOT EDIT (digest: 3ef35ef7f2d7f78eb25b15c78f7786c8) *)
+(* DO NOT EDIT (digest: 3e45a4ad95116e03dae9381533aa4371) *)
(*
Regenerated by OASIS v0.4.5
Visit http://oasis.forge.ocamlcore.org for more information and
@@ -6981,7 +6981,7 @@ let setup_t =
alpha_features = ["compiled_setup_ml"];
beta_features = [];
name = "re";
- version = "1.3.2";
+ version = "1.4.0";
license =
OASISLicense.DEP5License
(OASISLicense.DEP5Unit
@@ -7681,7 +7681,7 @@ let setup_t =
};
oasis_fn = Some "_oasis";
oasis_version = "0.4.5";
- oasis_digest = Some "�mm\145\031�����s[\015���";
+ oasis_digest = Some "�\027�\145)�\158\016�\135�\011�`\012�";
oasis_exec = None;
oasis_setup_args = [];
setup_update = false
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ocaml-maint/packages/ocaml-re.git
More information about the Pkg-ocaml-maint-commits
mailing list