Probabilistic and other Data Structures
tl;dr
- The code will be gradually improved and be committed to git finally.
- Performance considerations are not paramount here.
- The language is OCaml and it is imperative even though I will attempt to use functional Data structures.
- The repository is this
Development Environment
Probabilistic and other Data Structures
Bloom Filter
let jenkins ss : int32 =
let rec hash_accu ( accu, l ):int32 =
match l with
| [] ->
let hs = Int32.add accu (Int32.shift_left accu 3) in
let hs1 = Int32.logxor hs (Int32.shift_right_logical hs 11) in
Int32.add (Int32.shift_left hs1 15) hs1
| hd :: tl ->
let h = Int32.add accu hd in
let accu = Int32.add h (Int32.shift_left h 10) in
hash_accu (Int32.logxor accu (Int32.shift_right_logical accu 6), tl)
(* | [] -> *)
(* let hs = accu + (accu lsl 3) in *)
(* let hs1 = hs lxor (hs lsr 11) in *)
(* Int32.of_int (hs1 + (hs1 lsl 15)) *)
(* | hd :: tl ->let h = accu + hd in *)
(* let accu = h + (h lsl 10) in *)
(* hash_accu ((accu lxor (accu lsr 6) ), tl) *)
in
hash_accu ((Int32.of_int 0 ),ss)
Initial set of tests
As I mentioned I am not considering space allocation here as the focus is on working code.
let string_to_print_list s =
let str = s |> String.to_seq |> List.of_seq in
let int_list = List.map int_of_char str in
List.iter (fun c -> Printf.printf "%d\n" c) int_list
let string_to_int_list s =
let str = s |> String.to_seq |> List.of_seq in
let int_list = List.map int_of_char str in
List.map (fun c -> Int32.of_int c) int_list
let%expect_test _=
let hash = Bloomfilter.Ds.jenkins (string_to_int_list "Hello") in
Printf.printf "%d\n" (Int32.to_int hash);
[%expect {| 1901914092 |}]
let%expect_test _=
string_to_print_list "Hello";
[%expect {|
72
101
108
108
111 |}]
Initial version with a list of hash functions.
type 'hf element =
{ value : 'hf
; mutable next : 'hf element option
}
type 'hf t = 'hf element option ref
let insert_hashfunc t value =
let new_hashfunc = { next = !t; value } in
(match !t with
| Some old_hashfunc -> old_hashfunc.next
<- Some new_hashfunc
| None -> ());
t := Some new_hashfunc;
new_hashfunc
Test
let%expect_test "hash" =
let empty_list() : 'hf Bloomfilter.Ds.t = ref None in
let l = empty_list() in
let hf = Bloomfilter.Ds.insert_hashfunc l Bloomfilter.Ds.jenkins in
let hash = hf.value (string_to_int_list "Hello") in
Printf.printf "%d\n" (Int32.to_int hash);
[%expect {| 1901914092 |}]
Test for Bit set and get
let%expect_test "bitset" =
let empty_list() : 'hf Bloomfilter.Ds.t = ref None in
let l = empty_list() in
let hf = Bloomfilter.Ds.insert_hashfunc l Bloomfilter.Ds.jenkins in
let bit = Bloomfilter.Ds.set_indices (Bloomfilter.Ds.create_filter 9) "Hello" hf.value
in
Batteries.BitSet.print (BatInnerIO.output_channel stdout) bit ;
[%expect {| 0000000000001000 |}]
let%expect_test "bitget" =
let empty_list() : 'hf Bloomfilter.Ds.t = ref None in
let l = empty_list() in
let hf = Bloomfilter.Ds.insert_hashfunc l Bloomfilter.Ds.jenkins in
let bit = Bloomfilter.Ds.get_indices (Bloomfilter.Ds.create_filter 9) "Hello" hf.value in
Printf.printf "%s\n" (string_of_bool bit);
[%expect {| true |}]
Bit set and get
The code will be further refactored and committed to my repository.
let set_indices filt element hf =
let length = Batteries.BitSet.capacity filt.bits in
let hash = hf (string_to_int_list element) in
let () = Batteries.BitSet.set filt.bits ((Int32.to_int hash) mod length) in
filt.bits
let get_indices filt element hf =
let length = Batteries.BitSet.capacity filt.bits in
let hash = hf (string_to_int_list element) in
let () = Batteries.BitSet.set filt.bits ((Int32.to_int hash) mod length) in
let bit = Batteries.BitSet.mem filt.bits ((Int32.to_int hash ) mod length) in
bit
Splay Tree
Initial set of tests
type 'a r_tree = Leaf | Node of 'a node1
and 'a node1 = { value : 'a; left : 'a r_tree; right : 'a r_tree; }
let rec check_splay_tree t =
match t with
|Leaf -> false
| Node {left; value = v; right}->
match left, right with
| Node { left = _; value = v0; _}, Node {left = _; value = v1; _} -> v == v1 + v0 + 1
| Node { left ; _}, Leaf -> check_splay_tree left
| Leaf, Node { left = _ ;value = _; right} -> check_splay_tree right
| _ -> false
let insert=
Node {
value = 2;
left = Node {value = 1; left = Leaf; right = Leaf};
right = Node {value = 3; left = Leaf; right = Leaf}
}
let%expect_test _=
Printf.printf "%s" (string_of_bool (check_splay_tree insert));
[%expect {| false |}]
We can print a small tree like this for debugging.
let rec print_sTree (sTree : int s_tree ) (d : int) : unit =
match sTree with
|Leaf -> ()
| Node { left ;value ; right} ->
print_sTree right (d + 1);
for __i=0 to (d - 1) do
Printf.printf " "
done;
Printf.printf "%d\n" value;
print_sTree left (d+1)
dune runtest –auto-promote updates the test output automatically.
Core Splay algorithm
At this stage the compiler is happy but very less progress is made. There is a steep learning curve here as I have to learn the language deeply.
Insert Key into a binary tree
At this stage the mutable imperative style is hard to debug.Moreover None and Some Leaf are both used redundantly. This led to a bug.
let rec insert_key (k : int ) (t : int splay_tree option ref) : int splay_tree option ref=
match !t with
| None |Some Leaf ->
let new_node = Node { key = k; value = 0; left = None; right = None } in
t := Some new_node;
t
| Some tree ->
let insert_node tree =
match tree with
| Node old_key ->
begin match old_key with
| ok ->
if k > ok.key then(
match ok.right with
| None | Some Leaf ->
let r = ref (Some (Node { key = k ;value = 0 ; right = Some Leaf; left = Some Leaf} ))in
ok.right <- !r;
t
| Some _r ->
insert_key k (ref (ok.right ))
)
else
if k < ok.key then(
match ok.left with
| None ->
let l = ref (Some (Node { key = k ;value = 0 ; right = Some Leaf; left = Some Leaf} ))in
ok.left <- !l;
t
| Some _l ->
insert_key k (ref (ok.left));
)
else
t
end;
|Leaf ->t
in
insert_node tree
Porting SML to OCaml
I spent several days coding a Splay tree using my inefficient mutable ref data structure. It didn’t work satisfactily. Eventually I picked up basic SML and ported the SML code to OCaml. This was a great learning experience as I learnt how to use Functors and abstractions and modules.
let rec splay (l, v, r) (k:Params.key) =
match compare k (keyOf (v)) with
| EQUAL -> (l, v, r)
| LESS ->
(match l with
| Empty -> (l, v, r) (* not found *)
| Node (ll, lv, lr) ->
match compare k (keyOf (lv)) with
| EQUAL -> (ll, lv, Node(lr, v, r)) (* 1: zig *)
| LESS ->
(match ll with
| Empty -> (Empty, lv, Node(lr, v, r))
(* not found *)
| Node (lln, lvn, lrn) as n -> (* 2: zig-zig *)
let (lll, llv, llr) = splay (lln, lvn, lrn) k in
(lll,llv,Node(llr,lv,Node(lr,v,r)))
)
| GREATER ->
(match lr with
| Empty -> (ll, lv, Node(Empty, v, r))
|Node (lln, lvn, lrn) as n -> (* 3: zig-zag *)
let (lrl, lrv, lrr) = splay (lln, lvn, lrn) k in
(Node(ll,lv,lrl),lrv,Node(lrr,v,r))
))
| GREATER ->
(match r with
| Empty -> (l, v, r) (* not found *)
| Node (rl, rv, rr) ->
match compare k (keyOf (rv)) with
|EQUAL -> (Node(l,v,rl),rv,rr) (* 1: zag *)
| GREATER ->
(match rr with
| Empty -> (Node(l,v,rl),rv,rr) (* not found *)
| Node (lln, lvn, lrn) as n -> (* 3: zag-zag *)
let (rrl, rrv, rrr) = splay (lln, lvn, lrn) k in
(Node(Node(l,v,rl),rv,rrl),rrv,rrr)
)
| LESS ->
(match rl with
| Empty -> (Node(l,v,rl),rv,rr) (* not found *)
| Node (lln, lvn, lrn) as n -> (* 2: zag-zig *)
let (rll, rlv, rlr) = splay (lln, lvn, lrn) k in
(Node(l,v,rll),rlv,Node(rlr,rv,rr))
))
let size s tr = s
type 'b folder = ((elem*'b)->'b) -> 'b -> key -> set -> 'b
let rec add ((size,tr):set) (e:elem) = let
((l,v,r), b) = add_tree !tr e in
let node = splay (l,v,r) (keyOf(e)) in
let size' = if b then size else size+1
in
let _ = Printf.printf "Size %d" size' in
((size', ref (Node((l,v,r)))),b) and
add_tree (t: tree) (e: elem) :node * bool =
match t with
|Empty -> ((Empty, e, Empty), false)
| Node (l,v,r) ->
(match compare (keyOf(v)) (keyOf(e)) with
| EQUAL -> ((l,e,r),true)
(* | GREATER -> let (n',b) = add_tree l e in *)
(* ((Node(n'),v,r),b) *)
(* | LESS -> let (n',b) = add_tree r e in *)
(* ((l,v,Node(n')),b) *)
| GREATER -> let ((x,y,z),b) = add_tree l e in
((Node(x,y,z),v,r),b)
| LESS -> let ((x,y,z),b) = add_tree r e in
((l,v,Node (x,y,z)),b)
)
Range-Minimum-Query
I set up the basic code for this. There is no query now. Code is in Git.
let preprocess_a l mk =
let ps =
let k = 0 -- mk
and i = 0 -- (List.length l -1 ) in
(k,i) in
let v = Array.make ((List.length l) * ( mk + 1)) 0 in
List.iter (fun (k, i) ->
let () = Printf.printf "[mk %d] [k %d] [i %d]\n" mk k i in
let ind = indx (List.length l ) in
match k with
| 0 ->
let index = ind i k in
let value = List.nth l (ind i 0) in
(* let () = Printf.printf "Value set is %d [k %d] [i %d]\n" value k i in *)
let v' = Array.set v index value in
Array.iter (fun elem -> Printf.printf " %d " elem) v
| _ ->
let i' = i + (Batteries.Int.pow 2 ( k - 1)) in
let p1 = Array.get v ( ind i (k - 1) ) in
let p2 = Array.get v ( ind i' (k - 1)) in
(* let () = Printf.printf "p1 is %d p2 is %d [k %d] [i %d]\n" p1 p2 k i in *)
let v' = Array.set v (ind i k ) ( min p1 p2) in
Array.iter (fun elem -> Printf.printf " %d " elem) v
) (enum_to_list ps)