Bitcask
open Base
open Checkseum
open Stdlib
type entry = {
mutable checksum : int32;
key : Bytes.t list;
value : Bytes.t list
}
let entry key (value : Bytes.t list ) =
let rec bytes_concat final_bytes v =
match v with
| [] -> final_bytes
| hd :: tl -> bytes_concat (Bytes.cat final_bytes hd) tl
in
let final_bytes = bytes_concat Bytes.empty value
in
let new_entry = {
checksum = Crc32.to_int32 (Crc32.digest_bytes final_bytes 0 (Bytes.length final_bytes) Crc32.default) ;
key = key;
value = value;
}
in
new_entry
Adaptive Radix Tree
tl;dr
- The code will be gradually improved and be committed to git finally. The version on this page is compiled but unfinished until I mention it here.
- Performance considerations are not paramount here. But building lists like this again and again instread of using Array is not at all recommended.
let n_4keys = List.mapi (fun j el -> if i = j then
List.nth keys (i - 1)
else el) keys in (* TODO Array is mutable and needed here*)
- The algorithm seems complicated and nodes can be added wrongly. But I am trying to test is as thoroughly as possible.
- My OCaml code is still improving. Loops I code seem to distract from the underlying logic. It should be more functional with proper comments.
open Batteries
open Bigarray
module type Iterator =
sig
val has_next: 'n list -> bool
(* val next : 'n *)
end
module RADIX ( Iter : Iterator ) = struct
type meta =
| Prefix of (Bytes.t list) * int * int
[@@deriving show]
and
leaf_node =
Key of Bytes.t list
and
inner_node =
meta *
node_type *
Bytes.t list *
children
and
node_type =
Node4 of int
| Node16 of int
| Node48 of int
| Node256 of int
| Leaf of int
and
node =
| Inner_node of inner_node
| Leaf of leaf_node
| Empty
and
level =
Node of node *
int
and
iterator =
Tree of tree *
node *
int *
level list
and
tree =
Root of node
and
children = node CCArray.t
(* [@printer *)
(* fun fmt arr -> fprintf fmt "%a" (CCArray.pp pp_node) arr] *)
[@@deriving show] (* only one call to `deriving show` is not enough *)
let node4 = 0
let node16 = 1
let node48 = 2
let node256 = 3
let leaf = 44
let node4min = 2
let node4max = 4
let node16min = node4max + 1
let node16max = 16
let node48min = node16max + 1
let node48max = 48
let node256min = node48max + 1
let node256max = 256
let maxprefixlen = 10
let nullIdx = -1
type keyfield = (int32, int32_elt, c_layout) Array1.t
let char_list_to_byte_list cl =
let bl = [] in
List.map ( fun c -> bl @ [(Bytes.init 1 ( fun _ -> c ) )]) cl
let make_node_list nodemax node_type =
let rec loop_while node_list j_dx =
if j_dx < nodemax then(
let b = Bytes.create maxprefixlen |> Bytes.to_seq |> List.of_seq in
let b1 = Bytes.create node4max |> Bytes.to_seq |> List.of_seq in
let inn = (
Prefix((List.map List.hd (char_list_to_byte_list b)), 0 , 0), (* Redundant *)
node_type,
List.map List.hd (char_list_to_byte_list b1),
CCArray.make nodemax Empty
) in
loop_while (node_list @ [Inner_node inn]) (j_dx + 1 );
) else CCArray.of_list node_list
in
loop_while [] 0
let new_node4 =
let b = Bytes.create maxprefixlen |> Bytes.to_seq |> List.of_seq in
let b1 = Bytes.create node4max |> Bytes.to_seq |> List.of_seq in
let inn = (
Prefix ((List.map List.hd (char_list_to_byte_list b)), 0 , 0), (* Redundant *)
Node4 node4,
List.map List.hd (char_list_to_byte_list b1),
make_node_list node4max (Node4 node4))
in
inn
let new_node16 : inner_node =
let b = Bytes.create maxprefixlen |> Bytes.to_seq |> List.of_seq in
let b1 = Bytes.create node16max |> Bytes.to_seq |> List.of_seq in
let inn = (
Prefix((List.map List.hd (char_list_to_byte_list b)), 0 , 0), (* Redundant *)
Node16 node16,
List.map List.hd (char_list_to_byte_list b1),
make_node_list node16max (Node16 node16))
in
inn
let new_node48 : inner_node =
let b = Bytes.create maxprefixlen |> Bytes.to_seq |> List.of_seq in
let b1 = Bytes.create node16max |> Bytes.to_seq |> List.of_seq in
let inn = (
Prefix((List.map List.hd (char_list_to_byte_list b)), 0 , 0), (* Redundant *)
Node16 node16,
List.map List.hd (char_list_to_byte_list b1),
make_node_list node16max (Node16 node16))
in
inn
let new_node256 =
let b = Bytes.create maxprefixlen |> Bytes.to_seq |> List.of_seq in
let inn = (
Prefix((List.map List.hd (char_list_to_byte_list b)), 0 , 0), (* Redundant *)
Node256 node256, (* Keys can't be arbitrary *)
(* zero bytes.None is better *)
make_node_list node16max (Node256 node256))
in
inn
let trailing_zeros bitfield =
let rec count c =
if (Int32.logand (Int32.shift_right_logical bitfield c) (Int32.of_int 1 )) <> (Int32.of_int 1 ) then
count (c + 1)
else c
in count 0
let index n key =
match n with
| ( meta, node_type, keys, children ) ->
match node_type with
| node4 ->
match meta with
| Prefix (l, size , i2) ->
let rec loop_while j_dx=
if j_dx < size then(
if ((Bytes.get keys j_dx) == key) then
key
else
loop_while ( j_dx + 1 )
)
else(
(Char.chr 255)
)
in
loop_while 0
| node16 ->
match meta with
| Prefix (l, size , i2) ->
let mask = Int32.to_int (Int32.shift_left (Int32.of_int 1) size) - 1 in
let bitfield = Array1.create Int32 c_layout 1 in
bitfield.{0} <- Int32.of_int 0b00000000;
bitfield.{0} <- Int32.logand bitfield.{0} (Int32.of_int mask);
if (Int32.lognot bitfield.{0} ) == 0l then(
let rec loop_while j_dx=
if j_dx < size then(
if (Bytes.get keys j_dx) == key then
let bitfield = Int32.logor bitfield.{0} (Int32.shift_left 1l j_dx) in ()
else
loop_while ( j_dx + 1 )
)
in
loop_while 0;
if (Int32.lognot bitfield.{0} ) == 0l then(
Char.chr (trailing_zeros bitfield.{0})
)
else(
(Char.chr 255)
)
)
else
(Char.chr 255)
| node48 ->
let index = (Bytes.get keys (Char.code key)) in
if index > Char.chr 0 then
Char.chr (Char.code index - 1)
else
(Char.chr 255)
| node256 ->
key
let grow (n : inner_node ) =
match n with
| ( meta, node_type, keys, children ) ->
match node_type with
| node4 ->
let ( n_16meta, node_type, n_16keys, n_16children) = new_node16 in
(match meta with
| Prefix (l, i1, i2) ->
let rec loop_while j_dx=
let new_n_16keys = n_16keys in
if j_dx < i1 then(
let child = Array.get children j_dx in
match child with
| Empty -> loop_while ( j_dx + 1 )
| _ ->
let new_n_16keys = List.mapi (fun i el -> if i = j_dx then
(List.nth keys j_dx)
else el) keys in (* TODO Array is mutable and needed here*)
let c = Array.set n_16children j_dx child in
loop_while ( j_dx + 1 ) ;
) else(
(
n_16meta,
Node16 node16,
new_n_16keys,
n_16children)
)
in
loop_while 0
)
| node16 -> (*Create a Node48 and set values *)
let ( n_48meta, node_type, n_48keys, n_48children) = new_node48 in
match meta with
| Prefix (l, i1, i2) ->
let index = i1 in
let rec loop_while j_dx index =
if j_dx < i1 then(
let child = Array.get children j_dx in
match child with
| Empty -> loop_while ( j_dx + 1 ) ( index + 1 )
| _ ->
let n_48keys = List.mapi (fun i el -> if i = j_dx then
(List.nth keys (index + 1))
else el) keys in (* TODO Array is mutable and needed here*)
let c = Array.set n_48children index child in
loop_while ( j_dx + 1 ) ( index + 1 );
) else(
(
n_48meta,
Node48 node48,
n_48keys,
n_48children)
)
in
loop_while 0 index
let find_child key n = (* How do we check if n is None or Some ?*)
(* Is that check needed ? *)
let index = index n key in
match n with
| ( meta, node_type, keys, children ) ->
match node_type with
| node4, node16, node48 ->
if index >= (Char.chr 0) then
Array.get children (Char.code index); (* TODO Exception handler*)
| node256 ->
let child = Array.get children (Char.code key) (* TODO Exception handler*)
in child
let maxsize node_type =
match node_type with
| node4 -> node4max
| node16 -> node16max
| node48 -> node48max
| node256 -> node256max
let rec add_child key n =
let pp_ascii_bytes = Fmt.on_bytes (Fmt.ascii ()) in
let _ = Fmt.pr "ASCII representation (with Fmt.ascii): %a\n" pp_ascii_bytes key in
match n with
| ( meta, node_type, keys, children ) ->
let Prefix(l, size, len) = meta in
if size == maxsize node_type then(
let grow_n = grow n in
Printf.printf "Grow";
add_child key grow_n)
else (
match node_type with
| Node4 node4 ->
let ( n_4meta, node_type, n_4keys, n_4children) = n in
match meta with
| Prefix (l, i1, i2) ->
let idx =
let rec loop_while id_x=
if id_x < List.length keys && Bytes.compare key (List.nth keys id_x) >= 0 then
loop_while (id_x + 1)
else id_x
in
loop_while 0;
in
let rec loop_while i=
if Bytes.compare (List.nth keys (i - 1)) key > 0 then
let c = Array.get n_4children (i - 1) in
let _ = Array.set n_4children i c in
let n4_keys = List.mapi (fun j el -> if i = j then
List.nth keys (i - 1)
else el) keys in (* TODO Array is mutable and needed here*)
loop_while (i - 1)
in
loop_while (Array.length children);
let n4_keys =
if( idx < List.length keys ) then(
List.mapi (fun j el -> if j = idx then
key
else el) keys(* TODO Array is mutable and needed here*)
)
else(
keys
) in
let _ = if idx <= size && idx <
Array.length n_4children then
Array.set n_4children idx (Inner_node n) in
let _ = if idx <= size && idx <
Array.length n_4children then
Array.set n_4children idx (Inner_node n) in
(
Prefix (l, size + 1, i2), (* TODO Increment size of the parent and child properly*)
node_type,
n_4keys,
n_4children)
| node48 ->
let ( n_48meta, node_type, n_48keys, n_48children) = n in
match meta with
| Prefix (l, i1, i2) ->
let idx = 0 in
let rec loop_while idx=
if idx < Array.length children then
(*TODO How can I check if the element is a None or Some ?*)
(* It seems we add a new node if the entry is None. *)
loop_while (idx + 1);
in
loop_while idx;
let n_48keys = List.mapi (fun i el -> if i = (idx + 1) then
key
else el) keys in (* TODO Array is mutable and needed here*)
let _ = if idx <= size && idx <
Array.length n_48children then
Array.set n_48children idx (Inner_node n) in
(
Prefix (l, i1 + 1, i2),
node_type,
n_48keys,
n_48children)
)
end
module RADIXOp =
RADIX(struct
let has_next l=
true
end)
Test to view the tree
This is a convenient way to view the tree for debugging but not a testing procedure.
open Bitcask__Adaptive_radix_tree.RADIXOp
let%expect_test _=
let n = new_node4 in
match n with
| ( _, _, _, children ) ->
match children with
| node ->
Array.iter (fun n -> Printf.printf "%s" (Format.asprintf "%a" pp_node n)) node;
[%expect {|
(Adaptive_radix_tree.RADIX.Inner_node
((Adaptive_radix_tree.RADIX.Prefix (
["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
"\000"; "\000"],
0, 0)),
(Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
((Adaptive_radix_tree.RADIX.Prefix (
["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
"\000"; "\000"],
0, 0)),
(Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
((Adaptive_radix_tree.RADIX.Prefix (
["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
"\000"; "\000"],
0, 0)),
(Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
((Adaptive_radix_tree.RADIX.Prefix (
["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
"\000"; "\000"],
0, 0)),
(Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))
|}]
let%expect_test _= (* Add 4 children to Node type 4*)
let n = new_node4 in
let new_node =
let rec loop_while node (idx : int) (key : bytes) =
if idx < 4 then(
let k = Bytes.get key 0 in
let k_incr = Char.chr (Char.code k + 1) in (* increment the char *)
let key = Bytes.make 1 k_incr in
let newer_node = add_child key node in
loop_while newer_node (idx + 1) key;
)else node
in
loop_while n 0 (Bytes.make 1 (Char.chr 48));
in
match new_node with
| ( Prefix(_, new_size, _), _, _, children ) ->
Printf.printf "Size is %d\n" new_size;
Printf.printf "Size of children %d\n" (Array.length children);
Array.iter (fun n -> Printf.printf "%s" (Format.asprintf "%a" pp_node n)) children;
[%expect {|
ASCII representation (with Fmt.ascii): 1
ASCII representation (with Fmt.ascii): 2
ASCII representation (with Fmt.ascii): 3
ASCII representation (with Fmt.ascii): 4
Size is 4
Size of children 4
(Adaptive_radix_tree.RADIX.Inner_node
((Adaptive_radix_tree.RADIX.Prefix (
["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
"\000"; "\000"],
0, 0)),
(Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
((Adaptive_radix_tree.RADIX.Prefix (
["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
"\000"; "\000"],
0, 0)),
(Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
((Adaptive_radix_tree.RADIX.Prefix (
["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
"\000"; "\000"],
0, 0)),
(Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
((Adaptive_radix_tree.RADIX.Prefix (
["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
"\000"; "\000"],
0, 0)),
(Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))
|}]
I would add this section of code to the list of inefficiences but I am not sure if I am using the correct API. The following code increments a bytes type by 1. This is only tested lightly but the keys are updated.
if idx < 4 then(
let k = Bytes.get key 0 in
let k_incr = Char.chr (Char.code k + 1) in (* increment the char *)
let key = Bytes.make 1 k_incr in
let newer_node = add_child key node in
loop_while newer_node (idx + 1) key;
)else node
in
loop_while n 0 (Bytes.make 1 (Char.chr 48));
Written on June 6, 2025