Bitcask

open Base
open Checkseum
open Stdlib


type entry = {
    mutable checksum : int32;
	key      : Bytes.t list;
	value    : Bytes.t list
}

let entry key (value : Bytes.t list ) =
    let rec bytes_concat final_bytes v =
      match v with
      | [] -> final_bytes
      | hd :: tl -> bytes_concat (Bytes.cat final_bytes hd) tl
    in
    let final_bytes = bytes_concat Bytes.empty value
    in

    let new_entry = {
        checksum = Crc32.to_int32 (Crc32.digest_bytes final_bytes 0 (Bytes.length final_bytes) Crc32.default)  ;
		key = key;
		value = value;
	}
    in
    new_entry

Adaptive Radix Tree

tl;dr

  1. The code will be gradually improved and be committed to git finally. The version on this page is compiled but unfinished until I mention it here.
  2. Performance considerations are not paramount here. But building lists like this again and again instread of using Array is not at all recommended.
     let n_4keys = List.mapi (fun j el -> if i = j then
                                   List.nth keys (i - 1)
                                  else el) keys in (* TODO Array is mutable and needed here*)
   
  1. The algorithm seems complicated and nodes can be added wrongly. But I am trying to test is as thoroughly as possible.
  2. My OCaml code is still improving. Loops I code seem to distract from the underlying logic. It should be more functional with proper comments.
open Batteries
open Bigarray
module type Iterator =
sig
  val has_next: 'n list -> bool
  (* val next : 'n *)

end


 module RADIX ( Iter : Iterator ) = struct
    type meta =
    | Prefix of (Bytes.t list)  * int * int
    [@@deriving show]
    and
	leaf_node =
		Key of Bytes.t list
    and
	inner_node =
		meta *
		node_type *
		Bytes.t list *
		children
    and
    node_type =
      Node4 of int
    | Node16 of int
    | Node48 of int
    | Node256 of int
    | Leaf of int
    and
	node =
	  | Inner_node of inner_node
	  | Leaf of leaf_node
      | Empty
    and
	level =
		Node of  node *
		int
    and
	iterator =
		Tree of  tree *
		node *
		int *
		level list
    and
	tree =
		Root of node
    and
    children = node CCArray.t
      (* [@printer *)
      (*   fun fmt arr -> fprintf fmt "%a" (CCArray.pp pp_node) arr] *)
    [@@deriving show] (* only one call to `deriving show` is not enough *)


	let node4 = 0
	let  node16 = 1
	let  node48 = 2
	let  node256 = 3
	let leaf = 44

	let  node4min = 2
	let  node4max = 4

	let  node16min = node4max + 1
	let  node16max = 16

	let  node48min = node16max + 1
	let node48max = 48

	let node256min = node48max + 1
	let node256max = 256

	let maxprefixlen = 10

	let nullIdx = -1
    type keyfield = (int32, int32_elt, c_layout) Array1.t

let char_list_to_byte_list cl =
    let bl  = [] in
    List.map ( fun c ->   bl @ [(Bytes.init 1 ( fun _ -> c )  )]) cl

let make_node_list nodemax node_type =
  let rec loop_while node_list j_dx =
      if j_dx < nodemax then(
		let b = Bytes.create maxprefixlen |> Bytes.to_seq |> List.of_seq in
		let b1 = Bytes.create node4max |> Bytes.to_seq |> List.of_seq in
	    let inn = (
		 Prefix((List.map List.hd (char_list_to_byte_list b)), 0 , 0), (*  Redundant *)
		 node_type,
         List.map List.hd (char_list_to_byte_list b1),
         CCArray.make nodemax Empty
		 ) in
		loop_while (node_list @ [Inner_node inn]) (j_dx + 1 );
      ) else CCArray.of_list node_list
  in
  loop_while [] 0

let new_node4 =

	let b = Bytes.create maxprefixlen |> Bytes.to_seq |> List.of_seq in
	let b1 = Bytes.create node4max |> Bytes.to_seq |> List.of_seq in
	let inn = (
		 Prefix ((List.map List.hd (char_list_to_byte_list b)), 0 , 0), (*  Redundant *)
		 Node4 node4,
         List.map List.hd (char_list_to_byte_list b1),
		 make_node_list node4max (Node4 node4))
	in
	inn

let new_node16 : inner_node =
	let b = Bytes.create maxprefixlen |> Bytes.to_seq |> List.of_seq in
	let b1 = Bytes.create node16max |> Bytes.to_seq |> List.of_seq in
	let inn = (
		 Prefix((List.map List.hd (char_list_to_byte_list b)), 0 , 0), (*  Redundant *)
		 Node16 node16,
         List.map List.hd (char_list_to_byte_list b1),
		 make_node_list node16max (Node16 node16))
	in
	inn

let new_node48 : inner_node =
	let b = Bytes.create maxprefixlen |> Bytes.to_seq |> List.of_seq in
	let b1 = Bytes.create node16max |> Bytes.to_seq |> List.of_seq in
	let inn = (
		 Prefix((List.map List.hd (char_list_to_byte_list b)), 0 , 0), (*  Redundant *)
		 Node16 node16,
         List.map List.hd (char_list_to_byte_list b1),
		 make_node_list node16max (Node16 node16))
	in
	inn

let new_node256 =
	let b = Bytes.create maxprefixlen |> Bytes.to_seq |> List.of_seq in
	let inn = (
		Prefix((List.map List.hd (char_list_to_byte_list b)), 0 , 0), (*  Redundant *)
		Node256 node256,                                              (* Keys can't be arbitrary *)
                                                                   (* zero bytes.None is better *)
		make_node_list node16max (Node256 node256))
	in
	inn

let trailing_zeros bitfield =

  let rec count c =
    if (Int32.logand (Int32.shift_right_logical bitfield c) (Int32.of_int 1 )) <> (Int32.of_int 1 ) then
      count (c + 1)
    else c
  in count 0

let index n key =
	match n with
	  | ( meta, node_type, keys, children ) ->
	   match node_type with
       | node4 ->
         match meta with
          | Prefix (l, size , i2) ->
            let rec loop_while j_dx=
             if j_dx < size   then(
			  if ((Bytes.get keys j_dx) == key) then
                 key
              else
                loop_while ( j_dx + 1 )
             )
			 else(
		        (Char.chr 255)
             )
            in
            loop_while 0


      | node16 ->
         match meta with
          | Prefix (l, size , i2) ->
		    let mask =  Int32.to_int (Int32.shift_left (Int32.of_int 1) size) - 1 in
            let bitfield = Array1.create Int32 c_layout 1 in
            bitfield.{0} <- Int32.of_int 0b00000000;
		    bitfield.{0} <- Int32.logand  bitfield.{0} (Int32.of_int  mask);
		    if (Int32.lognot bitfield.{0} ) == 0l then(
            let rec loop_while j_dx=
             if j_dx < size   then(
			  if (Bytes.get keys j_dx) == key then
				let bitfield  =  Int32.logor bitfield.{0}  (Int32.shift_left 1l j_dx) in ()
              else
                loop_while ( j_dx + 1 )
             )
            in
            loop_while 0;

		   if (Int32.lognot bitfield.{0} ) == 0l then(
			Char.chr (trailing_zeros bitfield.{0})
           )
		   else(
		        (Char.chr 255)
           )
          )
		   else
		        (Char.chr 255)

        
      | node48 ->
		let index = (Bytes.get keys (Char.code key)) in
		if index > Char.chr 0 then
			Char.chr (Char.code index - 1)
		else
		    (Char.chr 255)

      | node256 ->
		key

let grow (n : inner_node ) =
	match n with
	  | ( meta, node_type, keys, children ) ->
	   match node_type with
       | node4 ->
         let (  n_16meta, node_type,  n_16keys,  n_16children) = new_node16 in
           (match meta with
            | Prefix (l, i1, i2) ->
            let rec loop_while j_dx=
            let new_n_16keys =  n_16keys in
            if j_dx < i1  then(
			 let child = Array.get children j_dx in
			 match child with
               | Empty -> loop_while ( j_dx + 1 )
               | _ ->
                let new_n_16keys = List.mapi (fun i el -> if i = j_dx then
                                         (List.nth keys j_dx)
                                         else el) keys in (* TODO Array is mutable and needed here*)
                let c = Array.set n_16children j_dx child in
                loop_while ( j_dx + 1 ) ;
             ) else(
	           (
		           n_16meta,
		           Node16 node16,
                   new_n_16keys,
                   n_16children)
            )
            in
            loop_while 0
            )
	   | node16 ->     (*Create a Node48 and set values  *)
         let (  n_48meta, node_type,  n_48keys,  n_48children) = new_node48 in
         match meta with
          | Prefix (l, i1, i2) ->
           let index = i1 in
           let rec loop_while j_dx index =
            if j_dx < i1  then(
			 let child = Array.get children j_dx in
			 match child with
               | Empty -> loop_while ( j_dx + 1 ) ( index + 1 )
               | _ ->
                let n_48keys = List.mapi (fun i el -> if i = j_dx then
                                         (List.nth keys (index + 1))
                                         else el) keys in (* TODO Array is mutable and needed here*)
                let c = Array.set n_48children index child in
                loop_while ( j_dx + 1 ) ( index + 1 );
           ) else(
	             (
		         n_48meta,
		         Node48 node48,
                 n_48keys,
                 n_48children)
           )
          in
          loop_while 0 index

let find_child key n =          (*  How do we check if n is None or Some ?*)
                                (* Is that check needed ? *)

   let index = index n key in

   match n with
	  | ( meta, node_type, keys, children ) ->
        match node_type with
        | node4, node16, node48 ->
		if index >= (Char.chr 0) then
			Array.get children (Char.code index); (*  TODO Exception handler*)

	    | node256 ->
		let child = Array.get children (Char.code key) (*  TODO Exception handler*)
		in child

let maxsize node_type =
	 match node_type with
      | node4 -> node4max
      | node16 -> node16max
      | node48 -> node48max
      | node256 -> node256max

let rec add_child key n =

    let pp_ascii_bytes = Fmt.on_bytes (Fmt.ascii ()) in
    let _ = Fmt.pr "ASCII representation (with Fmt.ascii): %a\n" pp_ascii_bytes  key in
	match n with
	  | ( meta, node_type, keys, children ) ->
          let Prefix(l, size, len) = meta in
          if size == maxsize node_type then(
            let grow_n = grow n in
            Printf.printf "Grow";
            add_child key grow_n)
          else (
	   match node_type with
      | Node4 node4 ->
        let (  n_4meta, node_type,  n_4keys,  n_4children) = n in
        match meta with
          | Prefix (l, i1, i2) ->
        let idx =
            let rec loop_while id_x=
            if id_x < List.length keys && Bytes.compare key (List.nth keys id_x) >= 0 then
                loop_while (id_x + 1)
              else  id_x
            in
            loop_while 0;
            in

            let rec loop_while i=


              if Bytes.compare (List.nth keys (i - 1)) key  > 0 then
                let c = Array.get n_4children (i - 1) in
                let _ = Array.set n_4children i c in
                let n4_keys = List.mapi (fun j el -> if i = j then
                                   List.nth keys (i - 1)
                                  else el) keys in (* TODO Array is mutable and needed here*)
             loop_while (i - 1)
            in
            loop_while (Array.length children);
                let n4_keys =
                if( idx < List.length keys ) then(
                 List.mapi (fun j el -> if j = idx then
                                        key
                                        else el) keys(* TODO Array is mutable and needed here*)
                )
                else(
                   keys
                 ) in
                let _ = if idx <= size && idx <
                    Array.length n_4children then
                    Array.set n_4children idx (Inner_node n) in
                let _ = if idx <= size && idx <
                    Array.length n_4children then
                    Array.set n_4children idx (Inner_node n) in
	             (
                 Prefix (l, size + 1, i2), (* TODO Increment size of the parent and child properly*)
		         node_type,
                 n_4keys,
                 n_4children)

      | node48 ->
        let (  n_48meta, node_type,  n_48keys,  n_48children) = n in
        match meta with
          | Prefix (l, i1, i2) ->
		    let idx = 0 in
            let rec loop_while idx=
            if idx < Array.length children then
             (*TODO  How can I check if the element is a None or Some ?*)
              (* It seems we add a new node if the entry is None. *)
             loop_while (idx + 1);
            in
            loop_while idx;
            let n_48keys = List.mapi (fun i el -> if i = (idx + 1) then
                                   key
                                  else el) keys in (* TODO Array is mutable and needed here*)
                let _ = if idx <= size && idx <
                    Array.length n_48children then
                    Array.set n_48children idx (Inner_node n) in
	             (
                 Prefix (l, i1 + 1, i2),
		         node_type,
                 n_48keys,
                 n_48children)
           )

end

module RADIXOp =
RADIX(struct

  let  has_next l=
     true


 end)

Test to view the tree

This is a convenient way to view the tree for debugging but not a testing procedure.

open Bitcask__Adaptive_radix_tree.RADIXOp

let%expect_test _=
  let n = new_node4 in
	match n with
	  | ( _, _, _, children ) ->
         match children with
          |  node ->
            Array.iter (fun n -> Printf.printf "%s" (Format.asprintf "%a" pp_node n)) node;
  [%expect {|
    (Adaptive_radix_tree.RADIX.Inner_node
       ((Adaptive_radix_tree.RADIX.Prefix (
           ["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
             "\000"; "\000"],
           0, 0)),
        (Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
        Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
        Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
       ((Adaptive_radix_tree.RADIX.Prefix (
           ["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
             "\000"; "\000"],
           0, 0)),
        (Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
        Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
        Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
       ((Adaptive_radix_tree.RADIX.Prefix (
           ["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
             "\000"; "\000"],
           0, 0)),
        (Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
        Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
        Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
       ((Adaptive_radix_tree.RADIX.Prefix (
           ["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
             "\000"; "\000"],
           0, 0)),
        (Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
        Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
        Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))
    |}]


let%expect_test _=  (* Add 4 children to Node type 4*)
  let n = new_node4 in
  let new_node =
            let rec loop_while node (idx : int) (key : bytes) =

            if idx < 4 then(
                    let k = Bytes.get key 0 in
                    let k_incr = Char.chr (Char.code k + 1) in  (* increment the char *)
                    let key = Bytes.make 1 k_incr in
		            let newer_node = add_child  key node in
                    loop_while newer_node (idx + 1) key;
            )else node
            in
            loop_while n 0 (Bytes.make 1  (Char.chr 48));
    in
	match new_node with
	     | (  Prefix(_, new_size, _), _, _, children ) ->
           Printf.printf "Size is %d\n" new_size;
           Printf.printf "Size of children %d\n" (Array.length children);

   Array.iter (fun n -> Printf.printf "%s" (Format.asprintf "%a" pp_node n)) children;
   [%expect {|
     ASCII representation (with Fmt.ascii): 1
     ASCII representation (with Fmt.ascii): 2
     ASCII representation (with Fmt.ascii): 3
     ASCII representation (with Fmt.ascii): 4
     Size is 4
     Size of children 4
     (Adaptive_radix_tree.RADIX.Inner_node
        ((Adaptive_radix_tree.RADIX.Prefix (
            ["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
              "\000"; "\000"],
            0, 0)),
         (Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
         Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
         Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
        ((Adaptive_radix_tree.RADIX.Prefix (
            ["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
              "\000"; "\000"],
            0, 0)),
         (Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
         Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
         Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
        ((Adaptive_radix_tree.RADIX.Prefix (
            ["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
              "\000"; "\000"],
            0, 0)),
         (Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
         Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
         Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))(Adaptive_radix_tree.RADIX.Inner_node
        ((Adaptive_radix_tree.RADIX.Prefix (
            ["\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000"; "\000";
              "\000"; "\000"],
            0, 0)),
         (Adaptive_radix_tree.RADIX.Node4 0), ["\000"; "\000"; "\000"; "\000"],
         Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty,
         Adaptive_radix_tree.RADIX.Empty, Adaptive_radix_tree.RADIX.Empty))
     |}]

I would add this section of code to the list of inefficiences but I am not sure if I am using the correct API. The following code increments a bytes type by 1. This is only tested lightly but the keys are updated.

            if idx < 4 then(
                    let k = Bytes.get key 0 in
                    let k_incr = Char.chr (Char.code k + 1) in  (* increment the char *)
                    let key = Bytes.make 1 k_incr in
		            let newer_node = add_child  key node in
                    loop_while newer_node (idx + 1) key;
            )else node
            in
            loop_while n 0 (Bytes.make 1  (Char.chr 48));
Written on June 6, 2025