1 open bucket
  2 (* TEMPORARY ultimately, might wish not to open bucket *)
  3 
  4 (* -------------------------------------------------------------------------- *)
  5 
  6 (* The definition of a hash table. *)
  7 
  8 mutable data table k a =
  9   Table {
 10     (* The table's current number of elements. *)
 11     cardinal: int;
 12     (* The bucket array, whose length is always a power of 2. *)
 13     buckets: array (bucket k a);
 14     (* The hash function. *)
 15     hash: k -> int;
 16     (* The equality function. *)
 17     equal: (k, k) -> bool
 18   }
 19 
 20 (* -------------------------------------------------------------------------- *)
 21 
 22 (* Creation. *)
 23 
 24 (* [create_buckets] rounds up its [capacity] argument to the nearest power of
 25    2 above [n], then allocates a new bucket array. *)
 26 
 27 val create_buckets [k, a] (capacity: int) : array (bucket k a) =
 28   array::init (Up, array::above (16, capacity), fun (i: int) : bucket k a = BNil)
 29 
 30 (* Creation. *)
 31 
 32 val create [k, a] (capacity: int, hash: k -> int, equal: (k, k) -> bool) : table k a =
 33   Table {
 34     cardinal = 0;
 35     buckets = create_buckets capacity;
 36     hash = hash;
 37     equal = equal
 38   }
 39 
 40 (* -------------------------------------------------------------------------- *)
 41 
 42 (* Internal functions. *)
 43 
 44 (* [key_index] computes the array index that corresponds to the key [x]. *)
 45 
 46 (* The type assigned to [key_index] is much more complex than I would like:
 47    we have been forced to expand the permission [t @ table k a] and assign
 48    the name [buckets] to the value contained in the [buckets] field of [t].
 49    This allows us to express the fact that [t.buckets] is *not* modified by
 50    [key_index]. This fact, in turn, is required in order to type-check some
 51    clients. If we wrote just [t @ table k a], we would know that [t] remains
 52    a table, but we would not know that [t.buckets] is unmodified, so a client
 53    that reads [t.buckets] *before* invoking [key_index] and uses this value
 54    *after* invoking [key_index] would be ill-typed. *)
 55 
 56 (* TEMPORARY a "const" permission would be useful here *)
 57 
 58 val key_index [k, a, buckets : term] (
 59   t: Table { cardinal: int; buckets = buckets; hash: k -> int; equal: (k, k) -> bool },
 60   x: k
 61   | buckets @ array (bucket k a)
 62 ) : int =
 63   t.hash x & (array::length t.buckets - 1)
 64 
 65 (* [push] inserts a detached entry, which contains a valid key-value pair,
 66    into the table [t]. *)
 67 
 68 val push [k, a] (
 69   t: table k a,
 70   consumes entry: BCons { key: k; value: a; tail: unknown }
 71 ) : () =
 72   let i = key_index (t, entry.key) in
 73   array::update (t.buckets, i,
 74     fun (consumes b: bucket k a | consumes entry @ BCons { key: k; value: a; tail: unknown }) : bucket k a =
 75       entry.tail <- b;
 76       entry
 77   )
 78 
 79 (* [push_bucket t (_, b)] inserts the whole bucket [b] (that is, a list of
 80    entries), which contain valid key-value pairs, into the table [t]. For
 81    convenience, this function is curried. *)
 82 
 83 (* TEMPORARY a more lightweight/intuitive syntax for currying would be welcome! *)
 84 
 85 val push_bucket [k, a]
 86   (t: unknown) :
 87   (int, consumes bucket k a | t @ table k a) -> () =
 88   fun (_: int, consumes b: bucket k a | t @ table k a) : () =
 89     iter_bucket_down (b, fun (consumes entry: BCons { key: k; value: a; tail: unknown } | t @ table k a) : () =
 90       push (t, entry)
 91     )
 92 
 93 (* [resize] doubles the size of the hash table. A new buckets array is allocated.
 94    The existing entries are re-used: they are not re-allocated. *)
 95 
 96 val resize [k, a] (t: table k a) : () =
 97   let old_buckets = t.buckets in
 98   let new_size = array::length old_buckets * 2 in
 99   if new_size < array::max_length then begin
100     (* Update [t.buckets] immediately, so that [key_index] sees the new
101        bucket count. *)
102     t.buckets <- array::init (Up, new_size, fun (i: int) : bucket k a = BNil);
103     (* The array [old_buckets] is consumed by the following loop. This allows
104        us to grab the entries that we find in it and move them (without copy)
105        to the new array [t.buckets]. *)
106     array::transform (Up, old_buckets, push_bucket t)
107   end
108 
109 (* -------------------------------------------------------------------------- *)
110 
111 (* Merging one table into another. *)
112 
113 val merge [k, a] (consumes t1: table k a, t2: table k a) : () =
114   (* Update the cardinal of [t2]. *)
115   t2.cardinal <- t2.cardinal + t1.cardinal;
116   (* Add the entries of [t1] are added into [t2], without any memory
117      allocation. *)
118   array::transform (Up, t1.buckets, push_bucket t2);
119   (* Now, [t2] may need to be resized, possibly multiple times. It
120      would be preferable to resize [t2] just once, and to do it up
121      front, before transferring [t1] into [t2]. TEMPORARY *)
122   let rec loop (| t2 @ table k a) : () =
123     if t2.cardinal > 2 * array::length t2.buckets then begin
124       resize t2;
125       loop()
126     end
127   in
128   loop()
129 
130 (* TEMPORARY we could return a permission for t1 as an empty table. *)
131 
132 (* -------------------------------------------------------------------------- *)
133 
134 (* Clearing. *)
135 
136 val clear [k, a] (t: table k a) : () =
137   t.cardinal <- 0;
138   array::transform (Up, t.buckets, fun (i: int, _: bucket k a) : bucket k a = BNil)
139 
140 val reset [k, a] (t: table k a, capacity: int) : () =
141   t.cardinal <- 0;
142   t.buckets <- create_buckets capacity
143 
144 (* -------------------------------------------------------------------------- *)
145 
146 (* Insertion. *)
147 
148 val add [k, a] (consumes x: k, consumes v: a, t: table k a) : () =
149   push (t, BCons { key = x; value = v; tail = () });
150   t.cardinal <- t.cardinal + 1;
151   if t.cardinal > 2 * array::length t.buckets then
152     resize t
153 
154 (* -------------------------------------------------------------------------- *)
155 
156 (* Removal. *)
157 
158 (* In an earlier version of this code, instead of using an auxiliary reference
159    cell [r], we updated [t.cardinal] directly inside the function
160    [ok]. Type-checking this version of the code was more tricky, because [ok]
161    needed read/write access to [t.cardinal] at a moment when the ownership of
162    [t.buckets] had been taken away from [t] by [array::update]. This forced
163    [ok] to request (and preserve) a precise description of [t], so that the
164    type system could tell that [ok] did not modify the field [t.buckets]. *)
165 
166 val remove [k, a] (x: k, t: table k a) : option a =
167 
168   let equal = t.equal in
169 
170   (* Define an [ok] predicate that can be passed to [bucket::remove]. *)
171   let ok (key: k | x @ k) : bool =
172     equal (key, x)
173   in
174 
175   (* Allocate a reference to store the second result of [bucket::remove]. *)
176   let r = newref none in
177 
178   (* Partially apply [bucket::remove] to [ok]. Arrange for the second
179      result of [bucket::remove] to be stored in [r], so [remove] returns
180      just one result and is a suitable argument for [array::update]. *)
181   let remove (consumes b: bucket k a | x @ k * r @ ref (option a)) : bucket k a =
182     let b, v = bucket::remove (b, ok) in
183     r := v;
184     b
185   in
186 
187   (* Apply [remove] at the desired array index. *)
188   array::update
189     [post = (x @ k * r @ ref (option a))] (* WISH get rid of this *)
190     (t.buckets, key_index (t, x), remove);
191 
192   (* If the desired entry has been found and removed, update the table's
193      cardinal. Then, return the value that was removed. *)
194   if !r then
195     t.cardinal <- t.cardinal - 1;
196   !r
197 
198 (* TEMPORARY we need also [remove_all]? does ocaml offer it? *)
199 
200 (* -------------------------------------------------------------------------- *)
201 
202 (* Lookup. *)
203 
204 val mem [k, a] (x: k, t: table k a) : bool =
205   let equal = t.equal in
206   array::consult
207     [b = (bool | x @ k)]
208     (* WISH could we get rid of this type application? *)
209     (t.buckets, key_index (t, x), fun (b: bucket k a | x @ k) : bool =
210       bucket::mem (equal, x, b)
211     )
212 
213 (* We first define an internal version of [find] which locates the appropriate
214    bucket and passes it to a function [f]. *)
215 
216 val internal_find [k, a, answer] duplicable a => (
217   f: (bucket k a | x @ k) -> answer,
218   x: k,
219   t: table k a
220 ) : answer =
221   (* We use [array::consult] instead of [array::get] because [k] is not
222      duplicable. *)
223   array::consult
224     [pre = (x @ k), b = (answer | x @ k)] (* WISH get rid of this *)
225     (t.buckets, key_index (t, x), f)
226 
227 (* By instantiating [f] with a function that stops at the first match,
228    we obtain [find]. *)
229 
230 val find [k, a] duplicable a => (x: k, t: table k a) : option a =
231   let equal = t.equal in
232   let f (b: bucket k a | x @ k) : option a =
233     bucket::assoc (equal, x, b)
234   in
235   internal_find (f, x, t)
236 
237 (* By instantiating [f] with a function that searches for all matches,
238    we obtain [find_all]. *)
239 
240 val find_all [k, a] duplicable a => (x: k, t: table k a) : list::list a =
241   let equal = t.equal in
242   let f (b: bucket k a | x @ k) : list::list a =
243     bucket::assoc_all (equal, x, b)
244   in
245   internal_find (f, x, t)
246 
247 (* -------------------------------------------------------------------------- *)
248 
249 (* Update. *)
250 
251 (* [update (t, x, f)] looks for the key [x] in the table [t]. It calls the
252    user-supplied function [f] exactly once, and passes it either the value [v]
253    that is associated with the key [x], or nothing, if the key [x] does not
254    appear in the table. The function [f] returns either a new value, or
255    nothing. In the former case, the new value replaces the value [v]. In the
256    latter case, the key [x] is removed (if it was there at all). *)
257 
258 val option_cardinal [a] (o: option a) : int =
259   match o with
260   | None -> 0
261   | Some -> 1
262   end
263 
264 val update [k, a, pre : perm, post : perm] (
265   t: table k a,
266   consumes x: k,
267   f: (consumes (option a | pre)) -> (option a | post)
268   | consumes pre
269 ) : (| post) =
270 
271   (* Wrap [f] so as to record the increase or decrease in the table's cardinal. *)
272   let delta = newref () in
273   let f (consumes ov: option a | consumes (pre * delta @ ref ())) :
274         (option a | post * delta @ ref int) =
275     let decrease = option_cardinal ov in
276     let ov = f ov in
277     let increase = option_cardinal ov in
278     delta := increase - decrease;
279     ov
280   in
281 
282   (* Adapt [bucket::update]. *)
283   let equal = t.equal in
284   let update (consumes b: bucket k a | consumes (x @ k * pre * delta @ ref ())) : (bucket k a | post * delta @ ref int) =
285     bucket::update [post = (post * delta @ ref int)] (equal, b, x, f)
286     (* WISH could we get rid of this type application? without it, I get an error *)
287   in
288 
289   (* Call [array::update]. *)
290   array::update [post = (post * delta @ ref int)]
291     (t.buckets, key_index (t, x), update);
292     (* WISH could we get rid of this type application?
293        without it, the call to array::update succeeds
294        but we lose the permission for [delta] and the cardinal update (below) fails *)
295 
296   (* Update the table's cardinal. *)
297   t.cardinal <- t.cardinal + !delta
298 
299 (* -------------------------------------------------------------------------- *)
300 
301 (* Iteration. *)
302 
303 (* For simplicity, we assign [fold] a type that does not allow performing a
304    strong update on the keys or values. If we used a primitive operation on
305    arrays that allows performing a strong update on an array, then we could
306    do better. TEMPORARY *)
307 
308 val fold [k, a, b] (
309   t: table k a,
310   consumes seed: b,
311   f: (k, a, consumes b) -> b
312 ) : b =
313   array::fold (Up, t.buckets, seed, fun (bck: bucket k a, consumes accu: b) : b =
314     bucket::fold (bck, accu, f)
315   )
316 
317 val iter [k, a, p : perm] (
318   t: table k a,
319   f: (k, a | p) -> ()
320   | p
321 ) : () =
322   fold [b=(| p)] (t, (), fun (x: k, v: a, (| p)) : () =
323     f (x, v)
324   )
325   (* WISH could we get rid of this type application? *)
326 
327 (* -------------------------------------------------------------------------- *)
328 
329 (* Statistics. *)
330 
331 val cardinal [k, a] (t: table k a) : int =
332   t.cardinal
333 
334 data statistics = Statistics {
335   num_bindings: int;
336   num_buckets: int;
337   max_bucket_length: int;
338   bucket_histogram: array int
339 }
340 
341 val stats [k, a] (t: table k a) : statistics =
342   (* Compute the maximum bucket length. *)
343   let mbl =
344     array::fold (Up, t.buckets, 0, fun (b: bucket k a, accu: int) : int =
345       max (bucket::length b, accu)
346     )
347   in
348   (* Build a histogram, which maps bucket lengths to bucket counts. *)
349   let histo = array::create (mbl + 1, 0) in
350   array::iter (Up, t.buckets, fun (b: bucket k a | histo @ array int) : () =
351     let l = bucket::length b in
352     array::set (histo, l, array::get (histo, l) + 1)
353   );
354   (* Return a statistics record. *)
355   Statistics {
356     num_bindings = t.cardinal;
357     num_buckets  = array::length t.buckets;
358     max_bucket_length = mbl;
359     bucket_histogram = histo
360   }
361 
362 (*
363   Local Variables:
364   compile-command: "../mezzo hashtable.mz"
365   End:
366 *)