/home/jonathan/Code/mezzo/src/stdlib/persistentarray.mz.html

  1 (* Persistent arrays in the style of Filliâtre. *)
  2 
  3 (* ---------------------------------------------------------------------------- *)
  4 
  5 (* A persistent array is duplicable, but has mutable internal state.
  6    Furthermore, this state is shared: whenever a new version of the
  7    array is created, the new version shares much of its state with
  8    the previous version. Thus, internally, there exist a region of
  9    points, which together form the representation of several persistent
 10    arrays. *)
 11 
 12 data parray a =
 13   PArray {
 14     (* The region which this persistent array is part of. *)
 15     region: wref::wref (region a);
 16     (* The point in the region which represents this persistent array. *)
 17     point: dynamic (* point a, owned by region *)
 18   }
 19 
 20 (* A region serves as an adopter for a set of points. *)
 21 
 22 mutable data region a =
 23   Region adopts point a
 24 
 25 (* A point is either flat -- a primitive array -- or a pair of a pointer to
 26    another point (in the same region) and an undo instruction. We use three
 27    fields in either case, so as to allow tag updates in either direction. *)
 28 
 29 mutable data point a =
 30 | PFlat { contents: array a; unused1: (); unused2: () }
 31 | PUndo { previous: dynamic; offset: int; element:  a }
 32 
 33 (* One shortcoming of this encoding of persistent arrays is that we are
 34    not able to prove that [parray a] is covariant with respect to [a]. *)
 35 
 36 (* ---------------------------------------------------------------------------- *)
 37 
 38 (* Creation of a new persistent array. *)
 39 
 40 val create [a] duplicable a => (n: int, x: a) : parray a =
 41   let t = array::create (n, x) in
 42   let p = PFlat { contents = t; unused1 = (); unused2 = () } in
 43   let r : region a = Region in
 44   give p to r;
 45   PArray { region = wref::new r; point = p }
 46 
 47 (* ---------------------------------------------------------------------------- *)
 48 
 49 (* Bringing a point to a flat form. This is where the trickery lies. *)
 50 
 51 (* TEMPORARY the call from [revert_link] to [revert] is not a tail call:
 52    we effectively use the stack to reverse the list. In principle, we
 53    should be able to work in constant space. I guess we would need to
 54    use a ref cell to hold the head of the reversed list. Do this after
 55    we have set up a test suite for this module. *)
 56 
 57 (* [revert (r, p)] requires a region [r] and a point [p], which is a member of
 58    [r]. It takes [p] away from [r] and returns the underlying array in a state
 59    that corresponds to [p]. The point [p] itself is left in a temporary state
 60    where its tag is [PUndo] and its fields are uninitialized. *)
 61 
 62 (* This convention about the final state of [p] is adopted so as to avoid
 63    excessive writing. If the convention was that [p] should be left in a
 64    [PFlat] state, with correctly initialized fields, then every point along
 65    the chain would be overwritten twice: once from [PUndo] to [PFlat], and
 66    once from [PFlat] to [PUndo] in the other direction. We save a factor of
 67    two (rough estimate!) in memory traffic. *)
 68 
 69 val rec revert [a] duplicable a =>
 70   (r: region a, p: dynamic)
 71 : (array a | p @ PUndo { previous: unknown; offset: unknown; element: unknown })
 72 =
 73   (* Take [p] out of the region [r]. *)
 74   take p from r;
 75   match p with
 76   | PFlat ->
 77       (* This is the end of the line. Steal the underlying array,
 78          and change the tag of [p] to [PUndo]. *)
 79       let t = p.contents in
 80       tag of p <- PUndo;
 81       t
 82   | PUndo ->
 83       (* Continue below. *)
 84       revert_link (r, p)
 85   end
 86 
 87 (* The auxiliary function [revert_link (r, p)] performs the same task
 88    as [revert (r, p)], but assumes that [p] has already been taken
 89    away from [r] and is a valid [PUndo] point. We isolate this function
 90    so as to avoid code duplication: it has two call sites. *)
 91 
 92 and revert_link [a] duplicable a =>
 93   (r: region a, consumes p: PUndo { previous: dynamic; offset: int; element: a })
 94 : (array a | p @ PUndo { previous: unknown; offset: unknown; element: unknown })
 95 =
 96   let previous = p.previous in
 97   (* Make a recursive call on [previous]. Because the [previous]
 98      pointers cannot form a cycle, there is no need to give [p]
 99      back to the region before making this recursive call. *)
100   let t = revert (r, previous) in
101   (* Read the field [p.offset] now, after the recursive call, because
102      if we read it as part of the pattern matching construct above,
103      then the compiler might not be smart enough to figure out that
104      the read can be delayed. Same concern about [p.element]. *)
105   let i = p.offset in
106   (* Update the representation of [previous] so that it becomes
107      a valid point again. Its tag remains [PUndo], but its
108      [previous] field now points in the other direction. *)
109   previous.previous <- p;
110   previous.offset <- i;
111   previous.element <- array::get (t, i);
112   (* We are now done with [previous]. *)
113   give previous to r;
114   (* Update the underlying array. *)
115   array::set (t, i, p.element);
116   (* The point [p] is now in the desired state: its tag is [PUndo]
117      and its fields contain bogus values. *)
118   t
119 
120 (* This non-recursive wrapper function also requires a region [r] and
121    a point [p]. It also takes [p] away from [r]. It returns the point
122    [p] in a valid [PFlat] state. *)
123 
124 (* By convention, the value [t] returned by [reroot] is [p.contents].
125    This is not essential, but is convenient, and allows us to write
126    a cool return type that involves explicit sharing. *)
127 
128 val reroot [a] duplicable a => (r: region a, p: dynamic)
129   : (t: array a | p @ PFlat { contents = t; unused1: (); unused2: () })
130 =
131 
132   take p from r;
133   match p with
134   | PFlat ->
135       (* If [p] is already flat, there is nothing to do. *)
136       p.contents
137   | PUndo ->
138       let t = revert_link (r, p) in
139       (* [p] is now in an uninitialized state. Re-initialize it. *)
140       tag of p <- PFlat;
141       p.contents <- t;
142       p.unused1 <- ();
143       p.unused2 <- ();
144       t
145   end
146 
147 (* ---------------------------------------------------------------------------- *)
148 
149 (* Temporary access to the underlying array. *)
150 
151 (* TEMPORARY [borrow] is dangerous, because [f] is given read/write
152    access to the underlying array, which it must promise not to modify.
153    Thus, we do not publish it. If we had a [const] modifier, we would
154    be able to publish it. *)
155 
156 val borrow [a, b, p : perm] duplicable a => (
157   pa: parray a,
158   f: (array a | consumes p) -> b
159 | consumes p
160 ) : b =
161   (* Acquire the lock, so as to get access to the region [r]. *)
162   wref::borrow (pa.region, fun (r : region a | consumes p) : b =
163     (* Re-root the persistent array at [p]. *)
164     let p = pa.point in
165     let t = reroot (r, p) in
166     (* [p] is now flat. Pass the underlying array to [f]. *)
167     let result = f t in
168     (* Give [p] back to [r]. *)
169     give p to r;
170     (* Done. *)
171     result
172   )
173 
174 (* ---------------------------------------------------------------------------- *)
175 
176 (* Read access. *)
177 
178 val get [a] duplicable a => (pa: parray a, i: int) : a =
179   borrow (pa, fun (t: array a) : a =
180     array::get (t, i)
181   )
182 
183 (* ---------------------------------------------------------------------------- *)
184 
185 (* Write access. *)
186 
187 (* [set] cannot be defined in terms of [borrow], because it needs access not
188    only to the underlying array [t], but also to the point [p]. We could
189    define a slightly more general version of [borrow] that provides us with
190    [t] and [p]. We will see. *)
191 
192 val set [a] duplicable a => (pa: parray a, i: int, v: a) : parray a =
193   (* Acquire the lock, so as to get access to the region [r]. *)
194   wref::borrow (pa.region, fun (r : region a) : parray a =
195     (* Re-root the persistent array at [p]. *)
196     let p = pa.point in
197     let t = reroot (r, p) in
198     (* [p] is now flat. Access the underlying array [t], and update [p]. *)
199     tag of p <- PUndo;
200     p.offset <- i;
201     p.element <- array::get (t, i);
202     (* Update the underlying array. *)
203     array::set (t, i, v);
204     (* Create a new point [q]. *)
205     let q = PFlat { contents = t; unused1 = (); unused2 = () } in
206     give q to r;
207     (* Finish updating [p], and give it back. *)
208     p.previous <- q;
209     give p to r;
210     (* Wrap the resulting point as a new persistent array. *)
211     PArray { region = pa.region; point = q }
212   )
213 
214 (* This organization might seem somewhat inefficient, because a persistent
215    array involves both a lock (which is implicit in the weak reference) and
216    an adopter (the region, which is protected by the lock). Hence, there are
217    two barriers at runtime: we must first acquire the lock, then take the
218    desired points from the region. That said, we acquire the lock just once,
219    and then can take as many points as we like, while paying just one dynamic
220    check per point. One might wonder whether we could work with just one lock
221    and no region at all. Each point would contain a pointer to a lock, so
222    whenever we wish to follow a link from a point to another point, we would
223    check at runtime that the lock associated with the new point is the lock
224    that we already hold, and deduce that we can safely access this point.
225    This variant might be correct, but it is unclear at present how it would
226    be type-checked. The use of adoption and abandon may seem costly, but it
227    also serves to protect us from ``taking'' a single point twice. It is
228    unclear how a purely lock-based idiom would achieve this: when we reach
229    a new point, we have no way of testing whether we have already claimed
230    the ownership of this point. Anyway, this deserves further research! *)
231 
232 
233 (*
234 Local Variables:
235 compile-command: "../mezzo persistentarray.mz"
236 End:
237 *)