1 (* Persistent arrays in the style of FilliĆ¢tre. *) 2 3 (* ---------------------------------------------------------------------------- *) 4 5 (* A persistent array is duplicable, but has mutable internal state. 6 Furthermore, this state is shared: whenever a new version of the 7 array is created, the new version shares much of its state with 8 the previous version. Thus, internally, there exist a region of 9 points, which together form the representation of several persistent 10 arrays. *) 11 12 data parray a = 13 PArray { 14 (* The region which this persistent array is part of. *) 15 region: wref::wref (region a); 16 (* The point in the region which represents this persistent array. *) 17 point: dynamic (* point a, owned by region *) 18 } 19 20 (* A region serves as an adopter for a set of points. *) 21 22 mutable data region a = 23 Region adopts point a 24 25 (* A point is either flat -- a primitive array -- or a pair of a pointer to 26 another point (in the same region) and an undo instruction. We use three 27 fields in either case, so as to allow tag updates in either direction. *) 28 29 mutable data point a = 30 | PFlat { contents: array a; unused1: (); unused2: () } 31 | PUndo { previous: dynamic; offset: int; element: a } 32 33 (* One shortcoming of this encoding of persistent arrays is that we are 34 not able to prove that [parray a] is covariant with respect to [a]. *) 35 36 (* ---------------------------------------------------------------------------- *) 37 38 (* Creation of a new persistent array. *) 39 40 val create [a] duplicable a => (n: int, x: a) : parray a = 41 let t = array::create (n, x) in 42 let p = PFlat { contents = t; unused1 = (); unused2 = () } in 43 let r : region a = Region in 44 give p to r; 45 PArray { region = wref::new r; point = p } 46 47 (* ---------------------------------------------------------------------------- *) 48 49 (* Bringing a point to a flat form. This is where the trickery lies. *) 50 51 (* TEMPORARY the call from [revert_link] to [revert] is not a tail call: 52 we effectively use the stack to reverse the list. In principle, we 53 should be able to work in constant space. I guess we would need to 54 use a ref cell to hold the head of the reversed list. Do this after 55 we have set up a test suite for this module. *) 56 57 (* [revert (r, p)] requires a region [r] and a point [p], which is a member of 58 [r]. It takes [p] away from [r] and returns the underlying array in a state 59 that corresponds to [p]. The point [p] itself is left in a temporary state 60 where its tag is [PUndo] and its fields are uninitialized. *) 61 62 (* This convention about the final state of [p] is adopted so as to avoid 63 excessive writing. If the convention was that [p] should be left in a 64 [PFlat] state, with correctly initialized fields, then every point along 65 the chain would be overwritten twice: once from [PUndo] to [PFlat], and 66 once from [PFlat] to [PUndo] in the other direction. We save a factor of 67 two (rough estimate!) in memory traffic. *) 68 69 val rec revert [a] duplicable a => 70 (r: region a, p: dynamic) 71 : (array a | p @ PUndo { previous: unknown; offset: unknown; element: unknown }) 72 = 73 (* Take [p] out of the region [r]. *) 74 take p from r; 75 match p with 76 | PFlat -> 77 (* This is the end of the line. Steal the underlying array, 78 and change the tag of [p] to [PUndo]. *) 79 let t = p.contents in 80 tag of p <- PUndo; 81 t 82 | PUndo -> 83 (* Continue below. *) 84 revert_link (r, p) 85 end 86 87 (* The auxiliary function [revert_link (r, p)] performs the same task 88 as [revert (r, p)], but assumes that [p] has already been taken 89 away from [r] and is a valid [PUndo] point. We isolate this function 90 so as to avoid code duplication: it has two call sites. *) 91 92 and revert_link [a] duplicable a => 93 (r: region a, consumes p: PUndo { previous: dynamic; offset: int; element: a }) 94 : (array a | p @ PUndo { previous: unknown; offset: unknown; element: unknown }) 95 = 96 let previous = p.previous in 97 (* Make a recursive call on [previous]. Because the [previous] 98 pointers cannot form a cycle, there is no need to give [p] 99 back to the region before making this recursive call. *) 100 let t = revert (r, previous) in 101 (* Read the field [p.offset] now, after the recursive call, because 102 if we read it as part of the pattern matching construct above, 103 then the compiler might not be smart enough to figure out that 104 the read can be delayed. Same concern about [p.element]. *) 105 let i = p.offset in 106 (* Update the representation of [previous] so that it becomes 107 a valid point again. Its tag remains [PUndo], but its 108 [previous] field now points in the other direction. *) 109 previous.previous <- p; 110 previous.offset <- i; 111 previous.element <- array::get (t, i); 112 (* We are now done with [previous]. *) 113 give previous to r; 114 (* Update the underlying array. *) 115 array::set (t, i, p.element); 116 (* The point [p] is now in the desired state: its tag is [PUndo] 117 and its fields contain bogus values. *) 118 t 119 120 (* This non-recursive wrapper function also requires a region [r] and 121 a point [p]. It also takes [p] away from [r]. It returns the point 122 [p] in a valid [PFlat] state. *) 123 124 (* By convention, the value [t] returned by [reroot] is [p.contents]. 125 This is not essential, but is convenient, and allows us to write 126 a cool return type that involves explicit sharing. *) 127 128 val reroot [a] duplicable a => (r: region a, p: dynamic) 129 : (t: array a | p @ PFlat { contents = t; unused1: (); unused2: () }) 130 = 131 132 take p from r; 133 match p with 134 | PFlat -> 135 (* If [p] is already flat, there is nothing to do. *) 136 p.contents 137 | PUndo -> 138 let t = revert_link (r, p) in 139 (* [p] is now in an uninitialized state. Re-initialize it. *) 140 tag of p <- PFlat; 141 p.contents <- t; 142 p.unused1 <- (); 143 p.unused2 <- (); 144 t 145 end 146 147 (* ---------------------------------------------------------------------------- *) 148 149 (* Temporary access to the underlying array. *) 150 151 (* TEMPORARY [borrow] is dangerous, because [f] is given read/write 152 access to the underlying array, which it must promise not to modify. 153 Thus, we do not publish it. If we had a [const] modifier, we would 154 be able to publish it. *) 155 156 val borrow [a, b, p : perm] duplicable a => ( 157 pa: parray a, 158 f: (array a | consumes p) -> b 159 | consumes p 160 ) : b = 161 (* Acquire the lock, so as to get access to the region [r]. *) 162 wref::borrow (pa.region, fun (r : region a | consumes p) : b = 163 (* Re-root the persistent array at [p]. *) 164 let p = pa.point in 165 let t = reroot (r, p) in 166 (* [p] is now flat. Pass the underlying array to [f]. *) 167 let result = f t in 168 (* Give [p] back to [r]. *) 169 give p to r; 170 (* Done. *) 171 result 172 ) 173 174 (* ---------------------------------------------------------------------------- *) 175 176 (* Read access. *) 177 178 val get [a] duplicable a => (pa: parray a, i: int) : a = 179 borrow (pa, fun (t: array a) : a = 180 array::get (t, i) 181 ) 182 183 (* ---------------------------------------------------------------------------- *) 184 185 (* Write access. *) 186 187 (* [set] cannot be defined in terms of [borrow], because it needs access not 188 only to the underlying array [t], but also to the point [p]. We could 189 define a slightly more general version of [borrow] that provides us with 190 [t] and [p]. We will see. *) 191 192 val set [a] duplicable a => (pa: parray a, i: int, v: a) : parray a = 193 (* Acquire the lock, so as to get access to the region [r]. *) 194 wref::borrow (pa.region, fun (r : region a) : parray a = 195 (* Re-root the persistent array at [p]. *) 196 let p = pa.point in 197 let t = reroot (r, p) in 198 (* [p] is now flat. Access the underlying array [t], and update [p]. *) 199 tag of p <- PUndo; 200 p.offset <- i; 201 p.element <- array::get (t, i); 202 (* Update the underlying array. *) 203 array::set (t, i, v); 204 (* Create a new point [q]. *) 205 let q = PFlat { contents = t; unused1 = (); unused2 = () } in 206 give q to r; 207 (* Finish updating [p], and give it back. *) 208 p.previous <- q; 209 give p to r; 210 (* Wrap the resulting point as a new persistent array. *) 211 PArray { region = pa.region; point = q } 212 ) 213 214 (* This organization might seem somewhat inefficient, because a persistent 215 array involves both a lock (which is implicit in the weak reference) and 216 an adopter (the region, which is protected by the lock). Hence, there are 217 two barriers at runtime: we must first acquire the lock, then take the 218 desired points from the region. That said, we acquire the lock just once, 219 and then can take as many points as we like, while paying just one dynamic 220 check per point. One might wonder whether we could work with just one lock 221 and no region at all. Each point would contain a pointer to a lock, so 222 whenever we wish to follow a link from a point to another point, we would 223 check at runtime that the lock associated with the new point is the lock 224 that we already hold, and deduce that we can safely access this point. 225 This variant might be correct, but it is unclear at present how it would 226 be type-checked. The use of adoption and abandon may seem costly, but it 227 also serves to protect us from ``taking'' a single point twice. It is 228 unclear how a purely lock-based idiom would achieve this: when we reach 229 a new point, we have no way of testing whether we have already claimed 230 the ownership of this point. Anyway, this deserves further research! *) 231 232 233 (* 234 Local Variables: 235 compile-command: "../mezzo persistentarray.mz" 236 End: 237 *)