-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathLRUBoundedMap_CustomHAMT.hs
376 lines (341 loc) · 16.6 KB
/
LRUBoundedMap_CustomHAMT.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
{-# LANGUAGE BangPatterns, FlexibleContexts #-}
{-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-}
module LRUBoundedMap_CustomHAMT ( Map
, empty
, toList
, null
, size
, member
, notMember
, insert
, update
, delete
, lookup
, lookupNoLRU
, popOldest
, popNewest
, valid
) where
import Prelude hiding (lookup, null)
import qualified Data.Hashable as H
import Data.Hashable (Hashable)
import Data.Bits
import Data.Maybe
import Data.Word
import Data.Foldable (minimumBy, maximumBy)
import Data.List (find, partition, foldl')
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as VM
import Control.Monad
import Control.Monad.Writer
import Control.DeepSeq (NFData(rnf))
-- Associative array implemented on top of a Hashed Array Mapped Trie (HAMT), based on the
-- implementation in Data.HashMap. Basically a prefix tree over the bits of key hashes, with a
-- higher than binary branching factor. Additional least / most recently used bounds for
-- subtrees are stored so the data structure can have an upper bound on the number of elements
-- and remove the least recently used one overflow. The other bound allows to retrieve the item
-- which was inserted / touched last
--
-- TODO: LRU / bounded aspect not working yet
--
-- TODO: There are lots of things to be optimized
data Map k v = Map { mLimit :: !Int
, mTick :: !Word64 -- We use a 'tick', which we keep incrementing, to keep
-- track of how old elements are relative to each other
, mSize :: !Int -- Cached to make size O(1) instead of O(n)
, mHAMT :: !(HAMT k v)
}
instance (NFData k, NFData v) => NFData (Map k v) where
rnf (Map l t s h) = rnf l `seq` rnf t `seq` rnf s `seq` rnf h
type Hash = Word
{-# INLINE hash #-}
hash :: H.Hashable a => a -> Hash
hash = fromIntegral . H.hash
data Leaf k v = L !k !v !Word64 -- LRU tick
instance (NFData k, NFData v) => NFData (Leaf k v) where
rnf (L k v _) = rnf k `seq` rnf v
data OldNew = OldNew !Int !Int
-- Note that we don't have bitmap indexing for partially filled nodes. This simplifies the code,
-- but comes at the expense of memory usage and access
data HAMT k v = Empty
| Node !OldNew !(V.Vector (HAMT k v))
| Leaf !Hash !(Leaf k v)
| Collision !Hash ![Leaf k v]
instance (NFData k, NFData v) => NFData (HAMT k v) where
rnf Empty = ()
rnf (Leaf _ l) = rnf l
rnf (Node m ch) = m `seq` rnf ch
rnf (Collision _ ch) = rnf ch
{-# INLINE bitsPerSubkey #-}
bitsPerSubkey :: Int
bitsPerSubkey = 4
{-# INLINE subkeyMask #-}
subkeyMask :: Hash
subkeyMask = (1 `shiftL` bitsPerSubkey) - 1
{-# INLINE maxChildren #-}
maxChildren :: Int
maxChildren = 1 `shiftL` bitsPerSubkey
-- Retrieve a leaf child index from a hash and a subkey offset
{-# INLINE indexNode #-}
indexNode :: Hash -> Int -> Int
indexNode h s = fromIntegral $ (h `shiftR` s) .&. subkeyMask
-- Insert a new element into the map, return the new map and the truncated
-- element (if over the limit)
{-# INLINEABLE insert #-}
insert :: (Eq k, Hashable k) => k -> v -> Map k v -> (Map k v, Maybe (k, v))
insert !k !v !m = insertInternal False k v m
data Pair a b = Pair !a !b
-- TODO: Made a terrible mess out of this function, split into insert / update case,
-- remove most of the strictness annotations, lots of optimization potential
-- TODO: No LRU update implemented
{-# INLINE insertInternal #-}
insertInternal :: (Eq k, Hashable k) => Bool -> k -> v -> Map k v -> (Map k v, Maybe (k, v))
insertInternal !updateOnly !kIns !vIns !m =
let go !h !k !v !_ Empty = if updateOnly
then Pair Empty False -- We're in update mode, no insert
else Pair (Leaf h $ L k v tick) True
go !h !k !v !s !t@(Leaf !lh !li@(L !lk !_ !_)) =
if h == lh
then if k == lk
then Pair (Leaf h $ L k v tick) False -- Update value
else -- We have a hash collision, insert
if updateOnly -- ...unless we're in update mode
then Pair t False
else Pair (Collision h [L k v tick, li]) True
else -- Expand leaf into interior node
if updateOnly
then Pair t False
else let !ia = indexNode h s
!ib = indexNode lh s
in Pair ( Node (OldNew 0 0) $! V.create $ do
vec <- VM.replicate maxChildren Empty
if ia /= ib -- Subkey collision?
then do VM.write vec ia $! Leaf h (L k v tick)
VM.write vec ib t
else do -- Collision, add one level
let !(Pair subtree _) = go h k v (s + bitsPerSubkey) t
VM.write vec ia $! subtree
return vec
) True
go !h !k !v !s !(Node _ ch) =
let !idx = indexNode h s
!subtree = ch `V.unsafeIndex` idx
!(Pair subtree' i) = -- Traverse into child with matching subkey
go h k v (s + bitsPerSubkey) subtree
in subtree' `seq` i `seq` Pair (Node (OldNew 0 0) $! ch V.// [(idx, subtree')]) i
go !h !k !v !s !t@(Collision colh ch) =
if updateOnly
then if h == colh
then let traverseUO [] = [] -- No append in update mode
traverseUO (l@(L lk _ _):xs) =
if lk == k
then L k v tick : xs
else l : traverseUO xs
in Pair (Collision h $! traverseUO ch) False
else Pair t False
else if h == colh
then let trav [] = [L k v tick] -- Append new leaf
trav (l@(L lk _ _):xs) =
if lk == k
then L k v tick : xs -- Update value
else l : trav xs
in Pair (Collision h $! trav ch)
(length ch /= length (trav ch)) -- TODO: Slow
else -- Expand collision into interior node
go h k v s . Node (OldNew 0 0) $! V.create $ do
vec <- VM.replicate maxChildren Empty
VM.write vec (indexNode colh s) t
return vec
!(Pair m' i') = go (hash kIns) kIns vIns 0 $ mHAMT m
!tick = mTick m
in m' `seq` i' `seq` mSize m `seq`
( m { mHAMT = m'
, mSize = mSize m + if i' then 1 else 0
, mTick = tick + 1
}
, Nothing
)
empty :: Int -> Map k v
empty limit | limit >= 1 = Map { mLimit = limit
, mTick = 0
, mSize = 0
, mHAMT = Empty
}
| otherwise = error "limit for LRUBoundedMap needs to be >= 1"
{-# INLINEABLE size #-}
size :: Map k v -> (Int, Int)
size m = (mSize m, mLimit m)
-- O(n) size-by-traversal
sizeTraverse :: Map k v -> Int
sizeTraverse m = go 0 $ mHAMT m
where go n Empty = n
go n (Leaf _ _) = n + 1
go n (Node _ ch) = V.foldl' (go) n ch
go n (Collision _ ch) = n + length ch
{-# INLINEABLE null #-}
null :: Map k v -> Bool
null m = case mHAMT m of Empty -> True; _ -> False
{-# INLINEABLE member #-}
member :: (Eq k, Hashable k) => k -> Map k v -> Bool
member k m = isJust . snd $ lookup k m
{-# INLINEABLE notMember #-}
notMember :: (Eq k, Hashable k) => k -> Map k v -> Bool
notMember k m = not $ member k m
{-# INLINEABLE toList #-}
toList :: Map k v -> [(k, v)]
toList m = go [] $ mHAMT m
where go l Empty = l
go l (Leaf _ (L k v _)) = (k, v) : l
go l (Node _ ch) = V.foldl' (\l' n -> go l' n) l ch
go l (Collision _ ch) = foldl' (\l' (L k v _) -> (k, v) : l') l ch
-- Lookup element, also update LRU (TODO: No LRU update implemented)
{-# INLINEABLE lookup #-}
lookup :: (Eq k, Hashable k) => k -> Map k v -> (Map k v, Maybe v)
lookup k' m = (m, go (hash k') k' 0 $ mHAMT m)
where go !_ !_ !_ Empty = Nothing
go h k _ (Leaf lh (L lk lv _))
| lh /= h = Nothing
| lk /= k = Nothing
| otherwise = Just lv
go h k s (Node _ ch) = go h k (s + bitsPerSubkey) (ch `V.unsafeIndex` indexNode h s)
go h k _ (Collision colh ch)
| colh == h = (\(L _ lv _) -> lv) <$> find (\(L lk _ _) -> lk == k) ch
| otherwise = Nothing
-- Lookup element
{-# INLINEABLE lookupNoLRU #-}
lookupNoLRU :: (Eq k, Hashable k) => k -> Map k v -> Maybe v
lookupNoLRU k' m = go (hash k') k' 0 $ mHAMT m
where go !_ !_ !_ Empty = Nothing
go h k _ (Leaf lh (L lk lv _))
| lh /= h = Nothing
| lk /= k = Nothing
| otherwise = Just lv
go h k s (Node _ ch) = go h k (s + bitsPerSubkey) (ch `V.unsafeIndex` indexNode h s)
go h k _ (Collision colh ch)
| colh == h = (\(L _ lv _) -> lv) <$> find (\(L lk _ _) -> lk == k) ch
| otherwise = Nothing
-- TODO: No LRU update implemented
{-# INLINEABLE delete #-}
delete :: (Eq k, Hashable k) => k -> Map k v -> (Map k v, Maybe v)
delete k' m =
let go !_ !_ !_ Empty = (Empty, Nothing)
go h k _ t@(Leaf lh (L lk lv _))
| lh /= h = (t, Nothing)
| lk /= k = (t, Nothing)
| otherwise = (Empty, Just lv)
go h k s (Node _ ch) =
let !idx = indexNode h s
!subtree = ch `V.unsafeIndex` idx
!(subtree', del') = go h k (s + bitsPerSubkey) subtree
!ch' = ch V.// [(idx, subtree')]
!used = -- Non-empty slots in the updated child vector
V.ifoldr (\i t' u -> case t' of Empty -> u; _ -> i : u) [] ch'
in case used of
[] -> (Empty, del') -- We removed the last element, delete node
(x:[]) -> -- If we deleted our second last element, we
-- also need to check whether the last child
-- is a leaf / collision
let !lst = ch' `V.unsafeIndex` x in case lst of
Leaf _ _ -> (lst, del') -- Replace node by leaf
Collision _ _ -> (lst, del') -- ...
_ -> (Node (OldNew 0 0) ch', del')
_ -> (Node (OldNew 0 0) ch', del')
go h k _ t@(Collision colh ch)
| colh == h = let (delch', ch') = partition (\(L lk _ _) -> lk == k) ch
in if length ch' == 1
then -- Deleted last remaining collision, it's a leaf node now
(Leaf h $ head ch', Just $ (\((L _ lv _):[]) -> lv) delch')
else (Collision h ch', (\(L _ lv _) -> lv) <$> listToMaybe delch')
| otherwise = (t, Nothing)
!(m', del) = go (hash k') k' 0 $ mHAMT m
in ( m { mHAMT = m'
, mSize = mSize m - if isJust del then 1 else 0
}
, del
)
popNewest, popOldest :: (Eq k, Hashable k) => Map k v -> (Map k v, Maybe (k, v))
popNewest = popInternal False
popOldest = popInternal True
-- Delete and return most / least recently used item
--
-- TODO: We first find the item and then delete it by key, could do this with a
-- single traversal instead
popInternal :: (Eq k, Hashable k) => Bool -> Map k v -> (Map k v, Maybe (k, v))
popInternal popOld m =
case go $ mHAMT m of
Just k -> let (m', Just v) = delete k m in (m', Just (k, v))
Nothing -> (m, Nothing)
where go Empty = Nothing
go (Leaf _ (L lk _ _)) = Just lk
go (Node (OldNew old new) ch) = go $ ch `V.unsafeIndex` if popOld then old else new
go (Collision _ ch) = Just . (\(L lk _ _) -> lk)
. ( if popOld
then minimumBy
else maximumBy
)
(\(L _ _ a) (L _ _ b) -> compare a b)
$ ch
{-# INLINEABLE update #-}
update :: (Eq k, Hashable k) => k -> v -> Map k v -> Map k v
update k v m =
case insertInternal True k v m of
(m', Nothing) -> m'
_ -> error "LRUBoundedMap.update: insertInternal truncated with updateOnly"
valid :: (Eq k, Hashable k, Eq v) => Map k v -> Maybe String
valid m =
let w =
execWriter $ do
when (mLimit m < 1) $
tell "Invalid limit (< 1)\n"
when ((fst $ size m) /= sizeTraverse m) $
tell "Mismatch beween cached and actual size\n"
--when ((fst $ size m) > mLimit m)
-- $ tell "Size over the limit\n"
let trav s t =
case t of
Leaf h (L k v _) -> checkKey h k v
Collision h ch -> do
when (length ch < 2) $
tell "Hash collision node with <2 children\n"
forM_ ch $ \(L lk lv _) -> checkKey h lk lv
Node _ ch -> do
let used =
V.ifoldr (\i t' u -> case t' of Empty -> u; _ -> i : u) [] ch
when (s + bitsPerSubkey > finiteBitSize (undefined :: Word)) $
tell "Subkey shift too large during traversal\n"
when (V.length ch /= maxChildren) $
tell "Node with a child vector /= maxChildren\n"
when (length used == 0) $
tell "Node with only empty children\n"
when (length used == 1) $
case ch V.! head used of
Leaf _ _ -> tell "Node with single Leaf child\n"
Collision _ _ -> tell "Node with single Collision child\n"
_ -> return ()
forM_ (V.toList ch) $ trav (s + bitsPerSubkey)
Empty -> return ()
checkKey h k v = do
when (hash k /= h) $
tell "Hash / key mismatch\n"
case snd $ lookup k m of
Nothing ->
tell "Can't lookup key found during traversal\n"
Just v' -> when (v /= v') .
tell $ "Lookup of key found during traversal yields " ++
"different value\n"
let (m', v') = delete k m
when ((fst $ size m') /= (fst $ size m) - 1) $
tell "Deleting key did not reduce size\n"
when (fromMaybe v v' /= v) $
tell "Delete returned wrong value\n"
in trav 0 $ mHAMT m
let keysL = map (fst) $ toList m
allDeleted = foldl' (\r k -> fst $ delete k r) m keysL
when (length keysL /= (fst $ size m)) $
tell "Length of toList does not match size\n"
unless (null allDeleted) $
tell "Deleting all elements does not result in an empty map\n"
unless ((fst $ size allDeleted) == 0) $
tell "Deleting all elements does not result in a zero size map\n"
in case w of [] -> Nothing
xs -> Just xs