| author | Evan Martin <martine@danga.com> | 2008-10-25 17:58:43 (GMT) |
|---|---|---|
| committer | Evan Martin <martine@danga.com> | 2008-10-25 17:58:43 (GMT) |
| commit | de769e8fad9ff2878ee41ed599aa646066fe3876 (patch) | |
| tree | 26f5ad4424f967eea8d6c0141eed21689ca86cab | |
| parent | b9244d0d27225fad81fbc69d20894d9eed79ec45 (diff) | |
use binary search for v2 index lookup
| -rw-r--r-- | Pack.hs | 23 | ||||
| -rw-r--r-- | Pack_test.hs | 15 |
2 files changed, 32 insertions, 6 deletions
@@ -6,6 +6,7 @@ module Pack ( , dumpPackIndex -- * Exposed for testing + , binarySearch , readDeltaOffset ) where @@ -175,6 +176,18 @@ dumpPackIndex file = do hash <- getByteString 20 return (ofs, Hash hash) +binarySearch :: (Monad m, Ord a) => (Int, Int) -> (Int -> m a) -> a + -> m (Maybe Int) +binarySearch (lo,hi) get target = search lo hi where + search lo hi | lo == hi = return Nothing + search lo hi = do + let mid = (lo + hi) `div` 2 + try <- get mid + case try `compare` target of + EQ -> return (Just mid) + LT -> search (mid+1) hi + GT -> search lo mid + -- Look for a given hash in a given pack index. findInPackIndex :: PackFile -> Hash -> IO (PackFile, Maybe Word32) findInPackIndex pack hash@(Hash hashbytes) = do @@ -224,18 +237,16 @@ findInPackIndex pack hash@(Hash hashbytes) = do getV2 (lower_bound, upper_bound) object_count = do -- We have a range we should search. - -- XXX linear scan for now; do binary search later. -- V2 format is a (20-byte sha-1) sorted list, -- followed by a CRC table and then a matching 4-byte offset list. - entries <- lookAhead $ do - skip (20 * lower_bound) - sequence $ replicate (upper_bound - lower_bound) (getByteString 20) - case elemIndex hashbytes entries of + target <- binarySearch (lower_bound, upper_bound) + (\i -> lookAhead $ do skip (20*i); getByteString 20) hashbytes + case target of Nothing -> return Nothing Just idx -> do skip (object_count * 20) -- Skip SHA-1 list. skip (object_count * 4) -- Skip CRC list. - skip ((lower_bound+idx) * 4) -- Skip to the entry we want. + skip (idx * 4) -- Skip to the entry we want. ofs <- getWord32be if ofs .&. 0x80000000 == 0 then return (Just ofs) diff --git a/Pack_test.hs b/Pack_test.hs index 4de2a34..46e68bb 100644 --- a/Pack_test.hs +++ b/Pack_test.hs @@ -1,11 +1,25 @@ import qualified Data.ByteString as B +import Control.Monad.Identity import Data.Binary.Strict.Get +import Data.Maybe import Data.Word import Test.HUnit import Pack import Shared +testSearch = test $ do + let l1 = [1,4,6,7,8,13,100] + -- Try searching the list for each item in the list. + let found = catMaybes $ map (searchList l1) l1 + assertEqual "all entries found" [0..(length l1-1)] found + -- Try searching the list for some items not in the list + let found = catMaybes $ map (searchList l1) [0, 3, 9, 54, 1001] + assertEqual "no non-entries found" [] found + where + searchList list target = runIdentity $ + binarySearch (0, length list) (\x -> Identity (list !! x)) target + assertParse :: (Show a, Eq a) => Get a -> [Word8] -> a -> Assertion assertParse parser bytes exp = case runGet parser (B.pack bytes) of @@ -24,6 +38,7 @@ tests = TestList [ -- Found in a real pack file. , TestCase $ assertParse readDeltaOffset [0x83, 0x6f] 623 ] + , TestLabel "binarySearch" $ testSearch ] main = runTestTT tests |
