summaryrefslogtreecommitdiff
authorEvan Martin <martine@danga.com>2008-10-25 17:58:43 (GMT)
committer Evan Martin <martine@danga.com>2008-10-25 17:58:43 (GMT)
commitde769e8fad9ff2878ee41ed599aa646066fe3876 (patch)
tree26f5ad4424f967eea8d6c0141eed21689ca86cab
parentb9244d0d27225fad81fbc69d20894d9eed79ec45 (diff)
use binary search for v2 index lookup
Diffstat
-rw-r--r--Pack.hs23
-rw-r--r--Pack_test.hs15
2 files changed, 32 insertions, 6 deletions
diff --git a/Pack.hs b/Pack.hs
index cae172d..ef00a6b 100644
--- a/Pack.hs
+++ b/Pack.hs
@@ -6,6 +6,7 @@ module Pack (
, dumpPackIndex
-- * Exposed for testing
+ , binarySearch
, readDeltaOffset
) where
@@ -175,6 +176,18 @@ dumpPackIndex file = do
hash <- getByteString 20
return (ofs, Hash hash)
+binarySearch :: (Monad m, Ord a) => (Int, Int) -> (Int -> m a) -> a
+ -> m (Maybe Int)
+binarySearch (lo,hi) get target = search lo hi where
+ search lo hi | lo == hi = return Nothing
+ search lo hi = do
+ let mid = (lo + hi) `div` 2
+ try <- get mid
+ case try `compare` target of
+ EQ -> return (Just mid)
+ LT -> search (mid+1) hi
+ GT -> search lo mid
+
-- Look for a given hash in a given pack index.
findInPackIndex :: PackFile -> Hash -> IO (PackFile, Maybe Word32)
findInPackIndex pack hash@(Hash hashbytes) = do
@@ -224,18 +237,16 @@ findInPackIndex pack hash@(Hash hashbytes) = do
getV2 (lower_bound, upper_bound) object_count = do
-- We have a range we should search.
- -- XXX linear scan for now; do binary search later.
-- V2 format is a (20-byte sha-1) sorted list,
-- followed by a CRC table and then a matching 4-byte offset list.
- entries <- lookAhead $ do
- skip (20 * lower_bound)
- sequence $ replicate (upper_bound - lower_bound) (getByteString 20)
- case elemIndex hashbytes entries of
+ target <- binarySearch (lower_bound, upper_bound)
+ (\i -> lookAhead $ do skip (20*i); getByteString 20) hashbytes
+ case target of
Nothing -> return Nothing
Just idx -> do
skip (object_count * 20) -- Skip SHA-1 list.
skip (object_count * 4) -- Skip CRC list.
- skip ((lower_bound+idx) * 4) -- Skip to the entry we want.
+ skip (idx * 4) -- Skip to the entry we want.
ofs <- getWord32be
if ofs .&. 0x80000000 == 0
then return (Just ofs)
diff --git a/Pack_test.hs b/Pack_test.hs
index 4de2a34..46e68bb 100644
--- a/Pack_test.hs
+++ b/Pack_test.hs
@@ -1,11 +1,25 @@
import qualified Data.ByteString as B
+import Control.Monad.Identity
import Data.Binary.Strict.Get
+import Data.Maybe
import Data.Word
import Test.HUnit
import Pack
import Shared
+testSearch = test $ do
+ let l1 = [1,4,6,7,8,13,100]
+ -- Try searching the list for each item in the list.
+ let found = catMaybes $ map (searchList l1) l1
+ assertEqual "all entries found" [0..(length l1-1)] found
+ -- Try searching the list for some items not in the list
+ let found = catMaybes $ map (searchList l1) [0, 3, 9, 54, 1001]
+ assertEqual "no non-entries found" [] found
+ where
+ searchList list target = runIdentity $
+ binarySearch (0, length list) (\x -> Identity (list !! x)) target
+
assertParse :: (Show a, Eq a) => Get a -> [Word8] -> a -> Assertion
assertParse parser bytes exp =
case runGet parser (B.pack bytes) of
@@ -24,6 +38,7 @@ tests = TestList [
-- Found in a real pack file.
, TestCase $ assertParse readDeltaOffset [0x83, 0x6f] 623
]
+ , TestLabel "binarySearch" $ testSearch
]
main = runTestTT tests