-
Notifications
You must be signed in to change notification settings - Fork 109
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Use a custom parsing monad instead of attoparsec.
All decoding benchmarks show significant speedups after this change. The biggest improvement is to decoding packed data which is 4-5x as fast as before. (See below for a full list of benchmark diffs.) This parsing monad follows the approach of, e.g., the `store` and `persist` packages. It requires that all data be in a *strict* `ByteString`, and uses simple pointer arithmetic internally to walk through its bytes. This effectively works against #62 (streaming parsers) since it needs to read all the input data before starting the parse. However, that issue has already existed since the beginning of this library for, e.g., submessages; see that bug for more details. So this change doesn't appear to be a regression. We also have freedom to later try out different implementations without changing the API, since `Parser` is opaque as of #294. The implementation of Parser differs from `store` and `persist` by using `ExceptT` to pass around errors internally, rather than exceptions (or closures, as in `attoparsec`). We may want to experiment with this later, but in my initial experiments I didn't see a significant improvement from those approaches. Benchmark results (the "time" output from Criterion): flat(602B)/decode/whnf: 13.14 μs (13.02 μs .. 13.29 μs) => 8.686 μs (8.514 μs .. 8.873 μs) nested(900B)/decode/whnf: 26.35 μs (25.85 μs .. 26.86 μs) => 14.01 μs (13.86 μs .. 14.18 μs) int32-packed(1003B)/decode/whnf: 36.23 μs (35.75 μs .. 36.69 μs) => 17.31 μs (17.11 μs .. 17.50 μs) int32-unpacked(2000B)/decode/whnf: 65.18 μs (64.19 μs .. 66.68 μs) => 19.35 μs (19.13 μs .. 19.58 μs) float-packed(4003B)/decode/whnf: 78.61 μs (77.53 μs .. 79.46 μs) => 19.56 μs (19.40 μs .. 19.76 μs) float-unpacked(5000B)/decode/whnf: 108.9 μs (107.8 μs .. 110.3 μs) => 22.29 μs (22.00 μs .. 22.66 μs) no-unused(10003B)/decode/whnf: 571.7 μs (560.0 μs .. 586.6 μs) => 356.5 μs (349.0 μs .. 365.0 μs) with-unused(10003B)/decode/whnf: 786.6 μs (697.8 μs .. 875.5 μs) => 368.3 μs (361.8 μs .. 376.4 μs)
- Loading branch information
Showing
4 changed files
with
188 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
-- | A custom parsing monad, optimized for speed. | ||
{-# LANGUAGE BangPatterns #-} | ||
{-# LANGUAGE GeneralizedNewtypeDeriving #-} | ||
{-# LANGUAGE ScopedTypeVariables #-} | ||
module Data.ProtoLens.Encoding.Parser | ||
( Parser | ||
, runParser | ||
, atEnd | ||
, getWord8 | ||
, getWord32le | ||
, getBytes | ||
, (<?>) | ||
) where | ||
|
||
import Data.Bits (shiftL, (.|.)) | ||
import Foreign.Ptr | ||
import Foreign.Storable | ||
import Control.Monad.Trans.Except | ||
import Data.Word (Word8, Word32) | ||
import qualified Data.ByteString as B | ||
import qualified Data.ByteString.Unsafe as B | ||
import Data.ByteString (ByteString) | ||
import Control.Monad (ap) | ||
import Control.Monad.IO.Class | ||
import System.IO.Unsafe | ||
|
||
-- | A monad for parsing an input buffer. | ||
newtype Parser a = Parser | ||
{ unParser :: Ptr Word8 -- ^ End position of the input | ||
-> Ptr Word8 -- ^ Current position in the input | ||
-> ExceptT String IO (ParserResult a) } | ||
|
||
data ParserResult a = ParserResult | ||
{ _newPos :: !(Ptr Word8) -- ^ New position in the input | ||
, unParserResult :: a | ||
} | ||
|
||
instance Functor ParserResult where | ||
fmap f (ParserResult p x) = ParserResult p (f x) | ||
|
||
instance Functor Parser where | ||
fmap f (Parser g) = Parser $ \end cur -> fmap (fmap f) $ g end cur | ||
|
||
instance Applicative Parser where | ||
pure x = Parser $ \_ cur -> return $ ParserResult cur x | ||
(<*>) = ap | ||
|
||
instance Monad Parser where | ||
fail s = Parser $ \_ _ -> throwE s | ||
return = pure | ||
Parser f >>= g = Parser $ \end pos -> do | ||
ParserResult pos' x <- f end pos | ||
unParser (g x) end pos' | ||
|
||
-- | Evaluates a parser on the given input. | ||
-- | ||
-- If the parser does not consume all of the input, the rest of the | ||
-- input is discarded and the parser still succeeds. | ||
runParser :: Parser a -> ByteString -> Either String a | ||
runParser (Parser m) b = unsafePerformIO $ B.unsafeUseAsCStringLen b $ \(p, len) | ||
-> runExceptT $ fmap unParserResult $ m (p `plusPtr` len) (castPtr p) | ||
|
||
-- | Returns True if there is no more input left to consume. | ||
atEnd :: Parser Bool | ||
atEnd = Parser $ \end pos -> return $ ParserResult pos (pos == end) | ||
|
||
-- | Parse a one-byte word. | ||
getWord8 :: Parser Word8 | ||
getWord8 = withSized 1 "getWord8: Unexpected end of input" peek | ||
|
||
-- | Parser a 4-byte word in little-endian order. | ||
getWord32le :: Parser Word32 | ||
getWord32le = withSized 4 "getWord32le: Unexpected end of input" $ \pos -> do | ||
b1 <- peek pos | ||
b2 <- peek $ pos `plusPtr'` 1 | ||
b3 <- peek $ pos `plusPtr'` 2 | ||
b4 <- peek $ pos `plusPtr'` 3 | ||
return $ ((fromIntegral b4 `shiftL` 8 .|. fromIntegral b3) | ||
`shiftL` 8 .|. fromIntegral b2) `shiftL` 8 .|. fromIntegral b1 | ||
|
||
-- | Parse a 'B.ByteString' of the given length. | ||
getBytes :: Int -> Parser B.ByteString | ||
getBytes n = withSized n "getBytes: Unexpected end of input" $ \pos -> | ||
B.packCStringLen (castPtr pos, n) | ||
|
||
-- | Helper function for reading bytes from the current position and | ||
-- advancing the pointer. | ||
withSized :: Int -> String -> (Ptr Word8 -> IO a) -> Parser a | ||
withSized len message f = Parser $ \end pos -> | ||
let pos' = pos `plusPtr'` len | ||
in if pos' > end | ||
then throwE $ message | ||
else liftIO $ ParserResult pos' <$> f pos | ||
{-# INLINE withSized #-} | ||
|
||
-- | If the parser fails, prepend an error message. | ||
(<?>) :: Parser a -> String -> Parser a | ||
Parser m <?> msg = Parser $ \end p -> withExceptT (\s -> msg ++ ": " ++ s) $ m end p | ||
|
||
-- | Advance a pointer. Unlike 'plusPtr', preserves the type of the input. | ||
plusPtr' :: Ptr a -> Int -> Ptr a | ||
plusPtr' = plusPtr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
-- | Unit and property tests for of our custom parsing monad. | ||
module Main (main) where | ||
|
||
import qualified Data.ByteString as B | ||
import Data.Either (isLeft) | ||
|
||
import Test.QuickCheck | ||
import Test.Framework (defaultMain, testGroup, Test) | ||
import Test.Framework.Providers.QuickCheck2 (testProperty) | ||
|
||
import Data.ProtoLens.Encoding.Bytes | ||
import Data.ProtoLens.Encoding.Parser | ||
|
||
main :: IO () | ||
main = defaultMain | ||
[ testGroup "getWord8" testGetWord8 | ||
, testGroup "getBytes" testGetBytes | ||
, testGroup "getWord32le" testGetWord32le | ||
, testGroup "failure" testFailure | ||
] | ||
|
||
testGetWord8 :: [Test] | ||
testGetWord8 = | ||
[ testProperty "atEnd" $ \ws -> runParser atEnd (B.pack ws) === Right (null ws) | ||
, testProperty "manyTillEnd" | ||
$ \ws -> runParser (manyTillEnd getWord8) (B.pack ws) === Right ws | ||
, testProperty "idem" $ \x -> x ==> x | ||
] | ||
|
||
testGetBytes :: [Test] | ||
testGetBytes = | ||
[ testProperty "many" | ||
$ \ws -> let | ||
packed = map B.pack ws | ||
in runParser (mapM (getBytes . B.length) packed) (B.concat packed) | ||
=== Right packed | ||
] | ||
|
||
testGetWord32le :: [Test] | ||
testGetWord32le = | ||
[ testProperty "align" | ||
$ \ws -> length ws `mod` 4 /= 0 ==> | ||
counterexampleF isLeft (runParser (manyTillEnd getWord32le) (B.pack ws)) | ||
, testProperty "manyTillEnd" $ \ws -> | ||
runParser (manyTillEnd getWord32le) (runBuilder $ foldMap putFixed32 ws) | ||
=== Right ws | ||
] | ||
|
||
testFailure :: [Test] | ||
testFailure = | ||
[ testProperty "fail-fast" $ \bs -> | ||
runParser (fail "abcde") (B.pack bs) | ||
=== (Left "abcde" :: Either String ()) | ||
, testProperty "<?>" $ \bs -> | ||
runParser (fail "abcde" <?> "fghij") (B.pack bs) | ||
=== (Left "fghij: abcde" :: Either String ()) | ||
] | ||
|
||
-- Since this is a test, just implement the slow stack-heavy way. | ||
manyTillEnd :: Parser a -> Parser [a] | ||
manyTillEnd p = do | ||
end <- atEnd | ||
if end | ||
then return [] | ||
else do | ||
x <- p | ||
xs <- manyTillEnd p | ||
return $ x : xs | ||
|
||
counterexampleF :: (Testable prop, Show a) => (a -> prop) -> a -> Property | ||
counterexampleF f x = counterexample (show x) $ f x |