Skip to content

Commit

Permalink
benchmark longlines.xml (commenting out unrelated tests)
Browse files Browse the repository at this point in the history
ocramz#65

stack --stack-yaml stack-lts-18.yaml build && stack --stack-yaml stack-lts-18.yaml bench
  • Loading branch information
unhammer committed Jun 21, 2023
1 parent 5cb8d5a commit ebf9062
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 102 deletions.
118 changes: 54 additions & 64 deletions bench/SpeedBigFiles.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,50 +18,52 @@ import qualified Data.ByteString.Lazy as L
import Data.List (delete)
import GHC.Generics
import System.FilePath.Posix
import qualified Text.XML.Expat.SAX as Hexpat
import qualified Text.XML.Expat.Tree as HexpatTree
import qualified Text.XML.Hexml as Hexml
import Text.XML.Light as XML
import Text.XML.Light.Input as XML
-- import qualified Text.XML.Expat.SAX as Hexpat
-- import qualified Text.XML.Expat.Tree as HexpatTree
-- import qualified Text.XML.Hexml as Hexml
-- import Text.XML.Light as XML
-- import Text.XML.Light.Input as XML
import qualified Xeno.Types
import qualified Xeno.SAX
import qualified Xeno.DOM
import qualified Xeno.DOM.Robust
import qualified Data.ByteString as S
#ifdef LIBXML2
import qualified Text.XML.LibXML.Parser as Libxml2
#endif
-- #ifdef LIBXML2
-- import qualified Text.XML.LibXML.Parser as Libxml2
-- #endif


main :: IO ()
main = defaultMain
[ benchFile allTests "46MB" "enwiki-20190901-pages-articles14.xml-p7697599p7744799.bz2"
, benchFile allTests "624MB" "enwiki-20190901-pages-articles-multistream1.xml-p10p30302.bz2"
, benchFile allTests "921MB" "1HTQ.xml.bz2"
, benchFile allTests "1.6Gb" "enwiki-20190901-pages-meta-current6.xml-p565314p892912.bz2"
, benchFile allExceptHexml "4Gb" "enwiki-20190901-pages-meta-current24.xml-p30503451p32003451.bz2"
-- [ benchFile ["xeno-dom"] "6MB" "shortlines.xml.bz2"
-- [ benchFile ["xeno-dom"] "6MB" "nowiki-20230520-pages-articles-multistream-index.txt.bz2"
[ benchFile ["xeno-dom"] "6MB" "longlines.xml.bz2"
-- , benchFile allTests "624MB" "enwiki-20190901-pages-articles-multistream1.xml-p10p30302.bz2"
-- , benchFile allTests "921MB" "1HTQ.xml.bz2"
-- , benchFile allTests "1.6Gb" "enwiki-20190901-pages-meta-current6.xml-p565314p892912.bz2"
-- , benchFile allExceptHexml "4Gb" "enwiki-20190901-pages-meta-current24.xml-p30503451p32003451.bz2"
-- , benchFile allExceptHexml "21Gb" "enwiki-20190901-pages-meta-history2.xml-p31255p31720.bz2"
]


allTests :: [String]
allTests = [ "hexml-dom"
, "xeno-sax"
, "xeno-sax-z"
-- , "xeno-sax-ex"
-- , "xeno-sax-ex-z"
, "xeno-dom"
, "xeno-dom-with-recovery"
-- XXX: the "hexpat" and "xml-dom" libraries don't work with big files; they require too much memory
-- , "hexpat-sax"
-- , "hexpat-dom"
-- , "xml-dom"
-- , "libxml2-dom"
]
-- allTests :: [String]
-- allTests = [ "hexml-dom"
-- , "xeno-sax"
-- , "xeno-sax-z"
-- -- , "xeno-sax-ex"
-- -- , "xeno-sax-ex-z"
-- , "xeno-dom"
-- , "xeno-dom-with-recovery"
-- -- XXX: the "hexpat" and "xml-dom" libraries don't work with big files; they require too much memory
-- -- , "hexpat-sax"
-- -- , "hexpat-dom"
-- -- , "xml-dom"
-- -- , "libxml2-dom"
-- ]


allExceptHexml :: [String]
allExceptHexml = "hexml-dom" `delete` allTests
-- allExceptHexml :: [String]
-- allExceptHexml = "hexml-dom" `delete` allTests


benchFile :: [String] -> String -> FilePath -> Benchmark
Expand All @@ -72,28 +74,29 @@ benchFile enabledTests size fn =

benchMethods :: [String] -> ByteString -> Xeno.Types.ByteStringZeroTerminated -> [Benchmark]
benchMethods enabledTests input inputz =
runBench "hexml-dom" (whnf Hexml.parse input)
++ runBench "xeno-sax" (whnf Xeno.SAX.validate input)
++ runBench "xeno-sax-z" (whnf Xeno.SAX.validate inputz)
++ runBench "xeno-sax-ex " (whnf Xeno.SAX.validateEx input)
++ runBench "xeno-sax-ex-z" (whnf Xeno.SAX.validateEx inputz)
++ runBench "xeno-dom" (whnf Xeno.DOM.parse input)
++ runBench "xeno-dom-with-recovery" (whnf Xeno.DOM.Robust.parse input)
++ runBench
"hexpat-sax"
(whnf
((Hexpat.parseThrowing Hexpat.defaultParseOptions :: L.ByteString -> [Hexpat.SAXEvent ByteString ByteString]) .
L.fromStrict)
input)
++ runBench
"hexpat-dom"
(whnf
((HexpatTree.parse' HexpatTree.defaultParseOptions :: ByteString -> Either HexpatTree.XMLParseError (HexpatTree.Node ByteString ByteString)))
input)
++ runBench "xml-dom" (nf XML.parseXMLDoc input)
#ifdef LIBXML2
++ runBench "libxml2-dom" (whnfIO (Libxml2.parseMemory input))
#endif
-- runBench "hexml-dom" (whnf Hexml.parse input)
-- ++ runBench "xeno-sax" (whnf Xeno.SAX.validate input)
-- ++ runBench "xeno-sax-z" (whnf Xeno.SAX.validate inputz)
-- ++ runBench "xeno-sax-ex " (whnf Xeno.SAX.validateEx input)
-- ++ runBench "xeno-sax-ex-z" (whnf Xeno.SAX.validateEx inputz)
-- ++
runBench "xeno-dom" (whnf Xeno.DOM.parse input)
-- ++ runBench "xeno-dom-with-recovery" (whnf Xeno.DOM.Robust.parse input)
-- ++ runBench
-- "hexpat-sax"
-- (whnf
-- ((Hexpat.parseThrowing Hexpat.defaultParseOptions :: L.ByteString -> [Hexpat.SAXEvent ByteString ByteString]) .
-- L.fromStrict)
-- input)
-- ++ runBench
-- "hexpat-dom"
-- (whnf
-- ((HexpatTree.parse' HexpatTree.defaultParseOptions :: ByteString -> Either HexpatTree.XMLParseError (HexpatTree.Node ByteString ByteString)))
-- input)
-- ++ runBench "xml-dom" (nf XML.parseXMLDoc input)
-- #ifdef LIBXML2
-- ++ runBench "libxml2-dom" (whnfIO (Libxml2.parseMemory input))
-- #endif
where
runBench name act
| name `elem` enabledTests = [bench name act]
Expand All @@ -107,16 +110,3 @@ readBZip2File fn = do
!bsz = Xeno.Types.BSZT $ bs `S.snoc` 0
return (bs, bsz)


deriving instance Generic Content
deriving instance Generic Element
deriving instance Generic CData
deriving instance Generic CDataKind
deriving instance Generic QName
deriving instance Generic Attr
instance NFData Content
instance NFData Element
instance NFData CData
instance NFData CDataKind
instance NFData QName
instance NFData Attr
Binary file added data/ex/longlines.xml.bz2
Binary file not shown.
3 changes: 3 additions & 0 deletions stack-lts-18.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
resolver: lts-18.19
packages:
- '.'

nix:
packages: [bzip2]
76 changes: 38 additions & 38 deletions xeno.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -46,55 +46,55 @@ library

default-language: Haskell2010

test-suite xeno-test
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Main.hs
build-depends: base, xeno, hexml, hspec, bytestring
-- | DEBUG
, hspec
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
if flag(whitespace-around-equals)
cpp-options: -DWHITESPACE_AROUND_EQUALS
default-language: Haskell2010
-- test-suite xeno-test
-- type: exitcode-stdio-1.0
-- hs-source-dirs: test
-- main-is: Main.hs
-- build-depends: base, xeno, hexml, hspec, bytestring
-- -- | DEBUG
-- , hspec
-- ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
-- if flag(whitespace-around-equals)
-- cpp-options: -DWHITESPACE_AROUND_EQUALS
-- default-language: Haskell2010

benchmark xeno-speed-bench
type: exitcode-stdio-1.0
hs-source-dirs: bench
main-is: Speed.hs
build-depends: base, xeno, hexml, criterion, bytestring, deepseq, ghc-prim, xml, hexpat
if flag(libxml2)
build-depends: libxml
ghc-options: -Wall -rtsopts -O2
if flag(libxml2)
cpp-options: -DLIBXML2
-- ghc-options: -DLIBXML2 -- Hackage started complaining about this
default-language: Haskell2010
-- benchmark xeno-speed-bench
-- type: exitcode-stdio-1.0
-- hs-source-dirs: bench
-- main-is: Speed.hs
-- build-depends: base, xeno, hexml, criterion, bytestring, deepseq, ghc-prim, xml, hexpat
-- if flag(libxml2)
-- build-depends: libxml
-- ghc-options: -Wall -rtsopts -O2
-- if flag(libxml2)
-- cpp-options: -DLIBXML2
-- -- ghc-options: -DLIBXML2 -- Hackage started complaining about this
-- default-language: Haskell2010

benchmark xeno-memory-bench
type: exitcode-stdio-1.0
hs-source-dirs: bench
main-is: Memory.hs
build-depends: base, xeno, weigh, bytestring, deepseq, hexml
ghc-options: -Wall -threaded -O2 -rtsopts -with-rtsopts=-N
default-language: Haskell2010
-- benchmark xeno-memory-bench
-- type: exitcode-stdio-1.0
-- hs-source-dirs: bench
-- main-is: Memory.hs
-- build-depends: base, xeno, weigh, bytestring, deepseq, hexml
-- ghc-options: -Wall -threaded -O2 -rtsopts -with-rtsopts=-N
-- default-language: Haskell2010

benchmark xeno-speed-big-files-bench
type: exitcode-stdio-1.0
hs-source-dirs: bench
main-is: SpeedBigFiles.hs
build-depends: base, xeno, hexml, criterion, bytestring, deepseq, ghc-prim, xml, hexpat, bzlib, filepath
build-depends: base, xeno, criterion, bytestring, deepseq, ghc-prim, bzlib, filepath
if flag(libxml2)
build-depends: libxml
ghc-options: -Wall -O2 -rtsopts "-with-rtsopts=-H8G -AL1G -A256m -M25G"
if flag(libxml2)
cpp-options: -DLIBXML2
default-language: Haskell2010

benchmark xeno-bench
type: exitcode-stdio-1.0
main-is: Bench.hs
hs-source-dirs: app
build-depends: base, xeno, weigh, bytestring, deepseq, hexml, bytestring-mmap, time
ghc-options: -O2 -threaded -rtsopts "-with-rtsopts=-N"
default-language: Haskell2010
-- benchmark xeno-bench
-- type: exitcode-stdio-1.0
-- main-is: Bench.hs
-- hs-source-dirs: app
-- build-depends: base, xeno, weigh, bytestring, deepseq, hexml, bytestring-mmap, time
-- ghc-options: -O2 -threaded -rtsopts "-with-rtsopts=-N"
-- default-language: Haskell2010

0 comments on commit ebf9062

Please sign in to comment.