diff --git a/.github/workflows/haskell.yml b/.github/workflows/haskell.yml index f5aca1c..877cbb6 100644 --- a/.github/workflows/haskell.yml +++ b/.github/workflows/haskell.yml @@ -67,6 +67,7 @@ jobs: CABAL_CHECK_RELAX: y CABAL_HACKAGE_MIRROR: "hackage.haskell.org:http://hackage.fpcomplete.com" CABAL_PROJECT: ${{ matrix.cabal_project }} + CABAL_BUILD_OPTIONS: ${{ matrix.cabal_build_options }} DISABLE_SDIST_PROJECT_CHECK: y # ------------------------------------------------------------------------ @@ -97,12 +98,6 @@ jobs: strategy: fail-fast: false matrix: - # The reason we have name here is to force an additional config instead - # of adding to an existing config while adding additional configs. - # Look at - # for more info about adding matrix elements. - # Adding any element to the list will increase the number of matrix - # elemets proportional to the cross product. include: - name: 9.2.1-linux command: cabal-v2 @@ -114,19 +109,26 @@ jobs: command: cabal-v2 runner: ubuntu-latest ghc_version: 9.0.1 - cabal_project: cabal.project + cabal_project: cabal.project.user - name: 8.10.7-linux command: cabal-v2 runner: ubuntu-latest ghc_version: 8.10.7 - cabal_project: cabal.project + cabal_project: cabal.project.user + + - name: 8.10.7-linux-fusion-plugin + command: cabal-v2 + runner: ubuntu-latest + ghc_version: 8.10.7 + cabal_project: cabal.project.user + cabal_build_options: "--flag fusion-plugin" - name: 9.2.1-macos ghc_version: 9.2.1 command: cabal-v2 runner: macos-latest - cabal_project: cabal.project + cabal_project: cabal.project.user steps: - uses: actions/checkout@v2 diff --git a/.packcheck.ignore b/.packcheck.ignore index 14759db..366286e 100644 --- a/.packcheck.ignore +++ b/.packcheck.ignore @@ -3,5 +3,6 @@ .gitignore appveyor.yml cabal.project +cabal.project.user cabal.project.Werror default.nix diff --git a/benchmark/Main.hs b/benchmark/Main.hs index 99ccfd4..e8d9112 100644 --- a/benchmark/Main.hs +++ b/benchmark/Main.hs @@ -1,22 +1,51 @@ {-# LANGUAGE TupleSections #-} -import Gauge +import Control.DeepSeq (NFData) import Streamly.Data.Fold (Fold) +import Streamly.Prelude (SerialT) import System.Random (randomRIO) +import qualified Streamly.Data.Fold as Fold import qualified Streamly.Internal.Data.Ring.Foreign as Ring import qualified Streamly.Prelude as Stream import qualified Streamly.Statistics as Statistics +import Gauge + {-# INLINE source #-} -source :: (Monad m, Stream.IsStream t) => Int -> Int -> t m Double +source :: (Monad m, Stream.IsStream t, Num a, Stream.Enumerable a) => + Int -> a -> t m a source len from = - Stream.enumerateFromTo (fromIntegral from) (fromIntegral (from + len)) + Stream.enumerateFromThenTo from (from + 1) (from + fromIntegral len) + +{-# INLINE sourceDescending #-} +sourceDescending :: (Monad m, Stream.IsStream t, Num a, Stream.Enumerable a) => + Int -> a -> t m a +sourceDescending len from = + Stream.enumerateFromThenTo + (from + fromIntegral len) + (from + fromIntegral (len - 1)) + from + +{-# INLINE sourceDescendingInt #-} +sourceDescendingInt :: (Monad m, Stream.IsStream t) => Int -> Int -> t m Int +sourceDescendingInt = sourceDescending + +{-# INLINE benchWith #-} +benchWith :: (Num a, NFData a) => + (Int -> a -> SerialT IO a) -> Int -> String -> Fold IO a a -> Benchmark +benchWith src len name f = + bench name + $ nfIO + $ randomRIO (1, 1 :: Int) >>= Stream.fold f . src len . 
fromIntegral {-# INLINE benchWithFold #-} benchWithFold :: Int -> String -> Fold IO Double Double -> Benchmark -benchWithFold len name f = - bench name $ nfIO $ randomRIO (1, 1) >>= Stream.fold f . source len +benchWithFold len name f = benchWith source len name f + +{-# INLINE benchWithFoldInt #-} +benchWithFoldInt :: Int -> String -> Fold IO Int Int -> Benchmark +benchWithFoldInt len name f = benchWith source len name f {-# INLINE benchWithPostscan #-} benchWithPostscan :: Int -> String -> Fold IO Double Double -> Benchmark @@ -37,22 +66,46 @@ main = (Ring.slidingWindow 100 Statistics.min) , benchWithFold numElements "min (window size 1000)" (Ring.slidingWindow 1000 Statistics.min) + , benchWith sourceDescendingInt numElements + "min descending (window size 1000)" + (Ring.slidingWindow 1000 Statistics.min) + , benchWithFold numElements "max (window size 100)" (Ring.slidingWindow 100 Statistics.max) , benchWithFold numElements "max (window size 1000)" (Ring.slidingWindow 1000 Statistics.max) + , benchWith sourceDescendingInt numElements + "max descending (window size 1000)" + (Ring.slidingWindow 1000 Statistics.max) + , benchWithFold numElements "range (window size 100)" (Ring.slidingWindow 100 Statistics.range) , benchWithFold numElements "range (window size 1000)" (Ring.slidingWindow 1000 Statistics.range) + + , benchWithFoldInt numElements "sumInt (window size 100)" + (Ring.slidingWindow 100 Statistics.sumInt) + , benchWithFoldInt numElements "sum for Int (window size 100)" + (Ring.slidingWindow 100 Statistics.sum) + , benchWithFold numElements "sum (window size 100)" (Ring.slidingWindow 100 Statistics.sum) - , benchWithFold numElements "sum (window size 1000" + , benchWithFold numElements "sum (window size 1000)" (Ring.slidingWindow 1000 Statistics.sum) + , benchWithFold numElements "sum (entire stream)" + (Statistics.noSlide Statistics.sum) + , benchWithFold numElements "sum (Data.Fold)" + (Fold.sum) + , benchWithFold numElements "mean (window size 100)" (Ring.slidingWindow 100 Statistics.mean) , benchWithFold numElements "mean (window size 1000)" (Ring.slidingWindow 1000 Statistics.mean) + , benchWithFold numElements "mean (entire stream)" + (Statistics.noSlide Statistics.mean) + , benchWithFold numElements "mean (Data.Fold)" + (Fold.mean) + , benchWithFold numElements "welfordMean (window size 100)" @@ -61,6 +114,107 @@ main = numElements "welfordMean (window size 1000)" (Ring.slidingWindow 1000 Statistics.welfordMean) + , benchWithFold + numElements + "welfordMean (entire stream)" + (Statistics.noSlide Statistics.welfordMean) + + , benchWithFold numElements "geometricMean (window size 100)" + (Ring.slidingWindow 100 Statistics.geometricMean) + , benchWithFold numElements "geometricMean (window size 1000)" + (Ring.slidingWindow 1000 Statistics.geometricMean) + , benchWithFold numElements "geometricMean (entire stream)" + (Statistics.noSlide Statistics.geometricMean) + + , benchWithFold numElements "harmonicMean (window size 100)" + (Ring.slidingWindow 100 Statistics.harmonicMean) + , benchWithFold numElements "harmonicMean (window size 1000)" + (Ring.slidingWindow 1000 Statistics.harmonicMean) + , benchWithFold numElements "harmonicMean (entire stream)" + (Statistics.noSlide Statistics.harmonicMean) + + , benchWithFold numElements "quadraticMean (window size 100)" + (Ring.slidingWindow 100 Statistics.quadraticMean) + , benchWithFold numElements "quadraticMean (window size 1000)" + (Ring.slidingWindow 1000 Statistics.quadraticMean) + , benchWithFold numElements "quadraticMean 
(entire stream)" + (Statistics.noSlide Statistics.quadraticMean) + + , benchWithFold numElements "powerSum 2 (window size 100)" + (Ring.slidingWindow 100 (Statistics.powerSum 2)) + , benchWithFold numElements "powerSum 2 (entire stream)" + (Statistics.noSlide (Statistics.powerSum 2)) + + , benchWithFold numElements "rawMoment 2 (window size 100)" + (Ring.slidingWindow 100 (Statistics.powerSum 2)) + , benchWithFold numElements "rawMoment 2 (entire stream)" + (Statistics.noSlide (Statistics.rawMoment 2)) + + , benchWithFold numElements "powerMean 1 (window size 100)" + (Ring.slidingWindow 100 (Statistics.powerMean 1)) + , benchWithFold numElements "powerMean 2 (window size 100)" + (Ring.slidingWindow 100 (Statistics.powerMean 2)) + , benchWithFold numElements "powerMean 10 (window size 100)" + (Ring.slidingWindow 100 (Statistics.powerMean 10)) + + , benchWithFold numElements "powerMeanFrac (-1) (window size 100)" + (Ring.slidingWindow 100 (Statistics.powerMeanFrac (-1))) + , benchWithFold numElements "powerMeanFrac 1 (window size 100)" + (Ring.slidingWindow 100 (Statistics.powerMeanFrac 1)) + , benchWithFold numElements "powerMeanFrac 2 (window size 100)" + (Ring.slidingWindow 100 (Statistics.powerMeanFrac 2)) + , benchWithFold numElements "powerMeanFrac 10 (window size 100)" + (Ring.slidingWindow 100 (Statistics.powerMeanFrac 10)) + + , benchWithFold numElements "ewma (entire stream)" + (Statistics.ewma 0.5) + , benchWithFold numElements "ewmaAfterMean (entire stream)" + (Statistics.ewmaAfterMean 10 0.5) + , benchWithFold numElements "ewmaRampUpSmoothing (entire stream)" + (Statistics.ewmaRampUpSmoothing 0.5 0.5) + + , benchWithFold numElements "variance (window size 100)" + (Ring.slidingWindow 100 (Statistics.variance)) + , benchWithFold numElements "variance (entire stream)" + (Statistics.noSlide (Statistics.variance)) + , benchWithFold numElements "variance (Data.Fold)" + (Fold.variance) + + , benchWithFold numElements "sampleVariance (window size 100)" + (Ring.slidingWindow 100 (Statistics.sampleVariance)) + , benchWithFold numElements "sampleVariance (entire stream)" + (Statistics.noSlide (Statistics.sampleVariance)) + + , benchWithFold numElements "stdDev (window size 100)" + (Ring.slidingWindow 100 (Statistics.stdDev)) + , benchWithFold numElements "stdDev (entire stream)" + (Statistics.noSlide (Statistics.stdDev)) + , benchWithFold numElements "stdDev (Data.Fold)" + (Fold.stdDev) + + , benchWithFold numElements "sampleStdDev (window size 100)" + (Ring.slidingWindow 100 (Statistics.sampleStdDev)) + , benchWithFold numElements "sampleStdDev (entire stream)" + (Statistics.noSlide (Statistics.sampleStdDev)) + + , benchWithFold numElements "stdErrMean (window size 100)" + (Ring.slidingWindow 100 (Statistics.stdErrMean)) + , benchWithFold numElements "stdErrMean (entire stream)" + (Statistics.noSlide (Statistics.stdErrMean)) + +-- These benchmarks take a lot of time/memory with fusion-plugin possibly +-- because of the use of Tee. 
+#ifndef FUSION_PLUGIN + , benchWithFold numElements "skewness (window size 100)" + (Ring.slidingWindow 100 (Statistics.skewness)) + , benchWithFold numElements "skewness (entire stream)" + (Statistics.noSlide (Statistics.skewness)) + + , benchWithFold numElements "kurtosis (window size 100)" + (Ring.slidingWindow 100 (Statistics.kurtosis)) + , benchWithFold numElements "kurtosis (entire stream)" + (Statistics.noSlide (Statistics.kurtosis)) +#endif ] , bgroup "scan" diff --git a/cabal.project b/cabal.project index 502095d..0a28ed2 100644 --- a/cabal.project +++ b/cabal.project @@ -1,8 +1,3 @@ packages: streamly-statistics.cabal -source-repository-package - type: git - location: https://github.com/composewell/streamly.git - tag: master - jobs: 1 diff --git a/cabal.project.user b/cabal.project.user new file mode 100644 index 0000000..f506ba8 --- /dev/null +++ b/cabal.project.user @@ -0,0 +1,8 @@ +packages: streamly-statistics.cabal + +source-repository-package + type: git + location: https://github.com/composewell/streamly.git + tag: master + +jobs: 1 diff --git a/default.nix b/default.nix index b2ae182..786b371 100644 --- a/default.nix +++ b/default.nix @@ -41,11 +41,10 @@ let haskellPackages = streamly = nixpkgs.haskell.lib.overrideCabal - (super.callHackageDirect - { pkg = "streamly"; - ver = "0.8.1"; - sha256 = "0ywyy7gxjnp32hx8kki0lfn94bnc9mzjh8g6mg65ff3vv28k2vdr"; - } {}) + (let src = fetchGit { + url = "git@github.com:composewell/streamly.git"; + rev = "8df3dc4c70f3f4a2daa3b5cada7390e0c44e8394"; + }; in super.callCabal2nix "streamly" src {}) (old: { librarySystemDepends = if builtins.currentSystem == "x86_64-darwin" diff --git a/src/Streamly/Statistics.hs b/src/Streamly/Statistics.hs index 14b55f7..888343b 100644 --- a/src/Streamly/Statistics.hs +++ b/src/Streamly/Statistics.hs @@ -1,37 +1,173 @@ -{-# LANGUAGE ScopedTypeVariables #-} +-- | +-- Module : Streamly.Statistics +-- Copyright : (c) 2020 Composewell Technologies +-- License : Apache-2.0 +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- +-- Statistical measures over a stream of data. All operations use numerically +-- stable floating point arithmetic. +-- +-- Measurements can be performed over the entire input stream or on a sliding +-- window of fixed or variable size. Where possible, measures are computed +-- online without buffering the input stream. +-- +-- Currently there is no overflow detection. +-- +-- References: +-- +-- * https://en.wikipedia.org/wiki/Statistics +-- * https://mathworld.wolfram.com/topics/ProbabilityandStatistics.html +-- Resources: +-- +-- This may be another useful resource for incremental (non-windowed) +-- computation: +-- +-- https://www.researchgate.net/publication/287152055_Incremental_Statistical_Measures +-- +-- Sample Statistics +-- +-- Terms +-- +-- Population: the complete data set from which statistical samples are taken. +-- +-- Sample: a subset of the population. +-- +-- https://en.wikipedia.org/wiki/Sample_(statistics) +-- +-- Estimator: +-- +-- Statistical measures can be computed either from the actual population +-- or from samples. Statistical measures computed from the samples provide an +-- estimate of the actual measures of the entire population. Measures computed +-- from samples may not truly reflect the actual measures and may have to be +-- adjusted for biases or errors. +-- +-- An "estimator" is a method or function to compute a statistical measure from +-- sampled data. 
For example, the sample variance is an estimator of the +-- population variance. +-- +-- https://en.wikipedia.org/wiki/Estimator +-- +-- Bias: +-- +-- The result computed by an estimator may not be centered at the true value as +-- determined by computing the measure for the actual population. Such an +-- estimator is called a biased estimator. For example, notice how +-- 'sampleVariance' is adjusted for bias. +-- +-- https://en.wikipedia.org/wiki/Bias_of_an_estimator +-- +-- Consistency: +-- +-- https://en.wikipedia.org/wiki/Consistent_estimator + {-# LANGUAGE ScopedTypeVariables #-} module Streamly.Statistics ( - -- * Descriptive functions - min + -- * Incremental Folds + -- | Folds of type @Fold m (a, Maybe a) b@ are incremental sliding window + -- folds. An input of type @(a, Nothing)@ indicates that the input element + -- @a@ is being inserted in the window without ejecting an old value, + -- increasing the window size by 1. An input of type @(a, Just a)@ + -- indicates that the first element is being inserted in the window and the + -- second element is being removed from the window; the window size remains + -- the same. The window size can only increase and never decrease. + -- + -- You can compute the statistics over the entire stream using sliding + -- window folds by keeping the second element of the input tuple as + -- @Nothing@. + -- + lmap + , noSlide + + -- * Summary Statistics + -- | See https://en.wikipedia.org/wiki/Summary_statistics . + + -- ** Sums + , length + , sum + , sumInt + , powerSum + + -- ** Location + -- | See https://en.wikipedia.org/wiki/Location_parameter . + -- + -- See https://en.wikipedia.org/wiki/Central_tendency . + , min , max - , range + , rawMoment + , rawMomentFrac - -- * Statistics of a location - , kurtosis + -- Pythagorean means (https://en.wikipedia.org/wiki/Pythagorean_means) + , mean + , welfordMean + , weightedMean , geometricMean - , skewness + , harmonicMean + + , quadraticMean + + -- Generalized mean + , powerMean + , powerMeanFrac + + -- ** Weighted Means + -- | Exponential Smoothing. + , ewma + , ewmaAfterMean + , ewmaRampUpSmoothing + + -- ** Spread + -- | Second order central moment is a statistical measure of dispersion. + -- The \(k\)th moment about the mean (or \(k\)th central moment) is defined + -- as: + -- + -- \(\mu_k = \frac{1}{n}\sum_{i=1}^n {(x_{i}-\mu)}^k\) + -- + -- See https://mathworld.wolfram.com/CentralMoment.html . + -- + -- See https://en.wikipedia.org/wiki/Statistical_dispersion . + , range + , md + , variance , stdDev + + -- ** Shape + -- | Third and fourth order central moments are a measure of shape. + -- + -- See https://en.wikipedia.org/wiki/Shape_parameter . + -- + -- See https://en.wikipedia.org/wiki/Standardized_moment . + , skewness + , kurtosis + + -- XXX Move to Statistics.Sample or Statistics.Estimation module?
+ -- ** Estimation + , sampleVariance + , sampleStdDev , stdErrMean - , sum - , sumInt - , mean - , welfordMean ) where -import Control.Monad.IO.Class (MonadIO(..)) +import Control.Monad.IO.Class import Data.Bifunctor(bimap) import Data.Function ((&)) import Data.Maybe (fromMaybe) +import Foreign.Storable + import Streamly.Data.Fold.Tee(Tee(..), toFold) import Streamly.Internal.Data.Fold.Type (Fold(..), Step(..)) -import Streamly.Internal.Data.Tuple.Strict +import Streamly.Internal.Data.Ring.Foreign (slidingWindow) +import Streamly.Internal.Data.Tuple.Strict (Tuple'(..), Tuple3'(..)) import qualified Deque.Strict as DQ import qualified Streamly.Data.Fold as Fold -import Prelude hiding (sum, min, max) +import Prelude hiding (length, sum, min, max) + -- XXX Make the following more numerically stable. Try to extend welfordMean -- XXX method. @@ -43,12 +179,176 @@ import Prelude hiding (sum, min, max) -- XXX * https://www.johndcook.com/blog/skewness_kurtosis/ -- XXX * Art of Computer Programming, Volume 2: Seminumerical Algorithms -- XXX (3rd Edition), Page 232 +-- TODO: Overflow checks. Would be good if we can directly replace the +-- operations with overflow checked operations. +-- +-- See https://hackage.haskell.org/package/safe-numeric +-- See https://hackage.haskell.org/package/safeint +-- +-- TODO We have many of these functions in Streamly.Data.Fold as well. Need to +-- think about deduplication. + +------------------------------------------------------------------------------- +-- Utilities +------------------------------------------------------------------------------- + +-- XXX Move these and basic functions to Fold.Sliding in streamly. +-- +-- | Map a function on the incoming as well as outgoing element of a rolling +-- window fold. +-- +-- >>> lmap f = Fold.lmap (bimap f (f <$>)) +-- +{-# INLINE lmap #-} +lmap :: (c -> a) -> Fold m (a, Maybe a) b -> Fold m (c, Maybe c) b +lmap f = Fold.lmap (bimap f (f <$>)) + +-- | Convert a rolling fold to a normal fold using the entire input stream as a +-- single window. +-- +-- >>> noSlide f = Fold.lmap (, Nothing) f +-- +{-# INLINE noSlide #-} +noSlide :: Fold m (a, Maybe a) b -> Fold m a b +noSlide f = Fold.lmap (, Nothing) f + +------------------------------------------------------------------------------- +-- Sum +------------------------------------------------------------------------------- + +-- XXX Overflow. +-- +-- | The sum of all the elements in a rolling window. The input elements are +-- required to be integral numbers. +-- +-- This was written in the hope that it would be a tiny bit faster than 'sum' +-- for 'Integral' values. But it turns out that 'sum' is 2% faster than this even +-- for integral values! +-- +-- /Internal/ +-- +{-# INLINE sumInt #-} +sumInt :: forall m a. (Monad m, Integral a) => Fold m (a, Maybe a) a +sumInt = Fold step initial extract + + where + + initial = return $ Partial (0 :: a) -data Tuple5' a b c d e = Tuple5' !a !b !c !d !e deriving Show + step s (a, ma) = + return + $ Partial + $ case ma of + Nothing -> s + a + Just old -> s + a - old + + extract = return --- | The minimum element in the window. +-- XXX Overflow. +-- +-- | Sum of all the elements in a rolling window: +-- +-- \(S = \sum_{i=1}^n x_{i}\) +-- +-- This is the first power sum. +-- +-- >>> sum = powerSum 1 +-- +-- Uses Kahan-Babuska-Neumaier style summation for numerical stability of +-- floating precision arithmetic. +-- +-- /Space/: \(\mathcal{O}(1)\) +-- +-- /Time/: \(\mathcal{O}(n)\) +-- +{-# INLINE sum #-} +sum :: forall m a.
(Monad m, Num a) => Fold m (a, Maybe a) a +sum = Fold step initial extract + + where + + initial = + return + $ Partial + $ Tuple' + (0 :: a) -- running sum + (0 :: a) -- accumulated rounding error + + step (Tuple' total err) (new, mOld) = + let incr = + case mOld of + -- XXX new may be large and err may be small, we may lose it + Nothing -> new - err + -- XXX if (new - old) is large we may lose err + Just old -> (new - old) - err + -- total is large and incr may be small, we may round incr here but + -- we will accumulate the rounding error in err1 in the next step. + total1 = total + incr + -- Accumulate any rounding error in err1 + -- XXX In the Nothing case above we may lose err, therefore we + -- should use ((total1 - total) - new) + err here. + -- Or even in the Just case if (new - old) is large we may lose + -- err, so we should use ((total1 - total) + (old - new)) + err. + err1 = (total1 - total) - incr + in return $ Partial $ Tuple' total1 err1 + + extract (Tuple' total _) = return total + +-- | The number of elements in the rolling window. +-- +-- This is the \(0\)th power sum. +-- +-- >>> length = powerSum 0 +-- +{-# INLINE length #-} +length :: (Monad m, Num b) => Fold m (a, Maybe a) b +length = Fold.foldl' step 0 + + where + + step w (_, Nothing) = w + 1 + step w _ = w + +-- | Sum of the \(k\)th power of all the elements in a rolling window: +-- +-- \(S_k = \sum_{i=1}^n x_{i}^k\) +-- +-- >>> powerSum k = lmap (^ k) sum +-- +-- /Space/: \(\mathcal{O}(1)\) +-- +-- /Time/: \(\mathcal{O}(n)\) +{-# INLINE powerSum #-} +powerSum :: (Monad m, Num a) => Int -> Fold m (a, Maybe a) a +powerSum k = lmap (^ k) sum + +-- | Like 'powerSum' but powers can be negative or fractional. This is slower +-- than 'powerSum' for positive integral powers. +-- +-- >>> powerSumFrac p = lmap (** p) sum +-- +{-# INLINE powerSumFrac #-} +powerSumFrac :: (Monad m, Floating a) => a -> Fold m (a, Maybe a) a +powerSumFrac p = lmap (** p) sum + +------------------------------------------------------------------------------- +-- Location +------------------------------------------------------------------------------- + +-- Theoretically, we can approximate minimum in a rolling window by using a +-- 'powerMean' with sufficiently large negative power. +-- +-- XXX If we need to know the minimum in the window only once in a while then +-- we can use linear search when it is extracted and not pay the cost all the +-- time. +-- +-- | The minimum element in a rolling window. +-- +-- If you want to compute the min of the entire stream, Fold.min from the streamly +-- package would be much faster. +-- +-- /Time/: \(\mathcal{O}(n*w)\) where \(w\) is the window size. -- --- /Time complexity/: @O(n^2)@ where @n@ is the window size. {-# INLINE min #-} min :: (Monad m, Ord a) => Fold m (a, Maybe a) a min = Fold step initial extract @@ -89,9 +389,16 @@ min = Fold step initial extract $ fromMaybe (0, error "min: Empty stream") $ DQ.head q --- | The maximum element in the window. +-- Theoretically, we can approximate maximum in a rolling window by using a +-- 'powerMean' with sufficiently large positive power. +-- +-- | The maximum element in a rolling window. +-- +-- If you want to compute the max of the entire stream, Fold.max from the streamly +-- package would be much faster. +-- +-- /Time/: \(\mathcal{O}(n*w)\) where \(w\) is the window size. -- --- /Time complexity/: @O(n^2)@ where @n@ is the window size.
{-# INLINE max #-} max :: (Monad m, Ord a) => Fold m (a, Maybe a) a max = Fold step initial extract @@ -134,140 +441,440 @@ max = Fold step initial extract $ fromMaybe (0, error "max: Empty stream") $ DQ.head q --- | Range. The difference between the largest and smallest elements of a --- window. -{-# INLINE range #-} -range :: (Monad m, Num a, Ord a) => Fold m (a, Maybe a) a -range = Fold.teeWith (-) max min +-- | Arithmetic mean of elements in a sliding window: +-- +-- \(\mu = \frac{\sum_{i=1}^n x_{i}}{n}\) +-- +-- This is also known as the Simple Moving Average (SMA) when used in the +-- sliding window and Cumulative Moving Average (CMA) when used on the entire +-- stream. +-- +-- Mean is the same as the first raw moment. +-- +-- \(\mu = \mu'_1\) +-- +-- >>> mean = rawMoment 1 +-- >>> mean = powerMean 1 +-- >>> mean = Fold.teeWith (/) sum length +-- +-- /Space/: \(\mathcal{O}(1)\) +-- +-- /Time/: \(\mathcal{O}(n)\) +{-# INLINE mean #-} +mean :: forall m a. (Monad m, Fractional a) => Fold m (a, Maybe a) a +mean = Fold.teeWith (/) sum length --- | The sum of all the elements in the window. -{-# INLINE sumInt #-} -sumInt :: forall m a. (MonadIO m, Num a) => Fold m (a, Maybe a) a -sumInt = Fold step initial extract +-- | Same as 'mean' but uses Welford's algorithm to compute the mean +-- incrementally. +-- +-- It maintains a running mean instead of a running sum and adjusts the mean +-- based on a new value. This is slower than 'mean' because it uses a +-- division operation on each step, and it is numerically unstable (as of now). +-- The advantage over 'mean' could be no overflow if the numbers are large, +-- because we do not maintain a sum, but that is a highly unlikely corner case. +-- +-- /Internal/ +{-# INLINE welfordMean #-} +welfordMean :: forall m a. (Monad m, Fractional a) => Fold m (a, Maybe a) a +welfordMean = Fold step initial extract where - initial = return $ Partial (0 :: a) + initial = + return + $ Partial + $ Tuple' + (0 :: a) -- running mean + (0 :: Int) -- count of numbers in the window - step s (a, ma) = return $ Partial $ - case ma of - Nothing -> s + a - Just old -> s + a - old + step (Tuple' x w) (y, my) = + return + $ Partial + $ case my of + Nothing -> + let w1 = fromIntegral (w + 1) + in Tuple' (x + (y - x) / w1) (w + 1) + Just old -> + let w1 = fromIntegral w + -- XXX can we use x + (y - old) / w1 ? + -- XXX We can carry the rounding errors to provide + -- numerical stability like in 'mean'. + in Tuple' (x + (y - x) / w1 + (x - old) / w1) w - extract = return + extract (Tuple' x _) = return x --- | The sum of all the elements in the window. This uses Kahan-Babuska-Neumaier --- summation. -{-# INLINE sum #-} -sum :: forall m a. (MonadIO m, Num a) => Fold m (a, Maybe a) a -sum = Fold step initial extract +-- XXX We may have chances of overflow if the powers are high or the numbers +-- are large. A limited mitigation could be to use welford style avg +-- computation. Do we need an overflow detection? +-- +-- | Raw moment is the moment about 0. The \(k\)th raw moment is defined as: +-- +-- \(\mu'_k = \frac{\sum_{i=1}^n x_{i}^k}{n}\) +-- +-- >>> rawMoment k = Fold.teeWith (/) (powerSum k) length +-- +-- See https://en.wikipedia.org/wiki/Moment_(mathematics) . +-- +-- /Space/: \(\mathcal{O}(1)\) +-- +-- /Time/: \(\mathcal{O}(n)\) +{-# INLINE rawMoment #-} +rawMoment :: (Monad m, Fractional a) => Int -> Fold m (a, Maybe a) a +rawMoment k = Fold.teeWith (/) (powerSum k) length + +-- | Like 'rawMoment' but powers can be negative or fractional.
This is +-- slower than 'rawMoment' for positive integral powers. +-- +-- >>> rawMomentFrac p = Fold.teeWith (/) (powerSumFrac p) length +-- +{-# INLINE rawMomentFrac #-} +rawMomentFrac :: (Monad m, Floating a) => a -> Fold m (a, Maybe a) a +rawMomentFrac k = Fold.teeWith (/) (powerSumFrac k) length + +-- XXX Overflow can happen when large powers or large numbers are used. We can +-- keep a running mean instead of running sum but that won't mitigate the +-- overflow possibility by much. The overflow can still happen when computing +-- the mean incrementally. + +-- | The \(k\)th power mean of numbers \(x_1, x_2, \ldots, x_n\) is: +-- +-- \(M_k = \left( \frac{1}{n} \sum_{i=1}^n x_i^k \right)^{\frac{1}{k}}\) +-- +-- \(powerMean(k) = (rawMoment(k))^\frac{1}{k}\) +-- +-- >>> powerMean k = (** (1 / fromIntegral k)) <$> rawMoment k +-- +-- All other means can be expressed in terms of power mean. It is also known as +-- the generalized mean. +-- +-- See https://en.wikipedia.org/wiki/Generalized_mean +-- +{-# INLINE powerMean #-} +powerMean :: (Monad m, Floating a) => Int -> Fold m (a, Maybe a) a +powerMean k = (** (1 / fromIntegral k)) <$> rawMoment k + +-- | Like 'powerMean' but powers can be negative or fractional. This is +-- slower than 'powerMean' for positive integral powers. +-- +-- >>> powerMeanFrac k = (** (1 / k)) <$> rawMomentFrac k +-- +{-# INLINE powerMeanFrac #-} +powerMeanFrac :: (Monad m, Floating a) => a -> Fold m (a, Maybe a) a +powerMeanFrac k = (** (1 / k)) <$> rawMomentFrac k + +-- | The harmonic mean of the positive numbers \(x_1, x_2, \ldots, x_n\) is +-- defined as: +-- +-- \(HM = \frac{n}{\frac1{x_1} + \frac1{x_2} + \cdots + \frac1{x_n}}\) +-- +-- \(HM = \left(\frac{\sum\limits_{i=1}^n x_i^{-1}}{n}\right)^{-1}\) +-- +-- >>> harmonicMean = Fold.teeWith (/) length (lmap recip sum) +-- >>> harmonicMean = powerMeanFrac (-1) +-- +-- See https://en.wikipedia.org/wiki/Harmonic_mean . +-- +{-# INLINE harmonicMean #-} +harmonicMean :: (Monad m, Fractional a) => Fold m (a, Maybe a) a +harmonicMean = Fold.teeWith (/) length (lmap recip sum) + +-- | The quadratic mean or root mean square (rms) of the numbers +-- \(x_1, x_2, \ldots, x_n\) is defined as: +-- +-- \(RMS = \sqrt{ \frac{1}{n} \left( x_1^2 + x_2^2 + \cdots + x_n^2 \right) }.\) +-- +-- >>> quadraticMean = powerMean 2 +-- +-- See https://en.wikipedia.org/wiki/Root_mean_square . +-- +{-# INLINE quadraticMean #-} +quadraticMean :: (Monad m, Floating a) => Fold m (a, Maybe a) a +quadraticMean = powerMean 2 + +------------------------------------------------------------------------------- +-- Weighted Means +------------------------------------------------------------------------------- + +-- XXX Compute this in a sliding window? +-- +-- | @ewma smoothingFactor@. +-- +-- @ewma@ of an empty stream is 0. +-- +-- Exponential weighted moving average, \(s_n\), of the values +-- \(x_0, x_1,\ldots,x_n\), is defined recursively as: +-- +-- \(\begin{align} s_0& = x_0\\ s_n & = \alpha x_{n} + (1-\alpha)s_{n-1},\quad n>0 \end{align}\) +-- +-- If we expand the recursive term it becomes an exponential series: +-- +-- \(s_n = \alpha \left[x_n + (1-\alpha)x_{n-1} + (1-\alpha)^2 x_{n-2} + \cdots + (1-\alpha)^{n-1} x_1 \right] + (1-\alpha)^n x_0\) +-- +-- where \(\alpha\), the smoothing factor, is in the range \(0 < \alpha < 1\). +-- The higher the value of \(\alpha\), the more weight is given to newer values.
As +-- a special case, if it is 0 then the weighted sum would always be the same as +-- the oldest value; if it is 1 then the sum would always be the same as the +-- newest value. +-- +-- See https://en.wikipedia.org/wiki/Moving_average +-- +-- See https://en.wikipedia.org/wiki/Exponential_smoothing +-- +{-# INLINE ewma #-} +ewma :: Monad m => Double -> Fold m Double Double +ewma k = extract <$> Fold.foldl' step (Tuple' 0 1) where - initial = return $ Partial $ Tuple' (0 :: a) (0 :: a) + step (Tuple' x0 k1) x = Tuple' (ewmaStep k1 x0 x) k - step (Tuple' s c) (a, ma) = - let y = - case ma of - Nothing -> a - c - Just old -> a - old - c - t = s + y - c1 = (t - s) - y - in return $ Partial $ Tuple' t c1 + extract (Tuple' x _) = x - extract (Tuple' s _) = return s +-- XXX It can perhaps perform better if implemented as a custom fold? +-- +-- | @ewmaAfterMean n k@ is like 'ewma' but uses the mean of the first @n@ values and +-- then uses that as the initial value for the @ewma@ of the rest of the +-- values. +-- +-- This can be used to reduce the effect of volatility of the initial value +-- when @k@ is too small. +-- +{-# INLINE ewmaAfterMean #-} +ewmaAfterMean :: Monad m => Int -> Double -> Fold m Double Double +ewmaAfterMean n k = + Fold.concatMap (\i -> (Fold.foldl' (ewmaStep k) i)) (Fold.take n Fold.mean) --- | Window Size. It computes the size of the sliding window. -{-# INLINE windowSize #-} -windowSize :: MonadIO m => Fold m (a, Maybe a) Int -windowSize = Fold.foldl' step initial +-- | @ewmaRampUpSmoothing n k@ is like 'ewma' but uses 1 as the initial smoothing factor and +-- then exponentially smooths it to @k@ using @n@ as the smoothing factor. +-- +-- This is significantly faster than 'ewmaAfterMean'. +-- +{-# INLINE ewmaRampUpSmoothing #-} +ewmaRampUpSmoothing :: Monad m => Double -> Double -> Fold m Double Double +ewmaRampUpSmoothing n k1 = extract <$> Fold.foldl' step initial where - initial = 0 :: Int + initial = Tuple' 0 1 - step w (_, ma) = - case ma of - Nothing -> w + 1 - _ -> w + step (Tuple' x0 k0) x1 = + let x = ewmaStep k0 x0 x1 + k = ewmaStep n k0 k1 + in Tuple' x k --- | Arithmetic mean. This uses Kahan-Babuska-Neumaier --- summation, so is more accurate than 'welfordMean' unless the input --- values are very large. -{-# INLINE mean #-} -mean :: forall m a. (MonadIO m, Fractional a) => Fold m (a, Maybe a) a -mean = Fold.teeWith (/) sum (fromIntegral <$> windowSize) + extract (Tuple' x _) = x -{-# INLINE powerSum #-} -powerSum :: forall m a. (MonadIO m, Num a) => Int -> Fold m (a, Maybe a) a -powerSum i = Fold.lmap (\(a, ma) -> (a ^ i, (^i) <$> ma)) sum +------------------------------------------------------------------------------- +-- Spread/Dispersion +------------------------------------------------------------------------------- -{-# INLINE powerSumAvg #-} -powerSumAvg :: forall m a. (MonadIO m, Fractional a) - => Int -> Fold m (a, Maybe a) a -powerSumAvg i = Fold.teeWith (/) (powerSum i) (fmap fromIntegral windowSize) +-- | The difference between the max and min elements of a rolling window. +-- +-- >>> range = Fold.teeWith (-) max min +-- +-- If you want to compute the range of the entire stream, @Fold.teeWith (-) +-- Fold.max Fold.min@ from the streamly package would be much faster. +-- +-- /Space/: \(\mathcal{O}(n)\) where @n@ is the window size. +-- +-- /Time/: \(\mathcal{O}(n*w)\) where \(w\) is the window size.
+-- +{-# INLINE range #-} +range :: (Monad m, Num a, Ord a) => Fold m (a, Maybe a) a +range = Fold.teeWith (-) max min +-- | @md n@ computes the mean absolute deviation (or mean deviation) in a +-- sliding window of last @n@ elements in the stream. +-- +-- The mean absolute deviation of the numbers \(x_1, x_2, \ldots, x_n\) is: +-- +-- \(MD = \frac{1}{n}\sum_{i=1}^n |x_i-\mu|\) +-- +-- Note: It is expensive to compute MD in a sliding window. We need to +-- maintain a ring buffer of last n elements and maintain a running mean, when +-- the result is extracted we need to compute the difference of all elements +-- from the mean and get the average. Using standard deviation may be +-- computationally cheaper. +-- +-- See https://en.wikipedia.org/wiki/Average_absolute_deviation . +-- +-- /Unimplemented/ +md :: Int -> Fold m Double Double +md = undefined + +-- | The variance \(\sigma^2\) of a population of \(n\) equally likely values +-- is defined as the average of the squares of deviations from the mean +-- \(\mu\). In other words, second moment about the mean: +-- +-- \(\sigma^2 = \frac{1}{n}\sum_{i=1}^n {(x_{i}-\mu)}^2\) +-- +-- \(\sigma^2 = rawMoment(2) - \mu^2\) +-- +-- \(\mu_2 = -(\mu'_1)^2 + \mu'_2\) +-- +-- Note that the variance would be biased if applied to estimate the population +-- variance from a sample of the population. See 'sampleVariance'. +-- +-- See https://en.wikipedia.org/wiki/Variance. +-- +-- /Space/: \(\mathcal{O}(1)\) +-- +-- /Time/: \(\mathcal{O}(n)\) {-# INLINE variance #-} -variance :: forall m a. (MonadIO m, Fractional a) => Fold m (a, Maybe a) a -variance = Fold.teeWith (\p2 m -> p2 - m ^ (2 :: Integer)) (powerSumAvg 2) mean +variance :: (Monad m, Fractional a) => Fold m (a, Maybe a) a +variance = Fold.teeWith (\p2 m -> p2 - m ^ (2 :: Int)) (rawMoment 2) mean +-- | Standard deviation \(\sigma\) is the square root of 'variance'. +-- +-- This is the population standard deviation or uncorrected sample standard +-- deviation. +-- +-- >>> stdDev = sqrt <$> variance +-- +-- See https://en.wikipedia.org/wiki/Standard_deviation . +-- +-- /Space/: \(\mathcal{O}(1)\) +-- +-- /Time/: \(\mathcal{O}(n)\) {-# INLINE stdDev #-} -stdDev :: forall m a. (MonadIO m, Floating a) => Fold m (a, Maybe a) a +stdDev :: (Monad m, Floating a) => Fold m (a, Maybe a) a stdDev = sqrt <$> variance -{-# INLINE stdErrMean #-} -stdErrMean :: forall m a. (MonadIO m, Floating a) => Fold m (a, Maybe a) a -stdErrMean = - Fold.teeWith - (\sd n -> sd / sqrt n) - stdDev - (fmap fromIntegral windowSize) - +-- | Skewness \(\gamma\) is the standardized third central moment defined as: +-- +-- \(\tilde{\mu}_3 = \frac{\mu_3}{\sigma^3}\) +-- +-- The third central moment can be computed in terms of raw moments: +-- +-- \(\mu_3 = 2(\mu'_1)^3 - 3\mu'_1\mu'_2 + \mu'_3\) +-- +-- Substituting \(\mu'_1 = \mu\), and \(\mu'_2 = \mu^2 + \sigma^2\): +-- +-- \(\mu_3 = -\mu^3 - 3\mu\sigma^2 + \mu'_3\) +-- +-- Skewness is a measure of symmetry of the probability distribution. It is 0 +-- for a symmetric distribution, negative for a distribution that is skewed +-- towards left, positive for a distribution skewed towards right. +-- +-- For a normal like distribution the median can be found around +-- \(\mu - \frac{\gamma\sigma}{6}\) and the mode can be found around +-- \(\mu - \frac{\gamma \sigma}{2}\). +-- +-- See https://en.wikipedia.org/wiki/Skewness . +-- {-# INLINE skewness #-} -skewness :: forall m a. 
(MonadIO m, Floating a) => Fold m (a, Maybe a) a +skewness :: (Monad m, Floating a) => Fold m (a, Maybe a) a skewness = - toFold $ (\p3 sd m -> p3 / sd ^ (3 :: Int) - 3 * (m / sd) - - (m / sd) ^ (3 :: Int)) - <$> Tee (powerSumAvg 3) - <*> Tee stdDev - <*> Tee mean - + toFold + $ (\rm3 sd mu -> + rm3 / sd ^ (3 :: Int) - 3 * (mu / sd) - (mu / sd) ^ (3 :: Int) + ) + <$> Tee (rawMoment 3) + <*> Tee stdDev + <*> Tee mean + +-- XXX We can compute the 2nd, 3rd, 4th raw moments by repeatedly multiplying +-- instead of computing the powers every time. +-- +-- | Kurtosis \(\kappa\) is the standardized fourth central moment, defined as: +-- +-- \(\tilde{\mu}_4 = \frac{\mu_4}{\sigma^4}\) +-- +-- The fourth central moment can be computed in terms of raw moments: +-- +-- \(\mu_4 = -3(\mu'_1)^4 + 6(\mu'_1)^2\mu'_2 - 4\mu'_1\mu'_3\ + \mu'_4\) +-- +-- Substituting \(\mu'_1 = \mu\), and \(\mu'_2 = \mu^2 + \sigma^2\): +-- +-- \(\mu_4 = 3\mu^4 + 6\mu^2\sigma^2 - 4\mu\mu'_3 + \mu'_4\) +-- +-- It is always non-negative. It is 0 for a point distribution, low for light +-- tailed (platykurtic) distributions and high for heavy tailed (leptokurtic) +-- distributions. +-- +-- \(\kappa >= \gamma^2 + 1\) +-- +-- For a normal distribution \(\kappa = 3\sigma^4\). +-- +-- See https://en.wikipedia.org/wiki/Kurtosis . +-- +-- /Broken/ {-# INLINE kurtosis #-} -kurtosis :: forall m a. (MonadIO m, Floating a) => Fold m (a, Maybe a) a +kurtosis :: (Monad m, Floating a) => Fold m (a, Maybe a) a kurtosis = - toFold $ - (\p4 p3 sd m -> - p4 / sd ^ (4 :: Int) - 4 * ((m / sd) * (p3 / sd ^ (3 :: Int))) - - 3 * ((m / sd) ^ (4 :: Int))) - <$> Tee (powerSumAvg 4) - <*> Tee (powerSumAvg 3) - <*> Tee stdDev - <*> Tee mean - --- | Arithmetic mean. This uses Welford's algorithm to provide --- numerical stability, using a single pass over the sample data. --- --- Compared to 'mean', this loses a surprising amount of precision --- unless the inputs are very large. -{-# INLINE welfordMean #-} -welfordMean :: forall m a. (MonadIO m, Floating a) => Fold m (a, Maybe a) a -welfordMean = Fold step initial extract - - where - - initial = return $ Partial $ Tuple' (0 :: a) (0 :: Int) - - step (Tuple' x w) (y, my) = - return $ - case my of - Nothing -> Partial $ Tuple' (x + (y - x) / w') (w + 1) - where w' = fromIntegral (w + 1) - Just old -> Partial $ - Tuple' (x + (y - x) / w' + (x - old) / w') w - where w' = fromIntegral w + toFold + $ (\rm4 rm3 sd mu -> + rm4 / sd ^ (4 :: Int) + - 4 * ((mu / sd) * (rm3 / sd ^ (3 :: Int))) + - 3 * ((mu / sd) ^ (4 :: Int)) + ) + <$> Tee (rawMoment 4) + <*> Tee (rawMoment 3) + <*> Tee stdDev + <*> Tee mean + +------------------------------------------------------------------------------- +-- Estimation +------------------------------------------------------------------------------- + +-- | Geometric mean, defined as: +-- +-- \(GM = \sqrt[n]{x_1 x_2 \cdots x_n}\) +-- +-- \(GM = \left(\prod_{i=1}^n x_i\right)^\frac{1}{n}\) +-- +-- or, equivalently, as the arithmetic mean in log space: +-- +-- \(GM = e ^{{\frac{\sum_{i=1}^{n}\ln a_i}{n}}}\) +-- +-- >>> geometricMean = exp <$> lmap log mean +-- +-- See https://en.wikipedia.org/wiki/Geometric_mean . +{-# INLINE geometricMean #-} +geometricMean :: (Monad m, Floating a) => Fold m (a, Maybe a) a +geometricMean = exp <$> lmap log mean - extract (Tuple' x _) = return x +-- | @weightedMean@ computes the weighted mean of the sample data. +-- +-- The weights should add up to 1. It uses Kahan-Babuska-Neumaier summation. 
+-- +{-# INLINE weightedMean #-} +weightedMean :: forall m a. (MonadIO m, Fractional a, Storable a) + => Int -> Fold m (a, a) a +weightedMean n = Fold.lmap (uncurry (*)) (slidingWindow n sum) + +-- XXX Is this numerically stable? We can use the kbn summation here. +-- | ewmaStep smoothing-factor old-value new-value +{-# INLINE ewmaStep #-} +ewmaStep :: Double -> Double -> Double -> Double +ewmaStep k x0 x1 = (1 - k) * x0 + k * x1 + +-- | Unbiased sample variance, i.e. the population 'variance' scaled by +-- \(\frac{n}{n - 1}\) (Bessel's correction) to correct the bias when +-- estimating the population variance from a sample: +-- +-- \(s^2 = \frac{n}{n - 1} \sigma^2\) +-- +{-# INLINE sampleVariance #-} +sampleVariance :: (Monad m, Fractional a) => Fold m (a, Maybe a) a +sampleVariance = Fold.teeWith (\n s2 -> n * s2 / (n - 1)) length variance + +-- | Sample standard deviation: +-- +-- \(s = \sqrt{sampleVariance}\) +-- +-- >>> sampleStdDev = sqrt <$> sampleVariance +-- +-- See https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation +-- . +-- +{-# INLINE sampleStdDev #-} +sampleStdDev :: (Monad m, Floating a) => Fold m (a, Maybe a) a +sampleStdDev = sqrt <$> sampleVariance -{-# INLINE geometricMean #-} -geometricMean :: forall m a. (MonadIO m, Floating a) => Fold m (a, Maybe a) a -geometricMean = exp <$> Fold.lmap (bimap log (log <$>)) mean +-- | Standard error of the sample mean (SEM), defined as: +-- +-- \( SEM = \frac{sampleStdDev}{\sqrt{n}} \) +-- +-- See https://en.wikipedia.org/wiki/Standard_error . +-- +-- /Space/: \(\mathcal{O}(1)\) +-- +-- /Time/: \(\mathcal{O}(n)\) +{-# INLINE stdErrMean #-} +stdErrMean :: (Monad m, Floating a) => Fold m (a, Maybe a) a +stdErrMean = Fold.teeWith (\sd n -> sd / sqrt n) sampleStdDev length diff --git a/streamly-statistics.cabal b/streamly-statistics.cabal index 424a1e8..63be07d 100644 --- a/streamly-statistics.cabal +++ b/streamly-statistics.cabal @@ -32,6 +32,11 @@ flag use-gauge manual: True default: False +flag fusion-plugin + description: Use fusion plugin for best performance + manual: True + default: False + common default-extensions default-extensions: BangPatterns @@ -84,6 +89,7 @@ common optimization-options -fdicts-strict -fspec-constr-recursive=16 -fmax-worker-args=16 + -fsimpl-tick-factor=200 common ghc-options import: default-extensions, compile-options, optimization-options @@ -110,6 +116,7 @@ test-suite test benchmark benchmark import: ghc-options + ghc-options: +RTS -M3G -RTS type: exitcode-stdio-1.0 hs-source-dirs: benchmark main-is: Main.hs @@ -117,6 +124,7 @@ benchmark benchmark , streamly == 0.8.1.1 , base >= 4.9 && < 5 , random >= 1.0.0 && < 2 + , deepseq >= 1.4.1 && < 1.5 if flag(use-gauge) build-depends: gauge >= 0.2.4 && < 0.3 @@ -127,3 +135,8 @@ benchmark benchmark (Test.Tasty.Bench as Gauge ,Test.Tasty.Bench as Gauge.Main ) + if flag(fusion-plugin) && !impl(ghcjs) && !impl(ghc < 8.6) + cpp-options: -DFUSION_PLUGIN + ghc-options: -fplugin Fusion.Plugin + build-depends: + fusion-plugin >= 0.2 && < 0.3
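For reviewers who want to try the new API by hand, here is a minimal usage sketch. It is not part of the patch; it mirrors what the benchmarks above do, assumes the streamly 0.8.1.1 API pinned in the cabal file, and uses an arbitrary input stream and window size.

import Streamly.Prelude (SerialT)

import qualified Streamly.Internal.Data.Ring.Foreign as Ring
import qualified Streamly.Prelude as Stream
import qualified Streamly.Statistics as Statistics

main :: IO ()
main = do
    let input = Stream.enumerateFromTo 1 10000 :: SerialT IO Double
    -- Mean of a sliding window holding the last 100 elements; the fold is
    -- incremental and Stream.fold returns only the final window's value.
    windowed <- Stream.fold (Ring.slidingWindow 100 Statistics.mean) input
    -- The same fold over the entire stream: noSlide feeds (a, Nothing) so no
    -- element is ever ejected and the window grows to cover the whole input.
    whole <- Stream.fold (Statistics.noSlide Statistics.mean) input
    print (windowed, whole)

Note that Ring.slidingWindow is imported from a streamly internal module, exactly as the benchmark code above does, so it may change between streamly releases. To observe the running value at every step instead of only the final one, the same window folds can be driven with Stream.postscan, which is how the benchmark's postscan helper exercises them.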