Implement double Parser (#2278)

rnjtranjan · harendra-kumar · adithyaov · web-flow · commit afcc8ea59fa8 · 2023-07-24T18:34:01.000+05:30
Co-authored-by: Harendra Kumar &lt;harendra@composewell.com&gt;
Co-authored-by: Adithya Kumar &lt;adithya@composewell.com&gt;
diff --git a/benchmark/Streamly/Benchmark/Unicode/Parser.hs b/benchmark/Streamly/Benchmark/Unicode/Parser.hs
@@ -0,0 +1,90 @@
+-- |
+-- Module      : Streamly.Benchmark.Unicode.Parser
+-- Copyright   : (c) 2020 Composewell Technologies
+--
+-- License     : BSD-3-Clause
+-- Maintainer  : streamly@composewell.com
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# OPTIONS_GHC -Wno-orphans #-}
+
+module Main
+  (
+    main
+  ) where
+
+import Control.DeepSeq (NFData(..))
+import Control.Monad (replicateM_)
+import Streamly.Internal.Data.Parser (ParseError(..))
+import Streamly.Internal.Data.Stream (Stream)
+import Prelude hiding
+    (any, all, take, sequence, sequence_, sequenceA, takeWhile, dropWhile)
+
+import qualified Streamly.Internal.Data.Fold as Fold
+import qualified Streamly.Data.Stream as Stream
+import qualified Streamly.Data.Unfold as Unfold
+import qualified Streamly.Internal.Unicode.Parser as PRU
+
+import Gauge hiding (env)
+import Streamly.Benchmark.Common
+import Streamly.Benchmark.Common.Handle
+
+-- | Generate consecutive 'Int's starting at @n@ and ending at @n + value@
+-- (inclusive).
+{-# INLINE sourceUnfoldrM #-}
+sourceUnfoldrM :: Monad m => Int -> Int -> Stream m Int
+sourceUnfoldrM value n = Stream.unfoldrM next n
+    where
+    upper = n + value
+    next i = return $ if i > upper then Nothing else Just (i, i + 1)
+
+-- | Apply the given stream consumer @count@ times to a stream holding a fixed
+-- numeric literal.
+runParser :: Int -> (Stream IO Char -> IO a) -> IO ()
+runParser count p = replicateM_ count (p input)
+    where
+    input = Stream.unfold Unfold.fromList "+123456789.123456789e-123"
+
+-- | Benchmark a consumer, applying it @value@ times to the default source.
+{-# INLINE benchIOSink #-}
+benchIOSink :: Int -> String -> (Stream IO Char -> IO b) -> Benchmark
+benchIOSink value name f = bench name (nfIO (runParser value f))
+
+-- | Run the 'PRU.double' parser on a stream of characters, returning either
+-- the parse error or the parsed 'Double'.
+{-# INLINE double #-}
+double :: Monad m => Stream m Char -> m (Either ParseError Double)
+double = Stream.parse PRU.double
+
+-------------------------------------------------------------------------------
+-- Benchmarks
+-------------------------------------------------------------------------------
+
+-- | Name from which the label of this module's benchmark group is derived.
+moduleName :: String
+moduleName = "Unicode.Parser"
+
+-- Orphan instance so that benchmark results containing a 'ParseError' can be
+-- fully evaluated; forces the value carried by the error.
+instance NFData ParseError where
+    {-# INLINE rnf #-}
+    rnf (ParseError msg) = rnf msg
+
+-- | Benchmarks in the O(n) heap group, each repeated @value@ times.
+o_n_heap_serial :: Int -> [Benchmark]
+o_n_heap_serial value = [benchIOSink value "double" double]
+
+-------------------------------------------------------------------------------
+-- Driver
+-------------------------------------------------------------------------------
+
+-- | Benchmark entry point: create the handle environment and run the
+-- benchmark groups with the command line options.
+main :: IO ()
+main = mkHandleBenchEnv >>= \env ->
+    runWithCLIOptsEnv defaultStreamSize alloc (allBenchmarks env)
+
+    where
+
+    -- Allocation action passed to the benchmark driver.
+    alloc value =
+        Stream.fold Fold.toList (Stream.chunksOf 100 (sourceUnfoldrM value 0))
+
+    -- The first two arguments are ignored; only the stream size is used.
+    allBenchmarks _ _ value =
+        [bgroup (o_n_heap_prefix moduleName) (o_n_heap_serial value)]
diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal
@@ -741,3 +741,9 @@ benchmark Unicode.Utf8
     buildable: False
   else
     buildable: True
+
+benchmark Unicode.Parser
+  import: bench-options
+  type: exitcode-stdio-1.0
+  hs-source-dirs: Streamly/Benchmark/Unicode
+  main-is: Parser.hs
diff --git a/core/src/Streamly/Internal/Unicode/Parser.hs b/core/src/Streamly/Internal/Unicode/Parser.hs
@@ -47,6 +47,7 @@ module Streamly.Internal.Unicode.Parser
 
     -- * Numeric
     , signed
+    , number
     , double
     , decimal
     , hexadecimal
@@ -56,7 +57,9 @@ where
 import Control.Applicative (Alternative(..))
 import Data.Bits (Bits, (.|.), shiftL)
 import Data.Char (ord)
-import Streamly.Internal.Data.Parser (Parser)
+import Data.Ratio ((%))
+import Fusion.Plugin.Types (Fuse(..))
+import Streamly.Internal.Data.Parser (Parser(..), Initial(..),  Step(..))
 
 import qualified Data.Char as Char
 import qualified Streamly.Data.Fold as Fold
@@ -263,36 +266,193 @@ hexadecimal = Parser.takeWhile1 isHexDigit (Fold.foldl' step 0)
 signed :: (Num a, Monad m) => Parser Char m a -> Parser Char m a
 signed p = (negate <$> (char '-' *> p)) <|> (char '+' *> p) <|> p
 
--- | Parse a 'Double'.
+-- Sign of the mantissa: 1 for positive, -1 for negative.
+type Multiplier = Int
+
+-- Digits of the mantissa accumulated so far, as an unsigned integer.
+--
+-- XXX We can use Int instead of Integer to make it twice as fast. But then we
+-- will have to truncate the significant digits before overflow occurs.
+type Number = Integer
+-- Count of digits consumed after the decimal point.
+type DecimalPlaces = Int
+-- Sign of the exponent: 1 for positive, -1 for negative.
+type PowerMultiplier = Int
+-- Digits of the exponent accumulated so far, as an unsigned integer.
+type Power = Int
+
+-- Parser state of 'number'. Each constructor records how far into the
+-- @[sign] digits [. digits] [e [sign] digits]@ syntax the parser has got,
+-- along with the values collected so far.
+{-# ANN type ScientificParseState Fuse #-}
+data ScientificParseState
+  -- Nothing has been consumed yet.
+  = SPInitial
+  -- A leading sign has been consumed, no digit yet.
+  | SPSign !Multiplier
+  -- At least one digit of the integer part has been consumed.
+  | SPAfterSign !Multiplier !Number
+  -- A decimal point has been consumed, no fractional digit yet.
+  | SPDot !Multiplier !Number
+  -- At least one fractional digit has been consumed.
+  | SPAfterDot !Multiplier !Number !DecimalPlaces
+  -- An exponent marker ('e' or 'E') has been consumed, nothing after it yet.
+  | SPExponent !Multiplier !Number !DecimalPlaces
+  -- The sign of the exponent has been consumed, no exponent digit yet.
+  | SPExponentWithSign !Multiplier !Number !DecimalPlaces !PowerMultiplier
+  -- At least one exponent digit has been consumed.
+  | SPAfterExponent !Multiplier !Number !DecimalPlaces !PowerMultiplier !Power
+
+-- | A generic parser for scientific notation of numbers. Returns (mantissa,
+-- exponent) tuple. The result can be mapped to 'Double' or any other number
+-- representation e.g. @Scientific@.
 --
--- This parser accepts an optional leading sign character, followed by
--- at most one decimal digit.  The syntax is similar to that accepted by
--- the 'read' function, with the exception that a trailing @\'.\'@ is
--- consumed.
+{-# INLINE number #-}
+number :: Monad m => Parser Char m (Integer, Int)
+number =  Parser (\s a -> return $ step s a) initial (return . extract)
+
+    where
+
+    intToInteger :: Int -> Integer
+    intToInteger = fromIntegral
+
+    -- Append one decimal digit to the value accumulated so far.
+    combineNum buf num = buf * 10 + num
+
+    {-# INLINE initial #-}
+    initial = pure $ IPartial SPInitial
+
+    -- Error messages for the states in which no valid number has been read.
+    exitSPInitial msg =
+        "number: expecting sign or decimal digit, got " ++ msg
+    exitSPSign msg =
+        "number: expecting decimal digit, got " ++ msg
+
+    -- Results, as (mantissa, exponent), for the states holding a valid number.
+    exitSPAfterSign multiplier num = (intToInteger multiplier * num, 0)
+    exitSPAfterDot multiplier num decimalPlaces =
+        ( intToInteger multiplier * num
+        , -decimalPlaces
+        )
+    exitSPAfterExponent mult num decimalPlaces powerMult powerNum =
+        let e = powerMult * powerNum - decimalPlaces
+         in (intToInteger mult * num, e)
+
+    -- Consume one character. A digit is recognized by @ord val - 48@ (48 is
+    -- @ord '0'@) falling in the range 0..9.
+    --
+    -- When the current character cannot extend the number but a valid number
+    -- has already been read, the parser stops with that number. The count
+    -- given to 'Done' covers the current character plus any dangling '.',
+    -- exponent marker or exponent sign consumed before it, so that none of
+    -- them is treated as part of the number.
+    {-# INLINE step #-}
+    step SPInitial val =
+        case val of
+          '+' -> Continue 0 (SPSign 1)
+          '-' -> Continue 0 (SPSign (-1))
+          _ ->
+              let num = ord val - 48
+               in if num >= 0 && num <= 9
+                  then Partial 0 $ SPAfterSign 1 (intToInteger num)
+                  else Error $ exitSPInitial $ show val
+    step (SPSign multiplier) val =
+        let num = ord val - 48
+         in if num >= 0 && num <= 9
+            then Partial 0 $ SPAfterSign multiplier (intToInteger num)
+            else Error $ exitSPSign $ show val
+    step (SPAfterSign multiplier buf) val =
+        case val of
+            '.' -> Continue 0 $ SPDot multiplier buf
+            'e' -> Continue 0 $ SPExponent multiplier buf 0
+            'E' -> Continue 0 $ SPExponent multiplier buf 0
+            _ ->
+                let num = ord val - 48
+                 in if num >= 0 && num <= 9
+                    then
+                        Partial 0
+                            $ SPAfterSign multiplier (combineNum buf (intToInteger num))
+                    else Done 1 $ exitSPAfterSign multiplier buf
+    step (SPDot multiplier buf) val =
+        let num = ord val - 48
+         in if num >= 0 && num <= 9
+            then Partial 0 $ SPAfterDot multiplier (combineNum buf (intToInteger num)) 1
+            -- Dangling '.': give back the '.' and the current character.
+            else Done 2 $ exitSPAfterSign multiplier buf
+    step (SPAfterDot multiplier buf decimalPlaces) val =
+        case val of
+            'e' -> Continue 0 $ SPExponent multiplier buf decimalPlaces
+            'E' -> Continue 0 $ SPExponent multiplier buf decimalPlaces
+            _ ->
+                let num = ord val - 48
+                 in if num >= 0 && num <= 9
+                    then
+                        Partial 0
+                            $ SPAfterDot
+                                  multiplier
+                                  (combineNum buf (intToInteger num))
+                                  (decimalPlaces + 1)
+                    else Done 1 $ exitSPAfterDot multiplier buf decimalPlaces
+    step (SPExponent multiplier buf decimalPlaces) val =
+        case val of
+          '+' -> Continue 0 (SPExponentWithSign multiplier buf decimalPlaces 1)
+          '-' -> Continue 0 (SPExponentWithSign multiplier buf decimalPlaces (-1))
+          _ ->
+              let num = ord val - 48
+               in if num >= 0 && num <= 9
+                  then
+                      Partial 0
+                          $ SPAfterExponent multiplier buf decimalPlaces 1 num
+                  -- Dangling exponent marker: not an error, the number read
+                  -- before it is valid. Give back the marker and the current
+                  -- character, consistent with 'extract' for this state.
+                  else Done 2 $ exitSPAfterDot multiplier buf decimalPlaces
+    step (SPExponentWithSign mult buf decimalPlaces powerMult) val =
+        let num = ord val - 48
+         in if num >= 0 && num <= 9
+            then Partial 0 $ SPAfterExponent mult buf decimalPlaces powerMult num
+            -- Dangling exponent sign: give back the marker, the sign and the
+            -- current character, consistent with 'extract' for this state.
+            else Done 3 $ exitSPAfterDot mult buf decimalPlaces
+    step (SPAfterExponent mult num decimalPlaces powerMult buf) val =
+        let n = ord val - 48
+         in if n >= 0 && n <= 9
+            then
+                Partial 0
+                    $ SPAfterExponent
+                          mult num decimalPlaces powerMult (combineNum buf n)
+            else
+                Done 1
+                    $ exitSPAfterExponent mult num decimalPlaces powerMult buf
+
+    -- End of input. There is no current character here, so the counts given
+    -- to 'Done' cover only a dangling '.', exponent marker or exponent sign.
+    {-# INLINE extract #-}
+    extract SPInitial = Error $ exitSPInitial "end of input"
+    extract (SPSign _) = Error $ exitSPSign "end of input"
+    extract (SPAfterSign mult num) = Done 0 $ exitSPAfterSign mult num
+    extract (SPDot mult num) = Done 1 $ exitSPAfterSign mult num
+    extract (SPAfterDot mult num decimalPlaces) =
+        Done 0 $ exitSPAfterDot mult num decimalPlaces
+    extract (SPExponent mult num decimalPlaces) =
+        Done 1 $ exitSPAfterDot mult num decimalPlaces
+    extract (SPExponentWithSign mult num decimalPlaces _) =
+        Done 2 $ exitSPAfterDot mult num decimalPlaces
+    extract (SPAfterExponent mult num decimalPlaces powerMult powerNum) =
+        Done 0 $ exitSPAfterExponent mult num decimalPlaces powerMult powerNum
+
+-- | Parse a decimal 'Double' value. This parser accepts an optional sign (+ or
+-- -) followed by at least one decimal digit. Decimal digits are optionally
+-- followed by a decimal point and at least one decimal digit after the point.
+-- This parser accepts the maximal valid input as long as it gives a valid
+-- number. Specifically, a trailing decimal point is allowed but not consumed.
+-- This function does not accept \"NaN\" or \"Infinity\" string representations
+-- of double values.
+--
+-- >>> import qualified Streamly.Data.Stream as Stream
+-- >>> import qualified Streamly.Unicode.Parser as Unicode
+--
+-- >>> p = Stream.parse Unicode.double . Stream.fromList
+--
+-- >>> p "-1.23e-123"
+-- Right (-1.23e-123)
 --
--- === Examples
+-- Trailing input examples:
 --
--- Examples with behaviour identical to 'read', if you feed an empty
--- continuation to the first result:
+-- >>> p "1."
+-- Right 1.0
 --
--- > IS.parse double (IS.fromList "3")     == 3.0
--- > IS.parse double (IS.fromList "3.1")   == 3.1
--- > IS.parse double (IS.fromList "3e4")   == 30000.0
--- > IS.parse double (IS.fromList "3.1e4") == 31000.0
--- > IS.parse double (IS.fromList "3e")    == 30
+-- >>> p "1.2.3"
+-- Right 1.2
 --
--- Examples with behaviour identical to 'read':
+-- >>> p "1e"
+-- Right 1.0
 --
--- > IS.parse (IS.fromList ".3")    == error "Parse failed"
--- > IS.parse (IS.fromList "e3")    == error "Parse failed"
+-- >>> p "1e2.3"
+-- Right 100.0
 --
--- Example of difference from 'read':
+-- >>> p "1+2"
+-- Right 1.0
 --
--- > IS.parse double (IS.fromList "3.foo") == 3.0
+-- Error cases:
 --
--- This function does not accept string representations of \"NaN\" or
--- \"Infinity\".
+-- >>> p ""
+-- Left (ParseError "number: expecting sign or decimal digit, got end of input")
 --
--- /Unimplemented/
-double :: Parser Char m Double
-double = undefined
+-- >>> p ".1"
+-- Left (ParseError "number: expecting sign or decimal digit, got '.'")
+--
+-- >>> p "+"
+-- Left (ParseError "number: expecting decimal digit, got end of input")
+--
+{-# INLINE double #-}
+double :: Monad m => Parser Char m Double
+double = toDouble <$> number
+
+    where
+
+    -- Convert the (mantissa, exponent) pair produced by 'number'. The power
+    -- of ten is computed on 'Integer', exactly, before the final conversion.
+    toDouble (mantissa, expo)
+        | expo > 0 = fromIntegral (mantissa * 10 ^ expo)
+        | otherwise = fromRational (mantissa % 10 ^ negate expo)
diff --git a/core/src/Streamly/Unicode/Parser.hs b/core/src/Streamly/Unicode/Parser.hs
@@ -50,6 +50,7 @@ module Streamly.Unicode.Parser
     -- * Digit Sequences (Numbers)
     , decimal
     , hexadecimal
+    , double
 
     -- * Modifiers
     , signed
diff --git a/hie.yaml b/hie.yaml
@@ -86,6 +86,8 @@ cradle:
               component: "lib:streamly-benchmarks"
             - path: "./benchmark/Streamly/Benchmark/Data/Fold/Window.hs"
               component: "bench:Data.Fold.Window"
+            - path: "./benchmark/Streamly/Benchmark/Unicode/Parser.hs"
+              component: "bench:Unicode.Parser"
     - path: "./test"
       config:
         cradle:
@@ -156,6 +158,8 @@ cradle:
               component: "test:Prelude.ZipSerial"
             - path: "./test/Streamly/Test/Unicode/Stream.hs"
               component: "test:Unicode.Stream"
+            - path: "./test/Streamly/Test/Unicode/Parser.hs"
+              component: "test:Unicode.Parser"
             - path: "./test/Streamly/Test/Serialize/Serializable.hs"
               component: "test:Serialize.Serializable"
             - path: "./test/lib/"
diff --git a/test/Streamly/Test/Unicode/Parser.hs b/test/Streamly/Test/Unicode/Parser.hs
diff --git a/test/streamly-tests.cabal b/test/streamly-tests.cabal