Skip to content

Commit afcc8ea

Browse files
rnjtranjanharendra-kumaradithyaov
authored
Implement double Parser (#2278)
Co-authored-by: Harendra Kumar <harendra@composewell.com> Co-authored-by: Adithya Kumar <adithya@composewell.com>
1 parent f5f25b4 commit afcc8ea

File tree

7 files changed

+413
-24
lines changed

7 files changed

+413
-24
lines changed
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
-- |
2+
-- Module : Streamly.Benchmark.Data.Parser
3+
-- Copyright : (c) 2020 Composewell Technologies
4+
--
5+
-- License : BSD-3-Clause
6+
-- Maintainer : streamly@composewell.com
7+
8+
{-# LANGUAGE CPP #-}
9+
{-# LANGUAGE FlexibleContexts #-}
10+
{-# LANGUAGE ScopedTypeVariables #-}
11+
{-# OPTIONS_GHC -Wno-orphans #-}
12+
13+
module Main
14+
(
15+
main
16+
) where
17+
18+
import Control.DeepSeq (NFData(..))
19+
import Control.Monad (replicateM_)
20+
import Streamly.Internal.Data.Parser (ParseError(..))
21+
import Streamly.Internal.Data.Stream (Stream)
22+
import Prelude hiding
23+
(any, all, take, sequence, sequence_, sequenceA, takeWhile, dropWhile)
24+
25+
import qualified Streamly.Internal.Data.Fold as Fold
26+
import qualified Streamly.Data.Stream as Stream
27+
import qualified Streamly.Data.Unfold as Unfold
28+
import qualified Streamly.Internal.Unicode.Parser as PRU
29+
30+
import Gauge hiding (env)
31+
import Streamly.Benchmark.Common
32+
import Streamly.Benchmark.Common.Handle
33+
34+
{-# INLINE sourceUnfoldrM #-}
35+
sourceUnfoldrM :: Monad m => Int -> Int -> Stream m Int
36+
sourceUnfoldrM value n = Stream.unfoldrM step n
37+
where
38+
step cnt =
39+
if cnt > n + value
40+
then return Nothing
41+
else return (Just (cnt, cnt + 1))
42+
43+
runParser :: Int -> (Stream IO Char -> IO a) -> IO ()
44+
runParser count p = do
45+
let v = "+123456789.123456789e-123"
46+
let s = Stream.unfold Unfold.fromList v
47+
replicateM_ count (p s)
48+
49+
-- | Takes a fold method, and uses it with a default source.
50+
{-# INLINE benchIOSink #-}
51+
benchIOSink :: Int -> String -> (Stream IO Char -> IO b) -> Benchmark
52+
benchIOSink value name f = bench name $ nfIO $ runParser value f
53+
54+
{-# INLINE double #-}
55+
double :: Monad m => Stream m Char -> m (Either ParseError Double)
56+
double = Stream.parse PRU.double
57+
58+
-------------------------------------------------------------------------------
59+
-- Benchmarks
60+
-------------------------------------------------------------------------------
61+
62+
moduleName :: String
63+
moduleName = "Unicode.Parser"
64+
65+
instance NFData ParseError where
66+
{-# INLINE rnf #-}
67+
rnf (ParseError x) = rnf x
68+
69+
o_n_heap_serial :: Int -> [Benchmark]
70+
o_n_heap_serial value =
71+
[
72+
benchIOSink value "double" double
73+
]
74+
75+
-------------------------------------------------------------------------------
76+
-- Driver
77+
-------------------------------------------------------------------------------
78+
79+
main :: IO ()
80+
main = do
81+
env <- mkHandleBenchEnv
82+
runWithCLIOptsEnv defaultStreamSize alloc (allBenchmarks env)
83+
84+
where
85+
86+
alloc value = Stream.fold Fold.toList $ Stream.chunksOf 100 $ sourceUnfoldrM value 0
87+
88+
allBenchmarks _ _ value =
89+
[ bgroup (o_n_heap_prefix moduleName) (o_n_heap_serial value)
90+
]

benchmark/streamly-benchmarks.cabal

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,3 +741,9 @@ benchmark Unicode.Utf8
741741
buildable: False
742742
else
743743
buildable: True
744+
745+
benchmark Unicode.Parser
746+
import: bench-options
747+
type: exitcode-stdio-1.0
748+
hs-source-dirs: Streamly/Benchmark/Unicode
749+
main-is: Parser.hs

core/src/Streamly/Internal/Unicode/Parser.hs

Lines changed: 184 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ module Streamly.Internal.Unicode.Parser
4747

4848
-- * Numeric
4949
, signed
50+
, number
5051
, double
5152
, decimal
5253
, hexadecimal
@@ -56,7 +57,9 @@ where
5657
import Control.Applicative (Alternative(..))
5758
import Data.Bits (Bits, (.|.), shiftL)
5859
import Data.Char (ord)
59-
import Streamly.Internal.Data.Parser (Parser)
60+
import Data.Ratio ((%))
61+
import Fusion.Plugin.Types (Fuse(..))
62+
import Streamly.Internal.Data.Parser (Parser(..), Initial(..), Step(..))
6063

6164
import qualified Data.Char as Char
6265
import qualified Streamly.Data.Fold as Fold
@@ -263,36 +266,193 @@ hexadecimal = Parser.takeWhile1 isHexDigit (Fold.foldl' step 0)
263266
signed :: (Num a, Monad m) => Parser Char m a -> Parser Char m a
264267
signed p = (negate <$> (char '-' *> p)) <|> (char '+' *> p) <|> p
265268

266-
-- | Parse a 'Double'.
269+
type Multiplier = Int
270+
271+
-- XXX We can use Int instead of Integer to make it twice as fast. But then we
272+
-- will have to truncate the significant digits before overflow occurs.
273+
type Number = Integer
274+
type DecimalPlaces = Int
275+
type PowerMultiplier = Int
276+
type Power = Int
277+
278+
{-# ANN type ScientificParseState Fuse #-}
279+
data ScientificParseState
280+
= SPInitial
281+
| SPSign !Multiplier
282+
| SPAfterSign !Multiplier !Number
283+
| SPDot !Multiplier !Number
284+
| SPAfterDot !Multiplier !Number !DecimalPlaces
285+
| SPExponent !Multiplier !Number !DecimalPlaces
286+
| SPExponentWithSign !Multiplier !Number !DecimalPlaces !PowerMultiplier
287+
| SPAfterExponent !Multiplier !Number !DecimalPlaces !PowerMultiplier !Power
288+
289+
-- | A generic parser for scientific notation of numbers. Returns (mantissa,
290+
-- exponent) tuple. The result can be mapped to 'Double' or any other number
291+
-- representation e.g. @Scientific@.
267292
--
268-
-- This parser accepts an optional leading sign character, followed by
269-
-- at most one decimal digit. The syntax is similar to that accepted by
270-
-- the 'read' function, with the exception that a trailing @\'.\'@ is
271-
-- consumed.
293+
{-# INLINE number #-}
294+
number :: Monad m => Parser Char m (Integer, Int)
295+
number = Parser (\s a -> return $ step s a) initial (return . extract)
296+
297+
where
298+
299+
intToInteger :: Int -> Integer
300+
intToInteger = fromIntegral
301+
302+
combineNum buf num = buf * 10 + num
303+
304+
{-# INLINE initial #-}
305+
initial = pure $ IPartial SPInitial
306+
307+
exitSPInitial msg =
308+
"number: expecting sign or decimal digit, got " ++ msg
309+
exitSPSign msg =
310+
"number: expecting decimal digit, got " ++ msg
311+
exitSPAfterSign multiplier num = (intToInteger multiplier * num, 0)
312+
exitSPAfterDot multiplier num decimalPlaces =
313+
( intToInteger multiplier * num
314+
, -decimalPlaces
315+
)
316+
exitSPAfterExponent mult num decimalPlaces powerMult powerNum =
317+
let e = powerMult * powerNum - decimalPlaces
318+
in (intToInteger mult * num, e)
319+
320+
{-# INLINE step #-}
321+
step SPInitial val =
322+
case val of
323+
'+' -> Continue 0 (SPSign 1)
324+
'-' -> Continue 0 $ (SPSign (-1))
325+
_ -> do
326+
let num = ord val - 48
327+
if num >= 0 && num <= 9
328+
then Partial 0 $ SPAfterSign 1 (intToInteger num)
329+
else Error $ exitSPInitial $ show val
330+
step (SPSign multiplier) val =
331+
let num = ord val - 48
332+
in if num >= 0 && num <= 9
333+
then Partial 0 $ SPAfterSign multiplier (intToInteger num)
334+
else Error $ exitSPSign $ show val
335+
step (SPAfterSign multiplier buf) val =
336+
case val of
337+
'.' -> Continue 0 $ SPDot multiplier buf
338+
'e' -> Continue 0 $ SPExponent multiplier buf 0
339+
'E' -> Continue 0 $ SPExponent multiplier buf 0
340+
_ ->
341+
let num = ord val - 48
342+
in if num >= 0 && num <= 9
343+
then
344+
Partial 0
345+
$ SPAfterSign multiplier (combineNum buf (intToInteger num))
346+
else Done 1 $ exitSPAfterSign multiplier buf
347+
step (SPDot multiplier buf) val =
348+
let num = ord val - 48
349+
in if num >= 0 && num <= 9
350+
then Partial 0 $ SPAfterDot multiplier (combineNum buf (intToInteger num)) 1
351+
else Done 2 $ exitSPAfterSign multiplier buf
352+
step (SPAfterDot multiplier buf decimalPlaces) val =
353+
case val of
354+
'e' -> Continue 0 $ SPExponent multiplier buf decimalPlaces
355+
'E' -> Continue 0 $ SPExponent multiplier buf decimalPlaces
356+
_ ->
357+
let num = ord val - 48
358+
in if num >= 0 && num <= 9
359+
then
360+
Partial 0
361+
$ SPAfterDot
362+
multiplier
363+
(combineNum buf (intToInteger num))
364+
(decimalPlaces + 1)
365+
else Done 1 $ exitSPAfterDot multiplier buf decimalPlaces
366+
step (SPExponent multiplier buf decimalPlaces) val =
367+
case val of
368+
'+' -> Continue 0 (SPExponentWithSign multiplier buf decimalPlaces 1)
369+
'-' -> Continue 0 (SPExponentWithSign multiplier buf decimalPlaces (-1))
370+
_ -> do
371+
let num = ord val - 48
372+
if num >= 0 && num <= 9
373+
then Partial 0 $ SPAfterExponent multiplier buf decimalPlaces 1 num
374+
else Error $ exitSPInitial $ show val
375+
step (SPExponentWithSign mult buf decimalPlaces powerMult) val =
376+
let num = ord val - 48
377+
in if num >= 0 && num <= 9
378+
then Partial 0 $ SPAfterExponent mult buf decimalPlaces powerMult num
379+
else Error $ exitSPSign $ show val
380+
step (SPAfterExponent mult num decimalPlaces powerMult buf) val =
381+
let n = ord val - 48
382+
in if n >= 0 && n <= 9
383+
then
384+
Partial 0
385+
$ SPAfterExponent
386+
mult num decimalPlaces powerMult (combineNum buf n)
387+
else
388+
Done 1
389+
$ exitSPAfterExponent mult num decimalPlaces powerMult buf
390+
391+
{-# INLINE extract #-}
392+
extract SPInitial = Error $ exitSPInitial "end of input"
393+
extract (SPSign _) = Error $ exitSPSign "end of input"
394+
extract (SPAfterSign mult num) = Done 0 $ exitSPAfterSign mult num
395+
extract (SPDot mult num) = Done 1 $ exitSPAfterSign mult num
396+
extract (SPAfterDot mult num decimalPlaces) =
397+
Done 0 $ exitSPAfterDot mult num decimalPlaces
398+
extract (SPExponent mult num decimalPlaces) =
399+
Done 1 $ exitSPAfterDot mult num decimalPlaces
400+
extract (SPExponentWithSign mult num decimalPlaces _) =
401+
Done 2 $ exitSPAfterDot mult num decimalPlaces
402+
extract (SPAfterExponent mult num decimalPlaces powerMult powerNum) =
403+
Done 0 $ exitSPAfterExponent mult num decimalPlaces powerMult powerNum
404+
405+
-- | Parse a decimal 'Double' value. This parser accepts an optional sign (+ or
406+
-- -) followed by at least one decimal digit. Decimal digits are optionally
407+
-- followed by a decimal point and at least one decimal digit after the point.
408+
-- This parser accepts the maximal valid input as long as it gives a valid
409+
-- number. Specifcally a trailing decimal point is allowed but not consumed.
410+
-- This function does not accept \"NaN\" or \"Infinity\" string representations
411+
-- of double values.
412+
--
413+
-- >>> import qualified Streamly.Data.Stream as Stream
414+
-- >>> import qualified Streamly.Unicode.Parser as Unicode
415+
--
416+
-- >>> p = Stream.parse Unicode.double . Stream.fromList
417+
--
418+
-- >>> p "-1.23e-123"
419+
-- Right (-1.23e-123)
272420
--
273-
-- === Examples
421+
-- Trailing input examples:
274422
--
275-
-- Examples with behaviour identical to 'read', if you feed an empty
276-
-- continuation to the first result:
423+
-- >>> p "1."
424+
-- Right 1.0
277425
--
278-
-- > IS.parse double (IS.fromList "3") == 3.0
279-
-- > IS.parse double (IS.fromList "3.1") == 3.1
280-
-- > IS.parse double (IS.fromList "3e4") == 30000.0
281-
-- > IS.parse double (IS.fromList "3.1e4") == 31000.0
282-
-- > IS.parse double (IS.fromList "3e") == 30
426+
-- >>> p "1.2.3"
427+
-- Right 1.2
283428
--
284-
-- Examples with behaviour identical to 'read':
429+
-- >>> p "1e"
430+
-- Right 1.0
285431
--
286-
-- > IS.parse (IS.fromList ".3") == error "Parse failed"
287-
-- > IS.parse (IS.fromList "e3") == error "Parse failed"
432+
-- >>> p "1e2.3"
433+
-- Right 100.0
288434
--
289-
-- Example of difference from 'read':
435+
-- >>> p "1+2"
436+
-- Right 1.0
290437
--
291-
-- > IS.parse double (IS.fromList "3.foo") == 3.0
438+
-- Error cases:
292439
--
293-
-- This function does not accept string representations of \"NaN\" or
294-
-- \"Infinity\".
440+
-- >>> p ""
441+
-- Left (ParseError "number: expecting sign or decimal digit, got end of input")
295442
--
296-
-- /Unimplemented/
297-
double :: Parser Char m Double
298-
double = undefined
443+
-- >>> p ".1"
444+
-- Left (ParseError "number: expecting sign or decimal digit, got '.'")
445+
--
446+
-- >>> p "+"
447+
-- Left (ParseError "number: expecting decimal digit, got end of input")
448+
--
449+
{-# INLINE double #-}
450+
double :: Monad m => Parser Char m Double
451+
double = fmap f number
452+
453+
where
454+
455+
f (m, e) =
456+
if e > 0
457+
then fromIntegral (m * 10 ^ e)
458+
else fromRational (m % 10 ^ (-e))

core/src/Streamly/Unicode/Parser.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ module Streamly.Unicode.Parser
5050
-- * Digit Sequences (Numbers)
5151
, decimal
5252
, hexadecimal
53+
, double
5354

5455
-- * Modifiers
5556
, signed

hie.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ cradle:
8686
component: "lib:streamly-benchmarks"
8787
- path: "./benchmark/Streamly/Benchmark/Data/Fold/Window.hs"
8888
component: "bench:Data.Fold.Window"
89+
- path: "./benchmark/Streamly/Benchmark/Unicode/Parser.hs"
90+
component: "bench:Unicode.Parser"
8991
- path: "./test"
9092
config:
9193
cradle:
@@ -156,6 +158,8 @@ cradle:
156158
component: "test:Prelude.ZipSerial"
157159
- path: "./test/Streamly/Test/Unicode/Stream.hs"
158160
component: "test:Unicode.Stream"
161+
- path: "./test/Streamly/Test/Unicode/Parser.hs"
162+
component: "test:Unicode.Parser"
159163
- path: "./test/Streamly/Test/Serialize/Serializable.hs"
160164
component: "test:Serialize.Serializable"
161165
- path: "./test/lib/"

0 commit comments

Comments
 (0)