@@ -47,6 +47,7 @@ module Streamly.Internal.Unicode.Parser
4747
4848 -- * Numeric
4949 , signed
50+ , number
5051 , double
5152 , decimal
5253 , hexadecimal
5657import Control.Applicative (Alternative (.. ))
5758import Data.Bits (Bits , (.|.) , shiftL )
5859import Data.Char (ord )
59- import Streamly.Internal.Data.Parser (Parser )
60+ import Data.Ratio ((%) )
61+ import Fusion.Plugin.Types (Fuse (.. ))
62+ import Streamly.Internal.Data.Parser (Parser (.. ), Initial (.. ), Step (.. ))
6063
6164import qualified Data.Char as Char
6265import qualified Streamly.Data.Fold as Fold
@@ -263,36 +266,193 @@ hexadecimal = Parser.takeWhile1 isHexDigit (Fold.foldl' step 0)
263266signed :: (Num a , Monad m ) => Parser Char m a -> Parser Char m a
264267signed p = (negate <$> (char ' -' *> p)) <|> (char ' +' *> p) <|> p
265268
266- -- | Parse a 'Double'.
269+ type Multiplier = Int
270+
271+ -- XXX We can use Int instead of Integer to make it twice as fast. But then we
272+ -- will have to truncate the significant digits before overflow occurs.
273+ type Number = Integer
274+ type DecimalPlaces = Int
275+ type PowerMultiplier = Int
276+ type Power = Int
277+
278+ {-# ANN type ScientificParseState Fuse #-}
279+ data ScientificParseState
280+ = SPInitial
281+ | SPSign ! Multiplier
282+ | SPAfterSign ! Multiplier ! Number
283+ | SPDot ! Multiplier ! Number
284+ | SPAfterDot ! Multiplier ! Number ! DecimalPlaces
285+ | SPExponent ! Multiplier ! Number ! DecimalPlaces
286+ | SPExponentWithSign ! Multiplier ! Number ! DecimalPlaces ! PowerMultiplier
287+ | SPAfterExponent ! Multiplier ! Number ! DecimalPlaces ! PowerMultiplier ! Power
288+
289+ -- | A generic parser for scientific notation of numbers. Returns (mantissa,
290+ -- exponent) tuple. The result can be mapped to 'Double' or any other number
291+ -- representation e.g. @Scientific@.
267292--
268- -- This parser accepts an optional leading sign character, followed by
269- -- at most one decimal digit. The syntax is similar to that accepted by
270- -- the 'read' function, with the exception that a trailing @\'.\'@ is
271- -- consumed.
293+ {-# INLINE number #-}
294+ number :: Monad m => Parser Char m (Integer , Int )
295+ number = Parser (\ s a -> return $ step s a) initial (return . extract)
296+
297+ where
298+
299+ intToInteger :: Int -> Integer
300+ intToInteger = fromIntegral
301+
302+ combineNum buf num = buf * 10 + num
303+
304+ {-# INLINE initial #-}
305+ initial = pure $ IPartial SPInitial
306+
307+ exitSPInitial msg =
308+ " number: expecting sign or decimal digit, got " ++ msg
309+ exitSPSign msg =
310+ " number: expecting decimal digit, got " ++ msg
311+ exitSPAfterSign multiplier num = (intToInteger multiplier * num, 0 )
312+ exitSPAfterDot multiplier num decimalPlaces =
313+ ( intToInteger multiplier * num
314+ , - decimalPlaces
315+ )
316+ exitSPAfterExponent mult num decimalPlaces powerMult powerNum =
317+ let e = powerMult * powerNum - decimalPlaces
318+ in (intToInteger mult * num, e)
319+
320+ {-# INLINE step #-}
321+ step SPInitial val =
322+ case val of
323+ ' +' -> Continue 0 (SPSign 1 )
324+ ' -' -> Continue 0 $ (SPSign (- 1 ))
325+ _ -> do
326+ let num = ord val - 48
327+ if num >= 0 && num <= 9
328+ then Partial 0 $ SPAfterSign 1 (intToInteger num)
329+ else Error $ exitSPInitial $ show val
330+ step (SPSign multiplier) val =
331+ let num = ord val - 48
332+ in if num >= 0 && num <= 9
333+ then Partial 0 $ SPAfterSign multiplier (intToInteger num)
334+ else Error $ exitSPSign $ show val
335+ step (SPAfterSign multiplier buf) val =
336+ case val of
337+ ' .' -> Continue 0 $ SPDot multiplier buf
338+ ' e' -> Continue 0 $ SPExponent multiplier buf 0
339+ ' E' -> Continue 0 $ SPExponent multiplier buf 0
340+ _ ->
341+ let num = ord val - 48
342+ in if num >= 0 && num <= 9
343+ then
344+ Partial 0
345+ $ SPAfterSign multiplier (combineNum buf (intToInteger num))
346+ else Done 1 $ exitSPAfterSign multiplier buf
347+ step (SPDot multiplier buf) val =
348+ let num = ord val - 48
349+ in if num >= 0 && num <= 9
350+ then Partial 0 $ SPAfterDot multiplier (combineNum buf (intToInteger num)) 1
351+ else Done 2 $ exitSPAfterSign multiplier buf
352+ step (SPAfterDot multiplier buf decimalPlaces) val =
353+ case val of
354+ ' e' -> Continue 0 $ SPExponent multiplier buf decimalPlaces
355+ ' E' -> Continue 0 $ SPExponent multiplier buf decimalPlaces
356+ _ ->
357+ let num = ord val - 48
358+ in if num >= 0 && num <= 9
359+ then
360+ Partial 0
361+ $ SPAfterDot
362+ multiplier
363+ (combineNum buf (intToInteger num))
364+ (decimalPlaces + 1 )
365+ else Done 1 $ exitSPAfterDot multiplier buf decimalPlaces
366+ step (SPExponent multiplier buf decimalPlaces) val =
367+ case val of
368+ ' +' -> Continue 0 (SPExponentWithSign multiplier buf decimalPlaces 1 )
369+ ' -' -> Continue 0 (SPExponentWithSign multiplier buf decimalPlaces (- 1 ))
370+ _ -> do
371+ let num = ord val - 48
372+ if num >= 0 && num <= 9
373+ then Partial 0 $ SPAfterExponent multiplier buf decimalPlaces 1 num
374+ else Error $ exitSPInitial $ show val
375+ step (SPExponentWithSign mult buf decimalPlaces powerMult) val =
376+ let num = ord val - 48
377+ in if num >= 0 && num <= 9
378+ then Partial 0 $ SPAfterExponent mult buf decimalPlaces powerMult num
379+ else Error $ exitSPSign $ show val
380+ step (SPAfterExponent mult num decimalPlaces powerMult buf) val =
381+ let n = ord val - 48
382+ in if n >= 0 && n <= 9
383+ then
384+ Partial 0
385+ $ SPAfterExponent
386+ mult num decimalPlaces powerMult (combineNum buf n)
387+ else
388+ Done 1
389+ $ exitSPAfterExponent mult num decimalPlaces powerMult buf
390+
391+ {-# INLINE extract #-}
392+ extract SPInitial = Error $ exitSPInitial " end of input"
393+ extract (SPSign _) = Error $ exitSPSign " end of input"
394+ extract (SPAfterSign mult num) = Done 0 $ exitSPAfterSign mult num
395+ extract (SPDot mult num) = Done 1 $ exitSPAfterSign mult num
396+ extract (SPAfterDot mult num decimalPlaces) =
397+ Done 0 $ exitSPAfterDot mult num decimalPlaces
398+ extract (SPExponent mult num decimalPlaces) =
399+ Done 1 $ exitSPAfterDot mult num decimalPlaces
400+ extract (SPExponentWithSign mult num decimalPlaces _) =
401+ Done 2 $ exitSPAfterDot mult num decimalPlaces
402+ extract (SPAfterExponent mult num decimalPlaces powerMult powerNum) =
403+ Done 0 $ exitSPAfterExponent mult num decimalPlaces powerMult powerNum
404+
405+ -- | Parse a decimal 'Double' value. This parser accepts an optional sign (+ or
406+ -- -) followed by at least one decimal digit. Decimal digits are optionally
407+ -- followed by a decimal point and at least one decimal digit after the point.
408+ -- This parser accepts the maximal valid input as long as it gives a valid
409+ -- number. Specifcally a trailing decimal point is allowed but not consumed.
410+ -- This function does not accept \"NaN\" or \"Infinity\" string representations
411+ -- of double values.
412+ --
413+ -- >>> import qualified Streamly.Data.Stream as Stream
414+ -- >>> import qualified Streamly.Unicode.Parser as Unicode
415+ --
416+ -- >>> p = Stream.parse Unicode.double . Stream.fromList
417+ --
418+ -- >>> p "-1.23e-123"
419+ -- Right (-1.23e-123)
272420--
273- -- === Examples
421+ -- Trailing input examples:
274422--
275- -- Examples with behaviour identical to 'read', if you feed an empty
276- -- continuation to the first result:
423+ -- >>> p "1."
424+ -- Right 1.0
277425--
278- -- > IS.parse double (IS.fromList "3") == 3.0
279- -- > IS.parse double (IS.fromList "3.1") == 3.1
280- -- > IS.parse double (IS.fromList "3e4") == 30000.0
281- -- > IS.parse double (IS.fromList "3.1e4") == 31000.0
282- -- > IS.parse double (IS.fromList "3e") == 30
426+ -- >>> p "1.2.3"
427+ -- Right 1.2
283428--
284- -- Examples with behaviour identical to 'read':
429+ -- >>> p "1e"
430+ -- Right 1.0
285431--
286- -- > IS.parse (IS.fromList ".3") == error "Parse failed "
287- -- > IS.parse (IS.fromList "e3") == error "Parse failed"
432+ -- >>> p "1e2.3 "
433+ -- Right 100.0
288434--
289- -- Example of difference from 'read':
435+ -- >>> p "1+2"
436+ -- Right 1.0
290437--
291- -- > IS.parse double (IS.fromList "3.foo") == 3.0
438+ -- Error cases:
292439--
293- -- This function does not accept string representations of \"NaN\" or
294- -- \"Infinity\".
440+ -- >>> p ""
441+ -- Left (ParseError "number: expecting sign or decimal digit, got end of input")
295442--
296- -- /Unimplemented/
297- double :: Parser Char m Double
298- double = undefined
443+ -- >>> p ".1"
444+ -- Left (ParseError "number: expecting sign or decimal digit, got '.'")
445+ --
446+ -- >>> p "+"
447+ -- Left (ParseError "number: expecting decimal digit, got end of input")
448+ --
449+ {-# INLINE double #-}
450+ double :: Monad m => Parser Char m Double
451+ double = fmap f number
452+
453+ where
454+
455+ f (m, e) =
456+ if e > 0
457+ then fromIntegral (m * 10 ^ e)
458+ else fromRational (m % 10 ^ (- e))
0 commit comments