Skip to content

Commit 7751130

Browse files
committed
feat: implement comment parser
add lexer tokens and rules remove lexer "Whitespace" token This token is not needed, we will later use the position information to pad each token. implement "Comment" handling ... which is not handling at all for the time being temporary fix by dropping comments before parseGenericPackageDescription make metaFields a map of positions rearrange and simplify field make lexer emit comment wherever they would occur stop parser from emitting indentation warning for comments fix: restore checkIndentation behaviour for Field test: add dummy tests test: accept new golden expressions test: accept new golden expressions test: rename comment test group debug: trace tokens fix: split comments recursively fix: consume comments after colon in FieldLayoutOrBraces debug: remove tracing test: update expected test: improve comment tests test: correct comment tests test: assert interleaving comment parsing fix: correct interleaving comment parsing test: update expected debug: remove tracing test: assert parsing of fieldline flag test: update expected fix: correct parsing fieldLine starting with -- as comment test: update expected test: remove test case that doesn't pass on upstream minor fixes test: ignore comment in test comparison docs: improve comments on the grammar style: whitespace style: fourmolu ref: simplification docs: update grammar specification for comments ref: run hlint improve describeToken on comments ref: make diff smaller test: fix no-thunks test test: fix md5Check test fix compiler errors and warnings test: add expectation for failing hackage test We also reintroduced the flag "CABAL_PARSEC_DEBUG" to debug the lexer/parser. fix hackage test 001 fix hackage test test: disable comments in comparison in roundtrip hackage test refactor parser refactor test style: run fourmolu remove todos yay test: remove test dependencies move ToExpr to orphan module test: simplify restore accidently formatted cabal restore previous debug behaviour refactor: don't use liftA2 and liftA3 refactor annotation to ([Comment ann], ann) attempt test: update expects fix errors for Deprecated module fix compilation errors for integration tests fix grammar while incorrect output We need to look into how to wire the output for it to hold the comments in the right position. refactor parser style: run fourmolu fix comment attach post processing refactor fix: only discard element comments at top level test: update expected fix: derive Eq instance for Comment This fixes builds for old GHC use strict either for parser fix: doctest define proper WithComments data type remove exactComment field in GenericPackageDescription add Lens functions for AnnotatedGenericPackageDescription test AnnotatedGenericPackageDescription instead run fourmolu run hlint remove redundant imports tests: test hasktorch tests: update expected tests: fix integration tests tests: fix nothunks test run fourmolu fix doctests ref: keep backward compatibility of exported functions test: explicitly test *WithComment readFields variant test: fix doctest fix: build undo changes from experiments ref: clean up the parser code ref: reduce diff
1 parent 1e3c355 commit 7751130

File tree

75 files changed

+1657
-247
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+1657
-247
lines changed

Cabal-syntax/Cabal-syntax.cabal

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ build-type: Simple
1818
extra-doc-files:
1919
README.md ChangeLog.md
2020

21+
flag CABAL_PARSEC_DEBUG
22+
description: Enable debug build for the cabal field lexer/parser.
23+
default: False
24+
manual: True
25+
2126
source-repository head
2227
type: git
2328
location: https://github.com/haskell/cabal/
@@ -59,6 +64,11 @@ library
5964
if impl(ghc >= 8.0) && impl(ghc < 8.8)
6065
ghc-options: -Wnoncanonical-monadfail-instances
6166

67+
if flag(CABAL_PARSEC_DEBUG)
68+
CPP-Options: -DCABAL_PARSEC_DEBUG
69+
build-depends:
70+
vector
71+
6272
build-tool-depends: alex:alex
6373

6474
exposed-modules:
@@ -148,6 +158,8 @@ library
148158
Distribution.Types.ForeignLibOption
149159
Distribution.Types.ForeignLibType
150160
Distribution.Types.GenericPackageDescription
161+
Distribution.Types.AnnotatedGenericPackageDescription
162+
Distribution.Types.AnnotatedGenericPackageDescription.Lens
151163
Distribution.Types.GenericPackageDescription.Lens
152164
Distribution.Types.HookedBuildInfo
153165
Distribution.Types.IncludeRenaming

Cabal-syntax/src/Distribution/FieldGrammar.hs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ module Distribution.FieldGrammar
2626
, Section (..)
2727
, Fields
2828
, partitionFields
29+
, extractComments
2930
, takeFields
3031
, runFieldParser
3132
, runFieldParser'
@@ -38,6 +39,7 @@ module Distribution.FieldGrammar
3839
import Distribution.Compat.Prelude
3940
import Prelude ()
4041

42+
import qualified Data.Bifunctor as Bi
4143
import qualified Data.Map.Strict as Map
4244

4345
import Distribution.FieldGrammar.Class
@@ -99,10 +101,17 @@ partitionFields = finalize . foldl' f (PS mempty mempty mempty)
99101
PS fs (MkSection name sargs sfields : s) ss
100102

101103
-- | Take all fields from the front.
104+
-- Returns a tuple containing the comments, nameless fields, and sections
102105
takeFields :: [Field ann] -> (Fields ann, [Field ann])
103106
takeFields = finalize . spanMaybe match
104107
where
105108
finalize (fs, rest) = (Map.fromListWith (flip (++)) fs, rest)
106109

107110
match (Field (Name ann name) fs) = Just (name, [MkNamelessField ann fs])
108111
match _ = Nothing
112+
113+
extractComments :: (Foldable f, Functor f) => [f (WithComments ann)] -> ([Comment ann], [f ann])
114+
extractComments = Bi.first mconcat . unzip . map extractCommentsStep
115+
116+
extractCommentsStep :: (Foldable f, Functor f) => f (WithComments ann) -> ([Comment ann], f ann)
117+
extractCommentsStep f = (foldMap justComments f, fmap unComments f)

Cabal-syntax/src/Distribution/Fields/Field.hs

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{-# LANGUAGE CPP #-}
2+
{-# LANGUAGE DeriveGeneric #-}
23
{-# LANGUAGE DeriveTraversable #-}
34
{-# LANGUAGE StandaloneDeriving #-}
45

@@ -17,6 +18,12 @@ module Distribution.Fields.Field
1718
, SectionArg (..)
1819
, sectionArgAnn
1920

21+
-- * Comment
22+
, Comment (..)
23+
, WithComments (..)
24+
, mapComments
25+
, mapCommentedData
26+
2027
-- * Name
2128
, FieldName
2229
, Name (..)
@@ -44,11 +51,26 @@ import qualified Data.Foldable1 as F1
4451
-- Cabal file
4552
-------------------------------------------------------------------------------
4653

54+
data Comment ann = Comment !ByteString !ann
55+
deriving (Show, Generic, Eq, Ord, Functor)
56+
57+
data WithComments ann = WithComments
58+
{ justComments :: ![Comment ann]
59+
, unComments :: !ann
60+
}
61+
deriving (Show, Generic, Eq, Ord, Functor)
62+
63+
mapComments :: ([Comment ann] -> [Comment ann]) -> WithComments ann -> WithComments ann
64+
mapComments f (WithComments cs x) = WithComments (f cs) x
65+
66+
mapCommentedData :: (ann -> ann) -> WithComments ann -> WithComments ann
67+
mapCommentedData f (WithComments cs x) = WithComments cs (f x)
68+
4769
-- | A Cabal-like file consists of a series of fields (@foo: bar@) and sections (@library ...@).
4870
data Field ann
4971
= Field !(Name ann) [FieldLine ann]
5072
| Section !(Name ann) [SectionArg ann] [Field ann]
51-
deriving (Eq, Show, Functor, Foldable, Traversable)
73+
deriving (Eq, Show, Functor, Foldable, Traversable, Generic)
5274

5375
-- | @since 3.12.0.0
5476
deriving instance Ord ann => Ord (Field ann)
@@ -73,7 +95,7 @@ fieldUniverse f@(Field _ _) = [f]
7395
--
7496
-- /Invariant:/ 'ByteString' has no newlines.
7597
data FieldLine ann = FieldLine !ann !ByteString
76-
deriving (Eq, Show, Functor, Foldable, Traversable)
98+
deriving (Eq, Show, Functor, Foldable, Traversable, Generic)
7799

78100
-- | @since 3.12.0.0
79101
deriving instance Ord ann => Ord (FieldLine ann)
@@ -94,7 +116,7 @@ data SectionArg ann
94116
SecArgStr !ann !ByteString
95117
| -- | everything else, mm. operators (e.g. in if-section conditionals)
96118
SecArgOther !ann !ByteString
97-
deriving (Eq, Show, Functor, Foldable, Traversable)
119+
deriving (Eq, Show, Functor, Foldable, Traversable, Generic)
98120

99121
-- | @since 3.12.0.0
100122
deriving instance Ord ann => Ord (SectionArg ann)
@@ -115,7 +137,7 @@ type FieldName = ByteString
115137
--
116138
-- /Invariant/: 'ByteString' is lower-case ASCII.
117139
data Name ann = Name !ann !FieldName
118-
deriving (Eq, Show, Functor, Foldable, Traversable)
140+
deriving (Eq, Show, Functor, Foldable, Traversable, Generic)
119141

120142
-- | @since 3.12.0.0
121143
deriving instance Ord ann => Ord (Name ann)

Cabal-syntax/src/Distribution/Fields/Lexer.x

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ import qualified Data.ByteString.Char8 as B.Char8
3131
import qualified Data.Word as Word
3232

3333
#ifdef CABAL_PARSEC_DEBUG
34-
import Debug.Trace
3534
import qualified Data.Vector as V
3635
import qualified Data.Text as T
3736
import qualified Data.Text.Encoding as T
@@ -84,8 +83,9 @@ tokens :-
8483
<bol_section, bol_field_layout, bol_field_braces> {
8584
@nbspspacetab* @nl { \pos len inp -> checkWhitespace pos len inp >> adjustPos retPos >> lexToken }
8685
-- no @nl here to allow for comments on last line of the file with no trailing \n
87-
$spacetab* "--" $comment* ; -- TODO: check the lack of @nl works here
88-
-- including counting line numbers
86+
$spacetab* "--" $comment* { toki TokComment }
87+
-- TODO: check the lack of @nl works here
88+
-- including counting line numbers
8989
}
9090

9191
<bol_section> {
@@ -105,9 +105,8 @@ tokens :-
105105
}
106106

107107
<in_section> {
108-
$spacetab+ ; --TODO: don't allow tab as leading space
109-
110-
"--" $comment* ;
108+
$spacetab+ ; --TODO: don't allow tab as leading space
109+
"--" $comment* { toki TokComment }
111110

112111
@name { toki TokSym }
113112
@string { \pos len inp -> return $! L pos (TokStr (B.take (len - 2) (B.tail inp))) }
@@ -161,6 +160,7 @@ data Token = TokSym !ByteString -- ^ Haskell-like identifier, number or
161160
| Colon
162161
| OpenBrace
163162
| CloseBrace
163+
| TokComment !ByteString
164164
| EOF
165165
| LexicalError InputStream --TODO: add separate string lexical error
166166
deriving Show
@@ -230,7 +230,9 @@ lexToken = do
230230
setInput inp'
231231
let !len_bytes = B.length inp - B.length inp'
232232
t <- action pos len_bytes inp
233-
--traceShow t $ return tok
233+
#ifdef CABAL_PARSEC_DEBUG
234+
traceShow t $ return tok
235+
#endif
234236
return t
235237

236238

@@ -241,10 +243,12 @@ checkPosition pos@(Position lineno colno) inp inp' len_chars = do
241243
let len_bytes = B.length inp - B.length inp'
242244
pos_txt | lineno-1 < V.length text_lines = T.take len_chars (T.drop (colno-1) (text_lines V.! (lineno-1)))
243245
| otherwise = T.empty
244-
real_txt = B.take len_bytes inp
246+
real_txt :: B.ByteString
247+
real_txt = B.take len_bytes inp
245248
when (pos_txt /= T.decodeUtf8 real_txt) $
246249
traceShow (pos, pos_txt, T.decodeUtf8 real_txt) $
247-
traceShow (take 3 (V.toList text_lines)) $ return ()
250+
traceShow (take 3 (V.toList text_lines)) $
251+
return ()
248252
where
249253
getDbgText = Lex $ \s@LexState{ dbgText = txt } -> LexResult s txt
250254
#else

0 commit comments

Comments
 (0)