Skip to content

Commit

Permalink
Revamp parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben Hamlin committed Nov 13, 2017
1 parent cefb8af commit 11c32f9
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 61 deletions.
21 changes: 18 additions & 3 deletions Text/ConfigParser.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
module Text.ConfigParser
( module X
( module Types
, module Parser
) where

import Text.ConfigParser.Types as X
import Text.ConfigParser.Parser as X
import Text.ConfigParser.Types as Types
( Key
, ConfigOption
, ConfigParser
, configParser_
, configParser
, defaultKeyValue
, defaultLineCommentInit
)
import Text.ConfigParser.Parser as Parser
( config
, string
, integer
, bool
, list
)
123 changes: 87 additions & 36 deletions Text/ConfigParser/Parser.hs
Original file line number Diff line number Diff line change
@@ -1,70 +1,121 @@
module Text.ConfigParser.Parser
( config
, string
, integer
, bool
, list
) where
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Text.ConfigParser.Parser where

import Control.Monad (void)
import Data.String (IsString(..))
import Text.Parsec (char, choice, digit, sepBy, many, many1, try, spaces)
import Text.Parsec (eof, endOfLine, optional, (<|>), (<?>))
import Text.Parsec.Char (noneOf, oneOf)
import Text.Parsec.Text (GenParser, Parser)
import Text.Parsec (SourceName, ParseError, State(..))
import Text.Parsec (getParserState, setParserState, unexpected, newline)
import Text.Parsec (manyTill, char, choice, digit, sepBy, many, many1, try)
import Text.Parsec (spaces, eof, parse, (<|>), (<?>))
import Text.Parsec.Char (noneOf, oneOf, anyChar)
import Text.Parsec.Text (Parser)
import qualified Text.Parsec as P (string)

import Text.ConfigParser.Util
import Text.ConfigParser.Types

-- | Parse a string surrounded by quotes. Quotes within the string must be
-- escaped with backslashes.
string :: IsString s => Parser s
string = char '"' *> fmap fromString (many escapedChar) <* char '"'
<?> "string in quotes"
string = char '"' *> fmap fromString (many stringChar) <* char '"'
<?> "string in quotes"
where
escapedChar = let escapables = "\"\n\\" in choice
[ noneOf escapables
, char '\\' *> oneOf escapables
]
stringChar = noneOf "\"\n\\" <|> char '\\' *> escapeSeq
escapeSeq = char '"' <|> char '\\' <|> '\n' <$ char 'n'

-- | Parse an integer.
integer :: Parser Integer
integer = read .: (++) <$> sign <*> many1 digit <?> "integer"
where
sign = P.string "-" <|> P.string ""

-- | Parse a bounded integer. Fail to parse with a descriptive message if the
-- value is out of bounds.
boundedIntegral :: forall n. (Show n, Bounded n, Integral n) => Parser n
boundedIntegral = bound =<< integer
<?> "integer between " ++ show intMin ++ " and " ++ show intMax
where
intMin = minBound :: n
intMax = maxBound :: n
bound n | n > fromIntegral intMax = unexpected $ "integer above " ++ show intMax
| n < fromIntegral intMin = unexpected $ "integer below " ++ show intMin

| otherwise = return $ fromIntegral n

-- | Parse a boolean. Valid synonyms for @True@ are @true@, @yes@, @Yes@, @on@,
-- and @On@. Valid synonyms for @False@ are @false@, @no@, @No@, @off@, and
-- @Off@.
bool :: Parser Bool
bool = True <$ truthy <|> False <$ falsey
bool = True <$ try truthy <|> False <$ try falsey
where
truthy = choice $ fmap P.string ["true", "True", "yes", "Yes", "on", "On"]
falsey = choice $ fmap P.string ["false", "False", "no", "No", "off", "Off"]

-- | Parse a list of values surrounded by @[@ and @]@, and separated by commas.
-- The list can contain whitespace and newlines.
list :: (Parser a) -> Parser [a]
list p =
initial *> (p `sepBy` separator) <* terminator <?> "list in brackets"
list p = initial *> (p `sepBy` separator) <* terminator <?> "list in brackets"
where
initial = try $ char '[' <* spaces
separator = try $ spaces *> char ',' <* spaces
terminator = try $ spaces *> char ']'

-- | Ignore zero or more spaces, tabs, or vertical tabs.
whitespace :: Parser ()
whitespace = () <$ many (oneOf " \t") <?> "whitespace"
whitespace = () <$ many (oneOf " \t\v\r") <?> "whitespace"

-- | Extract a parser for a transformation on c's from a 'ConfigOption'.
actionParser :: ConfigParser c -> ConfigOption c -> Parser (c -> c)
actionParser c ConfigOption {..} =
whitespace *> keyValue c key (action <$> parser)

lineEnd :: String -> Parser ()
lineEnd commentMarker = () <$ whitespace <* optional comment <* terminator
-- Parse a string and replace the input of the parser with the result.
replaceParserInput :: Parser String -> Parser ()
replaceParserInput p = do
s <- getParserState
i <- p
void $ setParserState s {stateInput = fromString i}

-- Replace each line comment with a single newline.
removeLineComments :: ConfigParser c -> Parser ()
removeLineComments ConfigParser {..} = replaceParserInput $
mconcat <$> many (escapedComment <|> comment <|> content)
where
comment = commentParser commentMarker
terminator = endOfLine <?> "end of line"
startComment = choice $ try . P.string <$> lineCommentInit
terminator = void newline <|> eof
comment = '\n':[] <$ startComment <* anyChar `manyTill` terminator
escapedComment = try $ char '\\' *> startComment
content = (:[]) <$> anyChar

commentParser :: String -> Parser ()
commentParser marker = () <$ P.string marker <* many (noneOf "\n") <?> "line comment"
-- Remove spaces from the start and end of each line, at the start of the
-- input, and at the end of the input.
removeExtraSpaces :: Parser ()
removeExtraSpaces = replaceParserInput $
whitespace *> contentChar `manyTill` try (whitespace *> eof)
where
contentChar = try strippedNL <|> anyChar
strippedNL = whitespace *> newline <* whitespace

actionParser :: ConfigParser c -> ConfigOption c -> Parser (c -> c)
actionParser c (ConfigOption k p a) = lineParser c k $ a <$> p
-- Replace sequences of multiple newlines with a single newline.
removeExtraLines :: Parser ()
removeExtraLines = replaceParserInput $
optionalNLs *> contentChar `manyTill` try (optionalNLs *> eof)
where
contentChar = combinedNLs <|> anyChar
optionalNLs = () <$ many newline
combinedNLs = '\n' <$ many1 newline

-- Parse a config file as specified by a 'ConfigParser'.
config :: ConfigParser c -> Parser c
config c = foldr ($) (defaults c) <$> (fileStart *> file <* fileEnd)
config p = foldr ($) (defaults p) <$> do
removeLineComments p
removeExtraSpaces
removeExtraLines
optionParsers `sepBy` newline <* eof
where
fileStart = try $ many emptyLines
file = keyValue `sepBy` emptyLines
fileEnd = try (many emptyLines) *> optional whitespace *> optional comment *> eof
comment = commentParser $ commentStart c
keyValue = choice $ try . actionParser c <$> options c
emptyLines = many1 $ lineEnd (commentStart c)
optionParsers = choice $ try . actionParser p <$> options p

-- Parse a config file from disk.
parseFromFile :: ConfigParser c -> SourceName -> IO (Either ParseError c)
parseFromFile p f = parse (config p) f . fromString <$> readFile f
49 changes: 30 additions & 19 deletions Text/ConfigParser/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
{-# LANGUAGE RankNTypes #-}
module Text.ConfigParser.Types where

import Text.Parsec (string, spaces, char, many, (<?>))
import Text.Parsec.Char
import Data.List (nub)
import Text.Parsec (string, spaces, char, (<?>))
import Text.Parsec.Text (Parser)

type Key = String
Expand All @@ -15,36 +15,47 @@ data ConfigOption c = forall a. ConfigOption
, action :: a -> c -> c -- ^ How the value should change the state @c@.
}

-- | Parameters for a parser that takes a config file and produces a @c@. Only
-- use this constructor if you want to specify your own 'lineParser' or
-- 'commentStart'. Otherwise, use the 'configParser' smart constructor.
-- | Parameters for a parser that takes a config file and produces a @c@. Use
-- the 'configParser_' constructor if you want to specify your own 'lineParser'
-- or 'commentStart'. Otherwise, use the 'configParser' smart constructor.
data ConfigParser c = ConfigParser
{ lineParser :: forall a. Key -> Parser a -> Parser a
{ keyValue :: forall a. Key -> Parser a -> Parser a
-- ^ Specifies how a key and a value parser should be represented in the
-- config file, e.g., @key = value@, or @key: value@.
, commentStart :: String
-- ^ String to starts a line comment, e.g., @"#"@. Make sure this doesn't
-- share a prefix with 'lineParser' or the 'parser' element of any
-- 'ConfigOption's.
, defaults :: c
, lineCommentInit :: [String]
-- Strings to start a line comment, such as @#@, @--@, or @//@. All
-- characters following this string up to the following newline or EOF
-- will be removed. You can use the string without starting a comment by
-- escaping it with a backslash, e.g. @\#@ or @\--@.
, defaults :: c
-- Initial @c@ to fold each 'ConfigOption's action over.
, options :: [ConfigOption c]
, options :: [ConfigOption c]
-- List of key-value pairs to parse from the config file. Any key in the
-- config file that doesn't appear here will result in parse error.
}

-- | Smart constructor to check that 'options' doesn't contain any duplicate
-- keys before creating a 'ConfigParser'.
configParser_ :: (forall a. Key -> Parser a -> Parser a)
-> [String] -> c -> [ConfigOption c] -> ConfigParser c
configParser_ kv cs ds os' = ConfigParser kv cs ds os
where
os = if length (nub $ fmap key os') == length os'
then os'
else error "duplicate option keys in ConfigParser"

-- | Smart constructor for a 'ConfigParser' that uses a default syntax like
-- @key = value@ and line comments starting with @#@.
configParser :: c -> [ConfigOption c] -> ConfigParser c
configParser = ConfigParser defaultLineParser defaultCommentStart
configParser = configParser_ defaultKeyValue defaultLineCommentInit

-- | Default syntax like @key = value@.
defaultLineParser :: Key -> Parser a -> Parser a
defaultLineParser k p = keyParser *> separator *> p
defaultKeyValue :: Key -> Parser a -> Parser a
defaultKeyValue k p = keyParser *> separator *> p
where
keyParser = many (oneOf " \t") *> string k <?> "option key"
keyParser = string k <?> "option key"
separator = spaces *> char '=' <* spaces

-- | Line comments starting with @#@.
defaultCommentStart :: String
defaultCommentStart = "#"
-- | Default line comment like @# comment text@.
defaultLineCommentInit :: [String]
defaultLineCommentInit = ["#"]
1 change: 1 addition & 0 deletions Text/ConfigParser/Util.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
module Text.ConfigParser.Util where

(.:) :: (c -> d) -> (a -> b -> c) -> a -> b -> d
(.:) = (.).(.)
27 changes: 24 additions & 3 deletions config-parser.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,30 @@ extra-source-files: ChangeLog.md
cabal-version: >=1.10

library
ghc-options: -Wall -Werror
exposed-modules: Text.ConfigParser
other-modules: Text.ConfigParser.Util, Text.ConfigParser.Types, Text.ConfigParser.Parser
other-extensions: ExistentialQuantification, RankNTypes
build-depends: base >=4.9 && <4.10, parsec >=3.1 && <3.2
other-modules: Text.ConfigParser.Util,
Text.ConfigParser.Types,
Text.ConfigParser.Parser
other-extensions: ExistentialQuantification,
RankNTypes,
RecordWildCards
build-depends: base >=4.9 && <4.10,
parsec >=3.1 && <3.2
-- hs-source-dirs:
default-language: Haskell2010

test-suite parsing
ghc-options: -Wall -Werror
other-modules: Text.ConfigParser.Util,
Text.ConfigParser.Types,
Text.ConfigParser.Parser,
Text.ConfigParser
type: exitcode-stdio-1.0
main-is: tests/Parsing.hs
default-language: Haskell2010
build-depends: base >=4.9 && <4.10,
hspec >=2.4 && <2.5,
lens >=4.15 && <4.16,
parsec >=3.1 && <3.2,
text >=1.2 && <1.3

0 comments on commit 11c32f9

Please sign in to comment.