The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
{-# OPTIONS_GHC -O2 -fglasgow-exts -funbox-strict-fields -fno-warn-orphans #-}

-- A Haskell port of PGE::OPTable.

module Text.Parser.OpTable where
import Prelude hiding (length, lookup, null, drop, span)
import qualified Data.Map as Map
import qualified Data.Seq as Seq
import qualified Data.FastPackedString as Str
import qualified Data.List as List
import Data.Ratio
import Data.Generics hiding (Prefix, Infix)
import Data.Char (isDigit)
import Data.List (find)
import Data.Seq (Seq, fromList)
import Data.Map (Map, insert, lookup, toAscList, (!))
import Data.FastPackedString (empty, pack, null, drop, dropSpace, length, isPrefixOf, span, FastString(..), lineIdxs)
import GHC.Prim(unsafeCoerce#)

data Op
    = Infix         { str :: !Str, assoc :: !Assoc }
    | Prefix        { str :: !Str }
    | Postfix       { str :: !Str }
    | Term          { str :: !Str }
    | DynTerm       { str :: !Str, dynStr  :: !DynStr }
    | Ternary       { str :: !Str, str2 :: !Str }
    | Circumfix     { str :: !Str, str2 :: !Str }
    | PostCircumfix { str :: !Str, str2 :: !Str }
    | Close         { str :: !Str }
    deriving (Eq, Show, Ord, Typeable, Data)

-- newtype DynStr = MkDynStr (forall m. Monad m => Str -> m (Str, Op -> Seq Match))
type DynStr = Str -> Str -> Maybe DynResult

data DynResult
    = DynResultMatch
        { dynMatched    :: !Str
        , dynRemainder  :: !Str
        }
    | DynResultTrans
        { dynMatched    :: !Str
        , dynRemainder  :: !Str
        , dynOpTrans    :: !(forall r. OpTable r -> OpTable r)
        }
    deriving (Typeable)

instance Data DynResult where
    gunfold = error "gunfold"
        :: (forall r. c (Str -> r) -> c r) -> (forall r . r -> c r) -> Constr -> c DynResult
    toConstr = error "gfoldl"
    dataTypeOf = error "dataTypeOf"

instance Eq DynStr where _ == _ = True
instance Ord DynStr where compare _ _ = EQ
instance Show DynStr where show _ = "<dyn>"

instance Eq (DynMkMatch r) where _ == _ = True
instance Ord (DynMkMatch r) where compare _ _ = EQ
instance Show (DynMkMatch r) where show _ = "<mkMatch>"

type DynMkMatch r = (Token r -> [r] -> r)
type Precedence = Ratio Integer
type Arity = Int

data Token r = MkToken
    { tokOp      :: !Op
    , tokPrec    :: !Precedence
    , tokArity   :: !Arity
    , tokClose   :: !(Maybe Op)
    , tokNull    :: !Bool -- null-width assertion
    , tokMkMatch :: !(DynMkMatch r)
    }
    deriving (Eq, Show, Typeable, Data)

instance Ord (Token r) where
    compare x y = compare (tokPrec x) (tokPrec y)

data Match = MkMatch
    { matchOp   :: !Op
    , matchArgs :: !(Seq Match)
    }
    deriving (Eq, Show, Ord, Typeable, Data)

data Assoc
    = AssocNon | AssocLeft | AssocRight | AssocChain | AssocList
    deriving (Eq, Show, Ord, Typeable, Data)

data Whitespace
    = AllowWhitespace
    | NoWhitespace
    deriving (Eq, Show, Ord, Typeable, Data)

data PrecRelation
    = DefaultPrec
    | SameAs        { relOp :: !Op }
    | TighterThan   { relOp :: !Op }
    | LooserThan    { relOp :: !Op }
    deriving (Eq, Show, Ord, Typeable, Data)

data OpTable r = MkOpTable
    { tableEntries   :: !(EntryMap r)
    , tableTerms     :: !(TokenMap r)
    , tableOpers     :: !(TokenMap r)
    , tableWsTerms   :: !(TokenMap r)
    , tableWsOpers   :: !(TokenMap r)
    }
    deriving (Eq, Show, Ord, Typeable, Data)

emptyTable :: OpTable r
emptyTable = MkOpTable Map.empty Map.empty Map.empty Map.empty Map.empty

type Str = Str.FastString
type EntryMap a = Map Op (Token a)
type TokenMap a = Map TokenName (Token a)

-- | Terms are ordered by descending length first.
newtype TokenName = MkTokenName { nameToStr :: Str }
    deriving (Eq, Show, Typeable, Data)

nameLength :: TokenName -> Int
nameLength = length . nameToStr

instance Ord TokenName where
    compare (MkTokenName x) (MkTokenName y) = case compare (Str.length y) (Str.length x) of
        EQ -> compare x y
        o  -> o

addToken :: OpTable r -> Op -> DynMkMatch r -> PrecRelation -> Whitespace -> OpTable r
addToken table op mk rel ws = doCloseOp . doInsert $ table{ tableEntries = ents' }
    where
    ents  = tableEntries table
    ents' = doCloseEntry $ insert op tok ents
    tok   = MkToken
        { tokOp      = op
        , tokPrec    = calculatePrec rel ents
        , tokArity   = arityOf op
        , tokClose   = maybeOpClose
        , tokNull    = False
        , tokMkMatch = mk
        }
    doInsert = insertBy op tok ws
    (doCloseOp, doCloseEntry, maybeOpClose)
        | isClosing op =
            ( insertOp mkTokClose AllowWhitespace
            , insert opClose tok
            , Just opClose
            )
        | otherwise    = (id, id, Nothing)
    opClose = Close (str2 op)
    mkTokClose = MkToken
        { tokOp      = opClose
        , tokPrec    = tokPrec tok
        , tokArity   = 0
        , tokClose   = Nothing
        , tokNull    = False
        , tokMkMatch = mk
        }

arityOf :: Op -> Arity
arityOf Close{}                 = 0
arityOf Ternary{}               = 3
arityOf Infix{assoc=AssocList}  = -1 -- infinity
arityOf Infix{}                 = 2
arityOf PostCircumfix{}         = 2
arityOf _                       = 1

insertBy :: Op -> Token r -> Whitespace -> OpTable r -> OpTable r
insertBy Term{}           = insertTerm
insertBy DynTerm{}        = insertTerm
insertBy Prefix{}         = insertTerm
insertBy Circumfix{}      = insertTerm
insertBy _                = insertOp

isClosing :: Op -> Bool
isClosing Ternary{}         = True
isClosing Circumfix{}       = True
isClosing PostCircumfix{}   = True
isClosing _                 = False

insertTerm, insertOp :: Token r -> Whitespace -> OpTable r -> OpTable r
insertTerm tok NoWhitespace    table = table
    { tableTerms   = insertTok tok (tableTerms table)   }
insertTerm tok AllowWhitespace table = table
    { tableTerms   = insertTok tok (tableTerms table)
    , tableWsTerms = insertTok tok (tableWsTerms table) }
insertOp   tok NoWhitespace    table = table
    { tableOpers     = insertTok tok (tableOpers table)     }
insertOp   tok AllowWhitespace table = table
    { tableOpers     = insertTok tok (tableOpers table)
    , tableWsOpers   = insertTok tok (tableWsOpers table) }

insertTok :: Token r -> TokenMap r -> TokenMap r
insertTok tok tmap = insert key tok tmap
    where
    key = MkTokenName (tokStr tok)

tokStr :: Token r -> Str
tokStr = str . tokOp

defaultPrec :: Precedence
defaultPrec = 1%1

calculatePrec :: PrecRelation -> EntryMap r -> Precedence
calculatePrec rel toks = case rel of
    DefaultPrec     -> defaultPrec
    SameAs {}       -> prec
    LooserThan {}   -> prec - 1 % (denominator prec * 2)
    TighterThan {}  -> prec + 1 % (denominator prec * 2)
    where
    prec = tokPrec (toks ! (relOp rel))

type TokenStack r = [Token r]
type TermStack r  = [r]
type OperStack r  = [r]

type Parse r a = ( ?termStack  :: TermStack r
                 , ?tokenStack :: TokenStack r
                 , ?operStack  :: OperStack ([r] -> r)
                 , ?tbl        :: OpTable r
                 , ?str        :: Str
                 , ?final      :: r -> Str -> r
                 ) => a

opParse :: (r -> Str -> r) -> OpTable r -> Str -> r
opParse f tbl str =
    let ?termStack  = []
        ?tokenStack = []
        ?operStack  = []
        ?tbl        = tbl 
        ?str        = str
        ?final      = f
     in expectTerm

strPos :: Str -> String
strPos (PS p s l) = "line " ++ show lineNum ++ ", column " ++ show colNum
    where
    idxs = lineIdxs (PS p 0 l)
    lns  = (-1:List.filter (< s) idxs)
    colNum  = s - List.last lns
    lineNum = List.length lns

opParsePartial :: forall r. OpTable r -> Str -> (r, Str)
opParsePartial tbl input = forceOut (opParse forceIn tbl input)
    where
    forceIn :: r -> Str -> r
    forceIn res str = unsafeCoerce# (res, str)
    forceOut :: r -> (r, Str)
    forceOut = unsafeCoerce#

opParseAll :: OpTable r -> Str -> r
opParseAll = opParse $ \res str -> if null (dropSpace str)
    then res
    else error ("incomplete parse at " ++ strPos str)

expectTerm :: Parse r r
expectTerm
    | null ?str = emptyTerm
    | otherwise = let ?str = str' in
        tryMatch foundTerm (let ?str = orig in emptyTerm) terms
    where
    orig  = ?str
    str'  = dropSpace ?str
    terms = (if length str' == length ?str then tableTerms else tableWsTerms) ?tbl

tryMatch :: Parse r (Parse r (Token r -> a) -> Parse r a -> TokenMap r -> a)
tryMatch ok nok tmap = case find ((`isPrefixOf` ?str) . nameToStr . fst) (toAscList tmap) of
    Just res -> matched ok nok res
    Nothing  -> nok

matched :: Parse r (Parse r (Token r -> a) -> Parse r a -> (TokenName, Token r) -> a)
matched ok nok (name, token@MkToken{ tokOp = DynTerm{ dynStr = dyn } }) =
    let str' = drop (nameLength name) ?str in
        case dyn (nameToStr name) str' of
            Just res ->
                let ok' = let ?str = dynRemainder res
                           in ok token{ tokOp = Term (dynMatched res) }
                    in case res of
                    DynResultTrans{} -> let ?tbl = dynOpTrans res ?tbl in ok'
                    _                -> ok'
            _                -> nok
matched ok _ (name, token) = let ?str = drop (nameLength name) ?str in ok token

isTerm :: Op -> Bool
isTerm Term{}    = True
isTerm DynTerm{} = True
isTerm _         = False

foundTerm :: Parse r (Token r -> r)
foundTerm token
    | isTerm (tokOp token) = pushTermStack token expectOper
    | otherwise            = operShift token

pushTermStack :: Parse r (Token r -> Parse r a -> Parse r a)
pushTermStack token p = let ?termStack = (tokMkMatch token token []: ?termStack) in p

pushOperStack :: Parse r (Token r -> Parse r a -> Parse r a)
pushOperStack token p = let ?operStack = (tokMkMatch token token: ?operStack) in p

pushTokenStack :: Parse r (Token r -> Parse r a -> Parse r a)
pushTokenStack token p = let ?tokenStack = (token: ?tokenStack) in p

mkMatch :: DynMkMatch Match
mkMatch token = MkMatch (tokOp token) . fromList

expectOper :: Parse r r
expectOper
    | null str' = endParse
    | otherwise = let ?str = str' in
        tryMatch foundOper (let ?str = orig in emptyOper) opers
    where
    orig  = ?str
    str'  = dropSpace ?str
    opers = (if length str' == length ?str then tableOpers else tableWsOpers) ?tbl

emptyTerm :: Parse r r
emptyTerm = case lookup nameEmpty (tableTerms ?tbl) of
    Just tok -> matched foundTerm nullTerm (nameEmpty, tok)
    Nothing  -> nullTerm
    where
    nameEmpty = (MkTokenName empty)

emptyOper :: Parse r r
emptyOper = case lookup operEmpty (tableOpers ?tbl) of
    Just tok -> matched foundOper endParse (operEmpty, tok)
    Nothing  -> endParse
    where
    operEmpty = (MkTokenName empty)

nullTerm :: Parse r r
nullTerm | (t@MkToken{ tokNull = True }:_) <- ?tokenStack = pushTermStack t expectOper
         | (MkToken{ tokOp = op }:_) <- ?tokenStack, null (str op) = endParse
         | otherwise = error ("missing term at " ++ strPos ?str)

foundOper :: Parse r (Token r -> Parse r r)
foundOper oper
    | (top:_) <- ?tokenStack = case tokOp top of
        Postfix{}                   -> operReduce oper
        topOp       | Close{} <- op -> if isClosing topOp
            then if str op == str2 topOp
                then operShift oper
                else endParse
            else operReduce oper
        Circumfix{}                 -> operShift oper
        PostCircumfix{}             -> operShift oper
        _           | oper > top    -> operShift oper
        Ternary{}                   -> case op of
            Ternary{}   -> error "Missing ternary close"
            _           -> operShift oper
        _           | oper < top    -> operReduce oper
        Infix{ assoc = AssocRight } -> operShift oper
        Infix{ assoc = AssocList  } -> operShift oper
        _                           -> operReduce oper
    | Close{} <- op          = endParse
    | otherwise              = operShift oper
    where
    op = tokOp oper

operShift :: Parse r (Token r -> Parse r r)
operShift token = pushTokenStack token (pushOperStack token (case tokOp token of
        Prefix{}        -> expectTerm
        Infix{}         -> expectTerm
        Ternary{}       -> expectTerm
        PostCircumfix{} -> expectTerm
        Circumfix{}     -> expectTerm
        Postfix{}       -> expectOper
        _ | (_:MkToken{tokOp=Ternary{}}:_) <- ?tokenStack
                        -> expectTerm
        _               -> expectOper
    ))

operReduce :: Parse r (Token r -> r)
operReduce oper = reduce (foundOper oper)

endParse :: Parse r r
endParse
    | [] <- ?tokenStack = ?final (head ?termStack) ?str
    | otherwise         = reduce endParse

reduce :: Parse r (Parse r r -> r)
reduce p = case ?tokenStack of
    (MkToken{tokOp=Close{}}:t:ts) ->
        let ?operStack  = tail ?operStack
            ?tokenStack = ts
         in reduce1 (tokArity t) p
    (t:ts) | tokArity t == (-1) ->
        let (same, rest) = List.span (== t) ts
            len          = List.length same in
        let ?operStack   = List.drop len ?operStack
            ?tokenStack  = rest
         in reduce1 (2 + len) p
    (t:ts)  -> let ?tokenStack = ts in reduce1 (tokArity t) p
    _       -> error "reducing an empty token stack"

reduce1 :: Parse r (Arity -> Parse r r -> r)
reduce1 arity p =
    let (op:opers)    = ?operStack
        (args, terms) = splitAt arity ?termStack
     in let ?operStack = opers
            ?termStack = (op (reverse args):terms) in p

mkOpTable :: [[(Whitespace, DynMkMatch r, Op)]] -> OpTable r
mkOpTable = fst . List.foldl mkOps (emptyTable, DefaultPrec)
    where
    mkOps x [] = x
    mkOps (tbl, rel) [(ws, mk, op)] = (addToken tbl op mk rel ws, LooserThan op)
    mkOps (tbl, rel) ((ws, mk, op):xs) = mkOps (addToken tbl op mk rel ws, rel) xs

testTable :: OpTable Match
testTable = mkOpTable
    [ op mkMatch Circumfix       "( )"
    , op mkMatch Term            (span isDigit)
    , op mkMatch Infix           "* /"
    , op mkMatch Infix AssocLeft "+ -"
    ]

noWs :: [(Whitespace, a, Op)] -> [(Whitespace, a, Op)]
noWs = map (\(_, x, y) -> (NoWhitespace, x, y))

class OpClass a where op :: a

instance OpClass (a -> (Str -> Op) -> [Char] -> [(Whitespace, a, Op)]) where
    op mk op1 = List.map (((,,) AllowWhitespace mk) . op1 . pack) . splitWords

instance OpClass (a -> (Str -> Str -> Op) -> [Char] -> [(Whitespace, a, Op)]) where
    op mk op2 = List.map (((,,) AllowWhitespace mk) . uncurry op2) . pack2 . splitWords
        where
        pack2 (x:y:zs)  = ((pack x, pack y):pack2 zs)
        pack2 _         = []

instance OpClass (a -> (Str -> Assoc -> Op) -> [Char] -> [(Whitespace, a, Op)]) where
    op mk op1 = op mk op1 AssocLeft

instance OpClass (a -> (Str -> Assoc -> Op) -> Assoc -> [Char] -> [(Whitespace, a, Op)]) where
    op mk op1 assoc = List.map (((,,) AllowWhitespace mk) . (`op1` assoc) . pack) . splitWords

instance (OpClass (a -> (Str -> Op) -> (Str -> Str -> Maybe (Str, Str)) -> [(Whitespace, a, Op)])) where
    op mk _ f = [(AllowWhitespace, mk, DynTerm empty dyn)]
        where
        dyn pre post = fmap (uncurry DynResultMatch) (f pre post)

instance (OpClass (a -> (Str -> Op) -> String -> (Str -> Str -> Maybe (Str, Str)) -> [(Whitespace, a, Op)])) where
    op mk _ s f = [(AllowWhitespace, mk, DynTerm (pack s) dyn)]
        where
        dyn pre post = fmap (uncurry DynResultMatch) (f pre post)

instance (OpClass (a -> (Str -> Op) -> String -> (Str -> Maybe (Str, Str)) -> [(Whitespace, a, Op)])) where
    op mk op1 s f = op mk op1 s (\(_ :: Str) x -> f x)

instance (OpClass (a -> (Str -> Op) -> (Str -> Maybe (Str, Str)) -> [(Whitespace, a, Op)])) where
    op mk op1 f = op mk op1 (\(_ :: Str) x -> f x)

instance (OpClass (a -> (Str -> Op) -> (Str -> (Str, Str)) -> [(Whitespace, a, Op)])) where
    op mk op1 f = op mk op1 (\(_ :: Str) x -> f x)

instance (OpClass (a -> (Str -> Op) -> (Str -> Str -> (Str, Str)) -> [(Whitespace, a, Op)])) where
    op mk _ f = [(AllowWhitespace, mk, DynTerm empty dyn)]
        where
        dyn pre post = let (pre', post') = f pre post in
            if null pre then Nothing else Just (DynResultMatch pre' post')

instance (OpClass ((String -> Token r -> [r] -> r) -> (Str -> Op) -> String -> [(Whitespace, DynMkMatch r, Op)])) where
    op mk op1 = concatMap (\x -> op (mk x) op1 x) . splitWords

splitWords :: String -> [String]
splitWords [] = [""]
splitWords x  = List.words x