The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
{-# OPTIONS_GHC -fglasgow-exts -fno-warn-orphans -fallow-overlapping-instances #-}

module Pugs.Prim.Match (
    op2Match, rxSplit, rxSplit_n, matchFromMR, pkgParents, pkgParentClasses
) where
import Pugs.Internals
import Pugs.Embed
import Pugs.AST
import Pugs.Types
import Pugs.Config
import Pugs.Prim.Code
import qualified Data.Map as Map
import qualified Data.Array as Array

-- XXX - kluge: before we figure out the parrot calling convention,
--       we'll simply inline the adverbs into the regex.
ruleWithAdverbs :: VRule -> Eval VStr
ruleWithAdverbs MkRulePGE{ rxRule = re, rxAdverbs = advs } = do
    when (null re) $
        fail "Null patterns are invalid; use <?null> or an empty string instead"
    hv      <- join $ doHash advs hash_fetch
    advs    <- forM (Map.assocs hv) $ \(k, v) -> do
        str <- case v of
            VBool True  -> return "1"
            VBool False -> return "0"
            _           -> fromVal v
        return $ \x -> ":" ++ k ++ "(" ++ str ++ ")[" ++ x ++ "]"
    return $ combine advs re
ruleWithAdverbs _ = fail "PCRE regexes can't be compiled to PGE regexes"

doMatch :: String -> VRule -> Eval VMatch
doMatch cs rule@MkRulePGE{ rxRule = ruleStr } = do
    let pwd1 = getConfig "installsitelib" ++ "/auto/pugs/perl5/lib"
        pwd2 = getConfig "sourcedir" ++ "/perl5/Pugs-Compiler-Rule/lib"
    hasSrc <- liftIO $ doesDirectoryExist pwd2
    let pwd = if hasSrc then pwd2 else pwd1
    glob    <- askGlobal
    let syms = [ (cast $ v_name var, tvar)
               | (var, [(_, tvar)]) <- padToList glob
               , SRegex == v_sigil var
               , isGlobalVar var
               ]
    subrules <- forM syms $ \(name, tvar) -> do
        ref         <- liftSTM $ readTVar tvar
        VRule rule  <- fromVal =<< readRef ref
        text        <- ruleWithAdverbs rule
        return (name, text)
    text <- ruleWithAdverbs rule
    rv   <- liftIO $ fmap (fmap (fmap toUpper)) (getEnv "PUGS_REGEX_ENGINE")
    let ruleEngine | Just "PGE" <- rv   = evalPGE
                   | otherwise          = evalPCR
    pge  <- liftIO $ ruleEngine pwd cs text subrules
            `catchIO` (\e -> return $ show e)
    rv  <- tryIO Nothing $ fmap Just (readIO $ decodeUTF8 pge)
    let matchToVal PGE_Fail = VMatch mkMatchFail
        matchToVal (PGE_String str) = VStr str
        matchToVal (PGE_Array ms) = VList (map matchToVal ms)
        matchToVal (PGE_Match from to pos named) = VMatch $
            mkMatchOk from to substr pos' named'
            where
            substr  = genericTake (to - from) (genericDrop from cs)
            pos'    = map matchToVal pos
            named'  = Map.map matchToVal $ Map.fromList named
    case rv of
        Just m  -> fromVal (matchToVal m)
        Nothing -> do
            liftIO $ putStrLn ("*** Cannot parse regex: " ++ ruleStr ++ "\n*** Error: " ++ pge)
            return mkMatchFail

doMatch csChars MkRulePCRE{ rxRegex = re } = do
    rv <- liftIO $ matchRegexWithPCRE re csBytes 0
    if isNothing rv then return mkMatchFail else do
    let ((fromBytes, lenBytes):subs) = Array.elems (fromJust rv)
        substr str from len = take len (drop from str)
        subsMatch = [
            VMatch $ if fBytes == -1 then mkMatchFail else mkMatchOk
                fChars (fChars + lChars)
                (substr csChars fChars lChars)
                [] Map.empty
            | (fBytes, lBytes) <- subs
            , let fChars = chars $ take fBytes csBytes
            , let lChars = chars $ substr csBytes fBytes lBytes
            ]
        fromChars = chars $ take fromBytes csBytes
        lenChars  = chars $ substr csBytes fromBytes lenBytes
        chars = genericLength . decodeUTF8

    return $ mkMatchOk fromChars (fromChars + lenChars) (substr csChars fromChars lenChars) subsMatch Map.empty
    where
    csBytes = encodeUTF8 csChars

matchFromMR :: MatchResult Char -> Val
matchFromMR mr = VMatch $ mkMatchOk 0 0 (decodeUTF8 all) subsMatch Map.empty
    where
    (all:subs) = elems $ mrSubs mr
    subsMatch = [ VMatch $ mkMatchOk 0 0 (decodeUTF8 sub) [] Map.empty | sub <- subs ]

-- Used in op2Match
not_VRule :: Val -> Bool
not_VRule _y@(VRule _) = False
not_VRule _            = True

classType :: Type
classType = mkType "Class"

-- XXX - need to generalise this
op2Match :: Val -> Val -> Eval Val

op2Match x y@(VCode _) = do
    (arity :: Int) <- fromVal =<< op1CodeArity y
    res <- fromVal =<< case arity of
        0 -> do
            writeVar (cast "$*_") x
            evalExp $ App (Val y) Nothing []
        1 -> do
            evalExp $ App (Val y) Nothing [Val x]
        _ -> fail ("Unexpected arity in smart match: " ++ (show arity))
    return $ VBool $ res

op2Match x (VRef (MkRef (IScalar sv))) | scalar_iType sv == mkType "Scalar::Const" = do
    y' <- scalar_fetch' sv
    op2Match x y'

op2Match x (VRef y) = do
    y' <- readRef y
    op2Match x y'

op2Match x@(VObject MkObject{ objType = cls }) y | cls == classType = do
    fetch   <- doHash x hash_fetchVal
    name    <- fromVal =<< fetch "name"
    op2Match (VType (mkType name)) y

op2Match x y@(VObject MkObject{ objType = cls }) | cls == classType = do
    fetch   <- doHash y hash_fetchVal
    name    <- fromVal =<< fetch "name"
    op2Match x (VType (mkType name))

-- $x ~~ tr/x/y/ ==> $x = ~$x.trans('x' => 'y')
op2Match x (VSubst (MkTrans from to)) = do
    str <- fromVal x
    evalExp $ Syn "="
        [ Val x
        , App (_Var "&trans") (Just (Val (VStr str)))
            [ App (_Var "&infix:=>") Nothing
                [ Val (VStr from)
                , Val (VStr to)
                ]
            ]
        ]

op2Match x (VSubst (MkSubst rx subst)) | rxGlobal rx = do
    str         <- fromVal x
    (str', cnt) <- doReplace str 0
    if cnt == 0 then return (VBool False) else do
    ref     <- fromVal x
    writeRef ref $ VStr str'
    return $ castV cnt
    where
    doReplace :: String -> Int -> Eval (String, Int)
    doReplace str ok = do
        match <- str `doMatch` rx
        if not (matchOk match) then return (str, ok) else do
        glob    <- askGlobal
        matchSV <- findSymRef (cast "$/") glob
        writeRef matchSV (VMatch match)
        str'    <- fromVal =<< evalExp subst
        -- XXX - on zero-width match, advance the cursor and, if can't,
        --       don't even bother with the recursive call.
        case (matchTo match, matchFrom match) of
            (0, 0) -> if null str then return (str' ++ str, ok) else do
                (after', ok') <- doReplace (tail str) (ok + 1)
                return (concat [str' ++ (head str:after')], ok')
            (to, from) -> do
                (after', ok') <- doReplace (genericDrop to str) (ok + 1)
                return (concat [genericTake from str, str', after'], ok')

op2Match x (VSubst (MkSubst rx subst)) = do
    str     <- fromVal x
    ref     <- fromVal x
    match   <- str `doMatch` rx
    if not (matchOk match) then return (VBool False) else do
    glob    <- askGlobal
    matchSV <- findSymRef (cast "$/") glob
    writeRef matchSV (VMatch match)
    str'    <- fromVal =<< evalExp subst
    writeRef ref . VStr $ concat
        [ genericTake (matchFrom match) str
        , str'
        , genericDrop (matchTo match) str
        ]
    return $ VBool True

op2Match x (VRule rx) | rxGlobal rx = do
    str     <- fromVal x
    rv      <- matchOnce str
    cxt     <- asks envContext
    case rxStringify rx of
        True -> do
            strs <- mapM fromVal rv
            return $ case strs of
                [str]   -> VStr str
                _       -> VList $ map VStr strs
        _ | isSlurpyCxt cxt -> do
            return (VList rv)
        _ -> do
            return (VInt $ genericLength rv)
    where
    hasSubpatterns = case rx of
        MkRulePGE{}             -> True -- XXX bogus - use <p6rule> to parse itself
        MkRulePCRE{rxNumSubs=n} -> not (n == 0)
    matchOnce :: String -> Eval [Val]
    matchOnce str = do
        match <- str `doMatch` rx
        if not (matchOk match) then return [] else do
        let ret x = return $ if hasSubpatterns
                        then [ m | m@(VMatch MkMatch{ matchOk = True }) <- matchSubPos match ] ++ x
                        else (VMatch match):x
        case (matchTo match, matchFrom match) of
            (0, 0) -> if null str then ret [] else do
                rest <- matchOnce (tail str)
                ret rest
            (to, _) -> do
                rest <- matchOnce (genericDrop to str)
                ret rest

op2Match x (VRule rx) = do
    str     <- fromVal x
    match   <- str `doMatch` rx
    glob    <- askGlobal
    matchSV <- findSymRef (cast "$/") glob
    writeRef matchSV (VMatch match)
    ifListContext
        (return $ VList (matchSubPos match))
        (return $ VMatch match)

op2Match x@(VRule _) y | not_VRule y = do
    op2Match y x

op2Match (VType typ) (VType t) = do
    typs <- pkgParents (showType typ)
    return . VBool $ showType t `elem` (showType typ:typs)

op2Match x y@(VType _) = do
    typ <- fromVal x
    case x of
        VRef x | typ == mkType "Class" -> do
            x' <- readRef x
            op2Match x' y
        _ -> op2Match (VType typ) y

op2Match (VRef x) y = do
    x' <- readRef x
    op2Match x' y

op2Match x y = do
    op2Cmp (fromVal :: Val -> Eval VStr) (==) x y

op2Cmp :: (a -> Eval b) -> (b -> b -> VBool) -> a -> a -> Eval Val
op2Cmp f cmp x y = do
    x' <- f x
    y' <- f y
    return $ VBool $ x' `cmp` y'

rxSplit :: VRule -> String -> Eval [Val]
rxSplit _  [] = return []
rxSplit rx str = do
    match <- str `doMatch` rx
    if not (matchOk match) then return [VStr str] else do
    if matchFrom match == matchTo match
        then do
            let (c:cs) = str
            rest <- rxSplit rx cs
            return (VStr [c]:rest)
        else do
            let before = genericTake (matchFrom match) str
                after  = genericDrop (matchTo match) str
            rest <- rxSplit rx after
            return $ (VStr before:matchSubPos match) ++ rest

-- duplicated for now, pending über-Haskell-fu

rxSplit_n :: VRule -> String -> Int -> Eval [Val]
rxSplit_n _ [] _ = return []
rxSplit_n rx str n = do
    match <- str `doMatch` rx
    if or [ ( n == 1 ), ( not (matchOk match) ) ] then return [VStr str] else do
    if matchFrom match == matchTo match
        then do
            let (c:cs) = str
            rest <- rxSplit_n rx (cs) (n-1)
            return (VStr [c]:rest)
        else do
            let before = genericTake (matchFrom match) str
                after  = genericDrop (matchTo match) str
            rest <- rxSplit_n rx after (n-1)
            return $ (VStr before:matchSubPos match) ++ rest

pkgParents :: VStr -> Eval [VStr]
pkgParents pkg = do
    ref     <- readVar $ cast (':':'*':pkg)
    if ref == undef then return [] else do
    meta    <- readRef =<< fromVal ref
    fetch   <- doHash meta hash_fetchVal
    attrs   <- fromVal =<< fetch "is"
    attrs'  <- fromVal =<< fetch "does" -- XXX wrong
    pkgs    <- mapM pkgParents (attrs ++ attrs')
    return $ nub (pkg:concat pkgs)

-- XXX - copy and paste code; merge with above!
pkgParentClasses :: VStr -> Eval [VStr]
pkgParentClasses pkg = do
    ref     <- readVar $ cast (':':'*':pkg)
    if ref == undef then return [] else do
    meta    <- readRef =<< fromVal ref
    fetch   <- doHash meta hash_fetchVal
    attrs   <- fromVal =<< fetch "is"
    pkgs    <- mapM pkgParentClasses attrs
    return $ nub (pkg:concat pkgs)