The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
{-# OPTIONS_GHC -cpp -fglasgow-exts  #-}

-- arch-tag: 1498020a-9de2-44e3-ae69-14bb1febb77e

{- |

basic usage:

string =~ 'regular expression' returns different things depending on context


type - what it evaluates to
---------------------------
Int - number of times the regular expression matches
String -  matching portion of string
(String,String,String) - (text before match, matching text, text after match)
[Either String String] - list of matching and nonmatching strings, if concated, the original string results.  Left = notmatching, Right = matching.
Bool - whether the string matches
() - always returns () (useful in monad context, see below)
[String] - list of matches
Array Int String - list of substring matches for first match 
(String, Array Int String) - full matching text and substring matches
[(String, Array Int String)] - all matches, full match plus substrings
[Array Int String] - all substrings from all matches


also, there is the monadic version (=~~) which always behaves exactly the same
as (=~) except when the match fails, instead of returning a default value, the
monad fails. 

regular expressions:

these may be strings, which are interpreted as regular expressions, or Regex's
from the Text.Regex module. or any other instance of the RegexLike class. 

when using strings, you may prefix the regex by "(?flags)" where flags is one
of 'i' for a case insensitive match and 'm' means a multi-line match. other 
flags may be available depending on your implementation

advanced features:

not just strings can be matched, but rather lists of anything a matcher is
defined for.  RegexLikeImp data class can be used for in-place code generated
by template haskell for compile-time checked regular expresions


-}


module RRegex.Syntax(
    RegexLike(..), 
    RegexContext(..), 
    (!~~), 
    MatchResult(..)) where

import Data.Array
import RRegex.PCRE as PCRE
--import RRegex
import Data.Maybe
import Control.Monad
import System.IO.Unsafe


-- | instances of this class may be used as regular expressions with this syntax.

class RegexLike r a | r -> a where
    -- | Test whether the regex matches at all
    matchTest :: r -> [a] -> Bool
    -- | Count the number of times the regex matches
    matchCount :: r -> [a] -> Int
    -- | return all matches
    matchAll  :: r -> [a] -> [(Array Int (Int,Int))]
    -- | match once
    matchOnce :: r  -- ^ Regular Expression
            -> [a]  -- ^ String to match
            -> Bool -- ^ Whether we are at the begining of the string (as opposed to continuing a previous match)
            -> Maybe (Array Int (Int,Int))  -- ^ array of matched expression
    matchShow :: r -> String  -- ^ Used for error messages
    matchTest r xs = isJust (matchOnce r xs True)
    matchCount r xs = length (matchAll r xs)
    matchAll r xs = f 0 xs where
        f t xs = case matchOnce r xs (t == 0) of 
            Nothing -> []
            Just a  | l == 0 -> [na]
                    | otherwise -> na:f (t + x) (drop x xs) where
                (o,l) = a!0
                x = o + l  
                na = fmap (adj t) a
        adj t (x,y) = ((,) $! x + t) $! y
    matchShow _ = "Unknown"

data MatchResult a = MR {
    mrBefore :: [a],
    mrMatch  :: [a],
    mrAfter  :: [a],
    mrSubList :: [[a]],
    mrSubs   :: Array Int [a]
}   

instance RegexLike PCRE.Regex Char where
    matchOnce re cs bol = unsafePerformIO (PCRE.execute re cs (if not bol then pcreNotbol else 0))
    matchShow _ = "PCRE Regex"

class RegexContext x a where
    -- | match a list against a regular expression, changing its behavior based on its result type.
    (=~) :: RegexLike r x => [x] -> r -> a
    -- | Monadic version of (=~). behaves identically, except it causes the monad to fail when the expression does not match, rather than returning a default value.
    (=~~) :: (Monad m, RegexLike r x) => [x] -> r -> m a

-- | check if regular expression does not match
(!~~) :: RegexLike r x => [x] -> r -> Bool
s !~~ re = not (s =~ re)

regexFailed :: (RegexLike r a, Monad m) => r -> m b
regexFailed re =  fail $ "regex failed to match: " ++ matchShow re

-- | return number of expressions matched
instance  RegexContext x Int where
    s =~ re =  matchCount re s 
    s =~~ re = case (s =~ re) of
        0 -> regexFailed re
        xs -> return $  xs

instance RegexContext x ([x],[x],[x]) where
    s =~ re = maybe (s,[],[]) id (s =~~ re)
    s =~~ re =  case matchOnce re s True of 
        Nothing -> regexFailed re 
        Just a  -> let (o,l) = a!o in return (take o s,take l (drop o s),drop (o + l) s)

instance RegexContext x ([x],[x],[x],Array Int [x]) where
    s =~ re = maybe (s,[],[], listArray (1,0) []) id (s =~~ re)
    s =~~ re =  case s =~~ re of 
        Nothing -> regexFailed re 
        Just z -> return (mrBefore z,mrMatch z, mrAfter z, mrSubs z)

instance RegexContext x (MatchResult x) where
    s =~ re = maybe MR {mrBefore = s,mrMatch = [],mrAfter = [],mrSubs = listArray (1,0) [], mrSubList = []} id (s =~~ re)
    s =~~ re =  case matchOnce re s True of 
        Nothing -> regexFailed re 
        Just z ->  return $ MR {mrBefore = take o s, mrAfter = drop (o + l) s, mrMatch = a!0, mrSubs = a, mrSubList = tail (elems a) } where
            a = fmap f z
            f (o,l) = take l (drop o s)
            (o,l) = z!0 

{-
instance RegexContext x [Either [x] [x]] where
    s =~ re = map f $  matchAll re s where
        f (Left s) = Left s
        f (Right (x,_)) = Right x
    s =~~ re = case (s =~ re) of
        [Left _] -> regexFailed re
        xs -> return $  xs
-}

extract :: [a] -> (Int, Int) -> [a]
extract s (x,y) = take y (drop x s)

instance RegexContext x [x] where
    s =~ re = case s =~~ re of 
        Nothing -> [] 
        Just z -> mrMatch z
    s =~~ re = liftM mrMatch (s =~~ re)

-- useful in non-monad context
instance RegexContext x Bool where 
    s =~ re = matchTest re s
    s =~~ re = case s =~ re of
        False -> regexFailed re
        True -> return True
-- useful in monad context        
instance RegexContext x () where  
    _ =~ _ = ()
    s =~~ re = case s =~ re of
        False -> regexFailed re
        True -> return ()

instance RegexContext x [[x]] where
    s =~ re =  [extract s (a!0) | a <- matchAll re s]
    s =~~ re = case (s =~ re) of
        [] -> regexFailed re
        xs -> return  xs

instance RegexContext x [Array Int [x]] where 
    s =~ re =  [fmap (extract s) x | x <- matchAll re s]
    s =~~ re = case (s =~ re) of
        [] -> regexFailed re
        xs -> return  xs
{-
instance RegexContext x [Array Int (Int,Int)] where 
    s =~ re =  matchAll re s
    s =~~ re = case (s =~ re) of
        [] -> regexFailed re
        xs -> return  xs
-}
instance RegexContext x [Array Int ([x],(Int,Int))] where 
    s =~ re =  [fmap (\z -> (extract s z,z)) x | x <- matchAll re s]
    s =~~ re = case (s =~ re) of
        [] -> regexFailed re
        xs -> return  xs

instance RegexContext x (Array Int [x]) where 
    s =~ re = maybe (listArray (1,0) []) id (s =~~ re)
    s =~~ re = case s =~~ re of 
        Nothing -> regexFailed re
        Just z -> return $ mrSubs z


instance RegexLike String Char where
    matchOnce re xs bol = matchOnce (c re) xs bol where
        c s = unsafePerformIO $ 
            PCRE.compile s 0 >>= \x -> case x of
                Left (i,err) -> fail $ "PCRE Regular Expression Error:\n" ++ re ++ "\n" ++ replicate i ' ' ++ "^ " ++ err 
                Right p -> return p
    matchShow s = s

{-
instance RegexLike Regex Char where 
    matchOnce re xs = fmap f (matchRegexAll re xs) where
        f (x,y,z,ls) = MR { mrBefore = x,mrMatch = y ,mrAfter = z,mrSubs = listArray (1,length ls) ls}
    matchShow _ = "Regex"
    
instance RegexLike String Char where 
    --matchOnce re xs = fmap f (matchRegexAll (mr re) xs) where
    --    f (x,y,z,ls) = (x,y,z,listArray (1,length ls) ls)
    matchOnce re xs = fmap f (matchRegexAll (mr re) xs) where
        f (x,y,z,ls) = MR { mrBefore = x,mrMatch = y ,mrAfter = z,mrSubs = listArray (1,length ls) ls}
        mr ('i':'/':re) = mkRegexWithOpts re True False
        mr ('s':'/':re) = mkRegexWithOpts re False True
        mr ('i':'s':'/':re)  = mkRegexWithOpts re False False
        mr ('s':'i':'/':re)  = mkRegexWithOpts re False False
        mr ('/':re) = mkRegex re 
        mr (re) = mkRegex re 
    matchShow re = re
-}