Skip to content

Commit

Permalink
Disambiguation production for nonterminals
Browse files Browse the repository at this point in the history
  • Loading branch information
expipiplus1 committed May 12, 2023
1 parent 3d3b3c1 commit ceba2b8
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 30 deletions.
8 changes: 8 additions & 0 deletions Text/Earley/Grammar.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module Text.Earley.Grammar
, terminal
, (<?>)
, constraint
, disambiguate
, alts
, Grammar(..)
, rule
Expand Down Expand Up @@ -57,6 +58,8 @@ data Prod r e t a where
Named :: !(Prod r e t a) -> e -> Prod r e t a
-- Non-context-free extension: conditioning on the parsed output.
Constraint :: !(Prod r e t a) -> (a -> Bool) -> Prod r e t a
--
Disamb :: !(Prod r e t a) -> !(Prod r e t ([a] -> [b])) -> Prod r e t b

-- | Match a token for which the given predicate returns @Just a@,
-- and return the @a@.
Expand All @@ -71,6 +74,9 @@ terminal p = Terminal p $ Pure id
constraint :: (a -> Bool) -> Prod r e t a -> Prod r e t a
constraint = flip Constraint

disambiguate :: ([a] -> [b]) -> Prod r e t a -> Prod r e t b
disambiguate d = flip Disamb (Pure d)

-- | Lifted instance: @(<>) = 'liftA2' ('<>')@
instance Semigroup a => Semigroup (Prod r e t a) where
(<>) = liftA2 (Data.Semigroup.<>)
Expand All @@ -88,6 +94,7 @@ instance Functor (Prod r e t) where
fmap f (Alts as p) = Alts as $ fmap (f .) p
fmap f (Many p q) = Many p $ fmap (f .) q
fmap f (Named p n) = Named (fmap f p) n
fmap f (Disamb p d) = Disamb p (fmap (fmap (fmap f)) d)

-- | Smart constructor for alternatives.
alts :: [Prod r e t a] -> Prod r e t (a -> b) -> Prod r e t b
Expand All @@ -110,6 +117,7 @@ instance Applicative (Prod r e t) where
Alts as p <*> q = alts as $ flip <$> p <*> q
Many a p <*> q = Many a $ flip <$> p <*> q
Named p n <*> q = Named (p <*> q) n
Disamb p d <*> q = Disamb p ((\a b c -> fmap ($ b) (a c)) <$> d <*> q)

instance Alternative (Prod r e t) where
empty = Alts [] $ pure id
Expand Down
82 changes: 52 additions & 30 deletions Text/Earley/Parser/Internal.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{-# LANGUAGE CPP, BangPatterns, DeriveFunctor, GADTs, Rank2Types, RecursiveDo #-}
{-# LANGUAGE CPP, BangPatterns, DeriveFunctor, GADTs, Rank2Types, RecursiveDo, LambdaCase #-}
-- | This module exposes the internals of the package: its API may change
-- independently of the PVP-compliant version number.
module Text.Earley.Parser.Internal where
Expand All @@ -16,6 +16,7 @@ import Data.Monoid
import Data.Semigroup
import Control.Category (Category)
import qualified Control.Category as C
import Data.Traversable (for)

-------------------------------------------------------------------------------
-- * Concrete rules and productions
Expand Down Expand Up @@ -46,6 +47,10 @@ prodNulls prod = case prod of
Many a p -> prodNulls (pure [] <|> pure <$> a) <**> prodNulls p
Named p _ -> prodNulls p
Constraint p _ -> prodNulls p
Disamb p d -> Results $ do
ps <- unResults $ prodNulls p
ds <- unResults $ prodNulls d
pure $ ($ ps) =<< ds

-- | Remove (some) nulls from a production
removeNulls :: ProdR s r e t a -> ProdR s r e t a
Expand All @@ -58,6 +63,7 @@ removeNulls prod = case prod of
Many {} -> prod
Named p n -> Named (removeNulls p) n
Constraint p n -> Constraint (removeNulls p) n
Disamb p d -> Disamb (removeNulls p) d

type ProdR s r e t a = Prod (Rule s r) e t a

Expand Down Expand Up @@ -154,33 +160,40 @@ data Cont s r e t a b where
FinalCont :: ResultsCont s a c -> Cont s r e t a c

data Conts s r e t b a c = Conts
{ conts :: !(STRef s [Cont s r e t a c])
, contsInputs :: !(ResultsCont s b a)
, contsArgs :: !(STRef s (Maybe (STRef s (Results s a))))
{ conts :: !(STRef s [Cont s r e t a c])
, contsArgs :: !(ResultsCont s b a)
, contsResults :: !(STRef s (Maybe (STRef s (Results s a))))
}

mapContsArgs
:: Conts s r e t b a c
-> (ResultsCont s b a -> ResultsCont s b' a)
-> ST s (Conts s r e t b' a c)
mapContsArgs (Conts c a r) f = pure $ Conts c (f a) r

newConts :: STRef s [Cont s r e t a c] -> ST s (Conts s r e t a a c)
newConts r = Conts r C.id <$> newSTRef Nothing

contraMapCont :: ResultsCont s b a -> Cont s r e t a c -> Cont s r e t b c
contraMapCont f (Cont g p cs) = Cont (f >>> g) p cs
contraMapCont f (FinalCont args) = FinalCont (f >>> args)

contToState :: BirthPos -> Results s a -> Cont s r e t a c -> State s r e t c
contToState pos r (Cont g p cs) =
State
contToState :: BirthPos -> Results s a -> Cont s r e t a c -> ST s (State s r e t c)
contToState pos r (Cont g p cs) = do
cs' <- mapContsArgs cs (\args -> ResultsCont $ \f -> resultBind (args <<< resultArrs' f <<< g) r)
pure $ State
p
pos
cs{contsInputs = ResultsCont $ \f -> resultBind (contsInputs cs <<< resultArrs' f <<< g) r}
contToState _ r (FinalCont args) = Final $ resultBind args r
cs'
contToState _ r (FinalCont args) = pure $ Final $ resultBind args r

-- | Strings of non-ambiguous continuations can be optimised by removing
-- indirections.
simplifyCont :: Conts s r e t x b a -> ST s [Cont s r e t b a]
simplifyCont Conts {conts = cont} = readSTRef cont >>= go False
where
go !_ [Cont g (Pure f) cont'] = do
let args = contsInputs cont'
let args = contsArgs cont'
ks' <- simplifyCont cont'
go True $ map (contraMapCont $ args <<< resultArr' f <<< g) ks'
go True ks = do
Expand Down Expand Up @@ -282,38 +295,39 @@ parse (st:ss) env = case st of
-- and thus add (X → α a • β, j) to S(k+1)
-- In our case, advancing the dot past a terminal means applying the
-- results of that terminal to the input of the continuation
Just a ->
let args' = args <<< resultArr ($ a)
in parse ss env {next = State p Previous scont{contsInputs = args'}
: next env}
Just a -> do
scont' <- mapContsArgs scont (<<< resultArr ($ a))
parse ss env {next = State p Previous scont' : next env}
Nothing -> parse ss env
-- Prediction operation
-- For every state in S(k) of the form (X → α • Y β, j)...
NonTerminal r p -> do
rkref <- readSTRef $ ruleConts r
ks <- readSTRef rkref
writeSTRef rkref (Cont C.id (fmap fmap p) scont{contsInputs = args} : ks)
writeSTRef rkref (Cont C.id (fmap fmap p) scont{contsArgs = args} : ks)
ns <- unResults $ ruleNulls r
-- ...add (Y → • γ, k) to S(k) for every production in the grammar with Y
-- on the left-hand side (Y → γ).
let addNullState
| null ns = id
| otherwise = (:)
$ State p pos scont{contsInputs = ResultsCont $ \f -> args `resultBind` manyResults (liftA2 ($) f ns)}
| null ns = pure
| otherwise = \ss' -> do
scont' <- mapContsArgs scont (\args' -> ResultsCont $ \f -> args' `resultBind` manyResults (liftA2 ($) f ns))
pure $ State p pos scont': ss'
if null ks then do -- The rule has not been expanded at this position.
st' <- State (ruleProd r) Current <$> newConts rkref
parse (addNullState $ st' : ss)
env {reset = resetConts r >> reset env}
else -- The rule has already been expanded at this position.
parse (addNullState ss) env
ss' <- addNullState (st' : ss)
parse ss' env {reset = resetConts r >> reset env}
else do -- The rule has already been expanded at this position.
ss' <- addNullState ss
parse ss' env
-- Completion operation
-- For every state in S(k) of the form (Y → γ •, j)...
Pure a
-- Skip following continuations that stem from the current position; such
-- continuations are handled separately.
| pos == Current -> parse ss env
| otherwise -> do
let argsRef = contsArgs scont
let argsRef = contsResults scont
masref <- readSTRef argsRef
case masref of
Just asref -> do -- The continuation has already been followed at this position.
Expand All @@ -330,30 +344,38 @@ parse (st:ss) env = case st of
writeSTRef argsRef $ Just asref
ks <- simplifyCont scont
res <- lazyResults $ unResults =<< readSTRef asref
let kstates = map (contToState pos res) ks
kstates <- traverse (contToState pos res) ks
parse (kstates ++ ss)
env {reset = writeSTRef argsRef Nothing >> reset env}
-- We need to add p with a continuation which takes into account 'd'
Disamb p (Pure d) -> do
scont' <- mapContsArgs scont (<<< resultArr' d)
parse (State p pos scont' : ss) env
Disamb p d -> do
scont' <- newConts =<< newSTRef [Cont C.id d scont{contsArgs = args}]
parse (State p Previous scont' : ss) env
-- For every alternative, add a state for that production all pointing to
-- the same continuation.
Alts as (Pure f) -> do
let args' = args <<< resultArr f
sts = [State a pos scont{contsInputs = args'} | a <- as]
sts <- for as $ \a -> do
scont' <- mapContsArgs scont (<<< resultArr f)
pure $ State a pos scont'
parse (sts ++ ss) env
Alts as p -> do
scont' <- newConts =<< newSTRef [Cont C.id (fmap fmap p) scont{contsInputs = args}]
scont' <- newConts =<< newSTRef [Cont C.id (fmap fmap p) scont{contsArgs = args}]
let sts = [State a Previous scont' | a <- as]
parse (sts ++ ss) env
-- Rustle up a left-recursive non-terminal and add it to the states to be
-- processed next.
Many p q -> mdo
r <- mkRule $ pure [] <|> (:) <$> p <*> NonTerminal r (Pure id)
parse (State (NonTerminal r q) pos scont{contsInputs = args} : ss) env
parse (State (NonTerminal r q) pos scont{contsArgs = args} : ss) env
-- Insert a state for the named production, but add the name to the list of
-- names for this position
Named pr' n -> parse (State pr' pos scont{contsInputs = args} : ss)
Named pr' n -> parse (State pr' pos scont{contsArgs = args} : ss)
env {names = n : names env}
-- Insert a state whose continuation filters any results
Constraint pr' c -> parse (State pr' pos scont{contsInputs = test >>> args} : ss) env
Constraint pr' c -> parse (State pr' pos scont{contsArgs = test >>> args} : ss) env
where test = ResultsCont $ \xs -> manyResults (filter c xs)

type Parser e i a = forall s. i -> ST s (Result s e i a)
Expand Down

0 comments on commit ceba2b8

Please sign in to comment.