%% =============
%%  CMPU-181, Spring 2013
%%  Definite Clause Grammars in Prolog
%%  April 17, 2013
%% =============
%%   Note about "regular expressions".
%%     The expression, a*, represents the set of all "words" that
%%     consist of zero or more a_s.  Thus, a* = { NIL, a, aa, aaa, ... }.
%%   Here_s a "grammar" (i.e., a set of rules) for generating the "words"
%%     in this "language":

pA --> [].  
pA --> [a], pA.

%%  The above rules are legal prolog syntax, even though they look much
%%  different from the kinds of prolog rules we have seen before.  Each can
%%  be translated into a form similar to what we have seen before.

%%  QUERY:  pA(Listy,[]).  <--- the [] is required for the DCG syntax.

%% Similarly, a*b*c* is a "regular expression" that represents the set of 
%% strings each of which consists of zero or more a_s, followed by zero or
%% more b_s, followed by zero or more c_s.  Thus, examples of strings in 
%% the set a*b*c* include:  abc, aaabccccc, ac, bc, bccc, abbbcccc.
%% Below, we_ll specify a grammar in prolog for the "words" in the "language" a*b*c*.

pS --> pA, pB, pC.

pB --> [].
pB --> [b], pB.

pC --> [].
pC --> [c], pC.

%% SAMPLE QUERY:  pS(Listy,[]).
%%                pS([a,X,Y,c],[]).

%% "Wrapper predicate"...

pSSSS(Listy) :- pS(Listy,[]).

%% ================================================================
%%  What_s going on under the covers?  DIFFERENCE LISTS!!
%% ================================================================
%%  Consider the following lists:
%%            Xs = [a,b,c,d,e,f,g,h,i]
%%            Ys =         [e,f,g,h,i]  <--- notice that Ys is the TAIL of Xs.
%%  The difference list, Xs/Ys, is the part of Xs that remains after
%%    subtracting off the tail, Ys.
%%  Thus:  Xs/Ys = [a,b,c,d]
%%
%%  Difference lists are convenient for describing grammars because
%%  they make it easy to encode "concatenation"...
%%  Here_s and example:
%%    Xs = [a,b,c,d,e,f,g,h,i]
%%    Ys =         [e,f,g,h,i]
%%    Zs =             [g,h,i]
%%  Notice that Xs/Ys = [a,b,c,d]  and Ys/Zs = [e,f].
%%  And:  Xs/Zs = Xs/Ys ++ Ys/Zs
%%  I.e.: [a,b,c,d,e,f] = [a,b,c,d] ++ [e,f].
%%
%%  Why is this kind of thing useful in grammars?
%%  Consider the rule:  S --> As, Bs, Cs.
%%  It says that a list of words forms a sentence, S, if it is
%%  the concatenation of a bunch of A_s, with a bunch of B_s, with a
%%  bunch of C_s.  
%% 
%%  Now, it is very laborious for us to use this difference-list notation.
%%  But prolog can "hide" the use of difference lists from us.
%%  We can focus, instead, on the friendlier DCG syntax!

%% -------------------------------------------------------

%%  sent(Xs) is a "wrapper predicate" that holds if Xs is a sentence
%%  contained in the language a*b*c*.

%%  The list Xs is represented by Xs/[] (i.e., Xs minus the empty list):

sent(Xs) :- s(Xs/[]).

%%  The following encodes the rule:  S --> ABC
%%    As/Xs, As/Bs, Bs/Cs, and Cs/Xs all represent lists of letters.
%%    Notice that because they are difference lists, we have:
%%      As/Xs is the result of concatenating As/Bs, Bs/Cs, and Cs/Xs.
%%    The reason?  The little part of As/Bs is the same as the big
%%                  part of Bs/Cs, and the little part of Bs/Cs is 
%%                  the same as the big part of Cs/Xs.

%%  s(As/Xs) holds if the list represented by As/Xs is 
%%    the concatenation of the lists represented by As/Bs, Bs/Cs and Cs/Xs,
%%    and a(As/Bs), b(Bs/Cs) and c(Cs/Xs) all hold.

s(As/Xs) :- a(As/Bs), b(Bs/Cs), c(Cs/Xs).  

%%  Example:
%%    As    = aaabbccccddd
%%    Xs    =          ddd
%%    Bs    =    bbccccddd
%%    Cs    =      ccccddd
%%    As/Bs = aaa
%%    Bs/Cs =    bb
%%    Cs/Xs =      cccc
 
%%  a(Xs/Ys) holds if Xs/Ys represents a list containing zero or more a_s.

%%  Base Case:  Zero a_s 

a(Xs/Xs).  %% <---- Xs/Xs represents the empty list!!

%%  Recursive Case:  One or more a_s:

a([a|Xs]/Ys) :- a(Xs/Ys).

%%  Similar rules for lists of zero or more b_s, and zero or more c_s.

b(Xs/Xs).
b([b|Xs]/Ys) :- b(Xs/Ys).

c(Xs/Xs).
c([c|Xs]/Ys) :- c(Xs/Ys).

%%   Try:  sent([a,a,b,c]).
%%         sent([a,X,c]).
%%         sent([X,X,X,X]).
%%         sent([X,Y,Z]).

%% =====================================
%%  DCG for English grammar!
%% =====================================

%%  Sentence is a noun phrase followed by a verb phrase.
%%   NOTE:  xS, xNP and xVP are all PREDICATES.

xS --> xNP, xVP.

%% Building noun phrases...

xNP --> xPropNoun.
xNP --> xDet, xNoun.

%% Some words...

xDet --> [a].
xDet --> [the].
xNoun --> [boy].
xNoun --> [girl].
xPropNoun --> [luke].
xPropNoun --> [barack].

%% Building verb phrases...

xVP --> xIVerb.
xVP --> xTVerb, xNP.
xVP --> xIVerb, xIObj.
xIObj --> xPrep, xNP.

%% Some more words...

xIVerb --> [runs].
xPrep --> [to].
xPrep --> [from].
xTVerb --> [trips].

%% Try this query:  xS(Ss,[]) <------ i.e., Ss/[] is the list of words

%% Or, better yet, define a wrapper predicate:

%% xxxx(Ss) -- holds if Ss is a sentence accepted by the above grammar

xxxx(Ss) :- xS(Ss,[]).  %% <--- Notice that we are supplying the
                       %%      two (usually hidden) inputs to xS.

%% Try this query:  xxxx([A,B,C,D]).


%% =======================================================
%%  Adding extra arguments to the predicates
%% =======================================================
%%  For example, below we add an extra TENSE argument.
%%  The Tense variable can have values such as present,
%%  past, infinitive, etc.

%%  A sentence is formed by a noun phrase followed by a verb phrase.
%%  The tense of the verb phrase must be the same as the tense of
%%  the sentence.

xS(Tense) --> xNP, xVP(Tense).

%%  Building verb phrases using a Tense variable to ensure that
%%  the tenses of the parts match the tense of the whole.

xVP(Tense) --> xIVerb(Tense).
xVP(Tense) --> xTVerb(Tense), xNP.
xVP(Tense) --> xIVerb(Tense), xIObj.

%%  Individual words.  Here, the tense of a given word is
%%  specified by a constant (e.g., past or present).

xIVerb(present) --> [runs].
xIVerb(inf) --> [run].
xIVerb(past) --> [ran].
xTVerb(present) --> [trips].
xTVerb(inf) --> [trip].
xTVerb(past) --> [tripped].

%%  xHelper:  holds for the helper verbs "does", "did", etc.

xHelper(present) --> [does].
xHelper(past) --> [did].

%% QUERIES:  xS(past, Listy, []).
%%           XS(present, Listy, []).

%%  Forming questions from a sentence...
%%  If S(present) = NP, VP(present) is a sentence then 
%%   Q = [does], NP, VP(infinitive) is a corresponding question.

xQ(Tense) --> xHelper(Tense), xNP, xVP(inf).

%% Try:  xQ(present, Q, []). 

%% Or, better yet, define a wrapper predicate:
%%  xQQQ(Ss) holds if Ss is a question accepted by the above grammar.
%%  Notice that xQ is given its three arguments:  the 
%%  Tense argument followed by the two (normally hidden) arguments
%%  representing the big and little parts of a difference list:

xQQQ(Ss,Tense) :- xQ(Tense, Ss, []). 

%%  Try this query:  xQQQ([A,B,C,D,E],past).  <--- arbitrary five-word 
%%                                                 question in past tense.


%% ==========================
%%  Generating parse trees
%% ==========================
%%  Below, we include an extra argument representing a parse tree.
%%  Parse trees are represented by FUNCTIONAL terms, using the
%%  functional symbols, s, np, vp, and so on.

yS(s(NPTree,VPTree)) --> yNP(NPTree), yVP(VPTree).

yNP(np(propNoun(Word))) --> yPropNoun(Word).
yVP(vp(iVerb(Word))) --> yIVerb(Word).

yVP(vp(tVerb(Word),NPTree)) --> yTVerb(Word), yNP(NPTree).

%% Some individual words:

yPropNoun(luke) --> [luke].
yIVerb(runs) --> [runs].
yTVerb(congratulates) --> [congratulates].

%% Try this query:  yS(Tree, Ss, []).

%% Or, define a wrapper predicate:

yyyyS(Ss,Tree) :- yS(Tree, Ss, []).

%% Try this query:  yyyyS([A,B,C], Tree).

%% Or:  yyyyS([luke,congratulates,luke],Tree).

%% =======================================

%% A grammar for lists...

myList --> leftParen, elements, rightParen.

leftParen --> ['('].
rightParen --> [')'].

elements --> [].
elements --> element, elements.

element --> myList.
element --> [x].