Started writing out the paper.

This commit is contained in:
Jim Martens 2013-11-17 15:49:47 +01:00
parent bfd88c5a98
commit 9f32168252
2 changed files with 184 additions and 37 deletions

View File

@ -24,6 +24,89 @@
timestamp = {2013.10.29}
}
@INBOOK{Jurafsky2009,
chapter = {18},
pages = {617--644},
title = {Speech and Language Processing},
publisher = {Pearson},
year = {2009},
author = {Jurafsky, Daniel and Martin, James H.},
series = {Prentice-Hall series in artificial intelligence},
edition = {Second},
abstract = {Sentences get their meanings from the words they contain and the syntactic
order of the words. Therefore the meaning of a sentence is partially
based on the words and its syntactic structure. The composition of
meaning representation is guided by the syntactic components and
relations provided by grammars such as CFGs.
A meaning representation is generated by first sending the input through
a parser which results in the syntactic analysis and second passing
this analysis as input to a semantic analyzer.
In the syntax-driven semantic analysis it is assumed that syntactic,
lexical and anaphoric ambiguities are not a problem.
The semantic meanings are attached to the grammar rules and lexical
entries from which trees are generated in the first place. This is
called the rule-to-rule hypothesis.
The semantic attachments are written in braces after the syntactic
rules themselves.
After the syntactic analysis has been created, every word receives
a FOL predicate and/or term. The semantic analyzer walks up the tree
until the complete FOL term has been created. On the way, lambda
reduction is used to replace predicates and terms with their proper
meanings, received from other parts of the tree.},
booktitle = {Speech and Language Processing},
owner = {jim},
quality = {1},
timestamp = {2013.11.16}
}
@INBOOK{Jurafsky2009a,
chapter = {17},
pages = {579--616},
title = {Speech and Language Processing},
publisher = {Pearson},
year = {2009},
author = {Jurafsky, Daniel and Martin, James H.},
series = {Prentice-Hall series in artificial intelligence},
edition = {Second},
abstract = {Lambda notation is used to dynamically bind variables to content
that appears later.
lambda x P(x)(y) results in P(y) after a lambda reduction as x has
been bound to y.
lambda P P(x)(lambda x Restaurant(x)) results in lambda x Restaurant(x)(x)
which results in Restaurant(x)},
booktitle = {Speech and Language Processing},
owner = {jim},
quality = {1},
timestamp = {2013.11.16}
}
@INBOOK{Jurafsky2009b,
chapter = {13},
pages = {461--492},
title = {Speech and Language Processing},
publisher = {Pearson},
year = {2009},
author = {Jurafsky, Daniel and Martin, James H.},
series = {Prentice-Hall series in artificial intelligence},
edition = {Second},
owner = {jim},
quality = {1},
timestamp = {2013.11.17}
}
@CONFERENCE{Kessler1997,
author = {Kessler, Brett and Nunberg, Geoffrey and Schuetze, Hinrich},
title = {Automatic Detection of Text Genre},
@ -150,17 +233,14 @@
}
@INBOOK{Russel2010,
author = {Russel, Stuart J. and Norvig, Peter},
title = {Artificial intelligence: A Modern Approach},
booktitle = {Artificial intelligence: A Modern Approach},
year = {2009},
date = {December 11},
bookauthor = {Russel, Stuart J. and Norvig, Peter},
edition = {Third},
series = {Prentice-Hall series in artificial intelligence},
publisher = {Prentice Hall},
chapter = {23},
pages = {888-927},
pages = {888--927},
title = {Artificial intelligence: A Modern Approach},
publisher = {Pearson},
year = {2009},
author = {Russel, Stuart J. and Norvig, Peter},
series = {Prentice-Hall series in artificial intelligence},
edition = {Third},
abstract = {The first method to understanding natural language is syntactic analysis
or parsing. The goal is to find the phrase structure of a sequence
of words according to the rules of the applied grammar.
@ -311,6 +391,9 @@
task against a restricted set of options. A general purpose system
can only work accurately if it creates one model for every speaker.
Prominent examples like Apple's siri are therefore not very accurate.},
bookauthor = {Russel, Stuart J. and Norvig, Peter},
booktitle = {Artificial intelligence: A Modern Approach},
date = {December 11},
owner = {jim},
timestamp = {2013.10.24}
}
@ -330,8 +413,8 @@
title = {Dependency Parsing by Belief Propagation},
booktitle = {Conference on Empirical Methods in Natural Language Processing},
year = {2008},
date = {October 25 - October 27},
pages = {145-156},
date = {October 25 - October 27},
owner = {jim},
quality = {1},
timestamp = {2013.10.29}

View File

@ -64,6 +64,8 @@
% Configurationen:
\hyphenation{whe-ther} % Manually use: "\-" in a word: Staats\-ver\-trag
\hyphenation{spe-ci-fies}
\hyphenation{spe-ci-fi-ca-tion}
%\lstloadlanguages{C} % Set the default language for listings
\DeclareGraphicsExtensions{.pdf,.svg,.jpg,.png,.eps} % first try pdf, then eps, png and jpg
@ -189,42 +191,104 @@
\begin{itemize}
\item two kinds of natural language: spoken language and written language
\item will concentrate on written language
\item important method for written language: parsing
\item different approaches for the kind of grammar being used
\item definition of syntax, semantics and pragmatics
\item two important methods: syntactic parsing and semantic analysis
\end{itemize}
\section{Evaluation of approaches}
\label{sec:evalApproaches}
\subsection{CYK, PCFG, lexicalized PCFG, DCG}
\label{subSec:RusselParsing}
\begin{itemize}
\item presents the context-free approach explained by Russell and Norvig\cite{Russel2010}
\end{itemize}
\subsection{Link Grammar}
\label{subSec:linkGrammar}
\begin{itemize}
\item presents an alternative to PCFGs; referencing Sleator here\cite{Sleator1993}
\end{itemize}
\subsection{Dependency grammar}
\label{subSec:dependencyGrammar}
\begin{itemize}
\item presents dependency grammar here, referencing Paskin\cite{Paskin2001}
\end{itemize}
\subsection{Categorial grammar}
\label{subSec:categorialGrammar}
\begin{itemize}
\item presents categorial grammars, using Clark\cite{Clark2004} here
\end{itemize}
\section{Evaluation of methods}
\label{sec:evalMethods}
\subsection{Syntactic Parsing}
\label{subSec:syntacticParsing}
Syntactic Parsing is used to create parse trees. These can be used for grammar checks in a text editor: ``A sentence that cannot be parsed may have grammatical errors''\footnote{\cite{Jurafsky2009b}, page 461}. More often, however, they ``serve as an important intermediate stage of representation for semantic analysis''\footnote{\cite{Jurafsky2009b}, page 461}. There are different algorithms available to create such trees. The CYK\footnote{named after its inventors John Cocke, Daniel Younger and Tadao Kasami} algorithm will be explained further. But before the CYK algorithm is explained, the reason for its existence is presented.
There are two classical ways of parsing a sentence: bottom-up and top-down. Both approaches have their own advantages and disadvantages. In addition, ambiguity creates problems. To implement bottom-up and top-down search algorithms in the face of ambiguity, ``an agenda-based backtracking strategy''\footnote{\cite{Jurafsky2009b}, page 468} is used. The problem here is that every time the parser recognizes that the current parse tree is wrong, it has to backtrack and explore other parts of the sentence. This creates a huge amount of duplicated work and is therefore inefficient.
A solution to these problems is offered by ``dynamic programming parsing methods''\footnote{\cite{Jurafsky2009b}, page 469}. The CYK algorithm is one of multiple algorithms based on dynamic programming.
The CYK algorithm only works with grammars in Chomsky Normal Form (CNF). Every context-free grammar can be converted to CNF without loss of expressiveness. Therefore this restriction does no harm but simplifies the parsing. For information on how context-free grammars can be converted to CNF, refer to Jurafsky\cite{Jurafsky2009b}.
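As a brief illustration (the rules shown here are invented for this paper, not taken from a cited grammar), a rule with three symbols on its right-hand side such as $S \rightarrow NP\;Verb\;NP$ is binarized by introducing a new nonterminal: $S \rightarrow NP\;X_{1}$ together with $X_{1} \rightarrow Verb\;NP$. A unit production such as $NP \rightarrow Noun$ is removed by copying every rule that expands $Noun$ up to $NP$ and then dropping the unit production itself.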
CYK requires $\mathcal{O}(n^{2}m)$ space for the $P$ table (a table with probabilities), where ``$m$ is the number of nonterminal symbols in the grammar''\footnote{\cite{Russel2010}, page 893}, and uses $\mathcal{O}(n^{3}m)$ time. ``$m$ is constant for a particular grammar, [so it] is commonly described as $\mathcal{O}(n^{3})$''\footnote{\cite{Russel2010}, page 893}. There is no algorithm that is better than CYK for general context-free grammars\cite{Russel2010}.
But how does CYK work? CYK does not examine all parse trees. It only examines the most probable one and computes the probability of that tree. All the other parse trees are present in the $P$ table and could be enumerated with a little work (in exponential time). But the strength and beauty of CYK is that they do not have to be enumerated. CYK defines ``the complete state space defined by the `apply grammar rule' operator''\footnote{\cite{Russel2010}, page 894}. Just a part of this space can be searched with $A^{*}$ search.\cite{Russel2010} ``With the $A^{*}$ algorithm [...] the first parse found will be the most probable''\footnote{\cite{Russel2010}, page 895}.
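The table-filling idea behind probabilistic CYK can be sketched in a few lines of Python. The grammar, the rule probabilities and the sentence in the sketch are invented for this illustration, and a real implementation would also store back-pointers to recover the best tree instead of only its probability.
\begin{verbatim}
# Toy probabilistic CYK; grammar and probabilities are invented.
from collections import defaultdict

lexical = {                      # A -> word rules with probabilities
    ("Det", "every"): 1.0,
    ("Noun", "restaurant"): 0.6, ("Noun", "closed"): 0.4,
    ("VP", "closed"): 1.0,
}
binary = {                       # A -> B C rules in CNF
    ("S", ("NP", "VP")): 1.0,
    ("NP", ("Det", "Noun")): 1.0,
}

def cyk(words):
    n = len(words)
    # best[i][j][A] = probability of the best parse of words[i:j] rooted in A
    best = [[defaultdict(float) for _ in range(n + 1)] for _ in range(n + 1)]
    for i, w in enumerate(words):            # fill spans of length one
        for (a, word), p in lexical.items():
            if word == w and p > best[i][i + 1][a]:
                best[i][i + 1][a] = p
    for span in range(2, n + 1):             # then longer spans
        for i in range(n - span + 1):
            j = i + span
            for k in range(i + 1, j):        # every split point
                for (a, (b, c)), p in binary.items():
                    q = p * best[i][k][b] * best[k][j][c]
                    if q > best[i][j][a]:
                        best[i][j][a] = q
    return best[0][n]["S"]                   # probability of the best S parse

print(cyk("every restaurant closed".split()))   # prints 0.6 for this toy grammar
\end{verbatim}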
But these probabilities need to be learned from somewhere. This somewhere is usually a ``treebank''\footnote{\cite{Russel2010}, page 895}, which contains a corpus of correctly parsed sentences. The best known is the Penn Treebank\cite{Russel2010}, which ``consists of 3 million words which have been annotated with part of speech and parse-tree structure, using human labor assisted by some automated tools''\footnote{\cite{Russel2010}, page 895}. The probabilities are then computed by counting and smoothing over the given data.\cite{Russel2010} There are other, more difficult ways to learn the probabilities. For more information refer to Russell\cite{Russel2010}.
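In the simplest case this counting amounts to the standard relative-frequency estimate
\[
P(A \rightarrow \beta) = \frac{Count(A \rightarrow \beta)}{\sum_{\gamma} Count(A \rightarrow \gamma)},
\]
where the sum runs over all rules in the treebank that expand the nonterminal $A$. Smoothing then adjusts these estimates for rules that are rare or absent in the treebank.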
\subsection{Semantic Analysis}
\label{subSec:semanticAnalysis}
Semantic analysis provides multiple approaches. In this paper the approach of ``syntax-driven semantic analysis''\footnote{\cite{Jurafsky2009}, page 617} is explained further. In this approach the output of a parser, the syntactic analysis, ``is passed as input to a semantic analyzer to produce a meaning representation''\footnote{\cite{Jurafsky2009}, page 618}.
Therefore context-free grammar rules are augmented with ``semantic attachments''\footnote{\cite{Jurafsky2009}, page 618}. Every word and syntactic structure in a sentence gets such a semantic attachment. The tree with syntactic components is now traversed in a bottom-up manner. On the way the semantic attachments are combined to finally produce ``First-Order Logic''\footnote{\cite{Jurafsky2009a}, page 589} that can be interpreted in a meaningful way. This procedure has some prerequisites that will be explained first.
The mentioned \textit{First-Order Logic} can be represented by a context-free grammar specification. It is beyond the scope of this paper to describe this specification completely. Jurafsky\cite{Jurafsky2009a} provides a detailed picture of the specification with all elements in figure 17.3. The most important aspects of this specification are explained here. The logic provides terms, which can be functions, constants or variables. Functions take a term as argument. Syntactically they are the same as single-argument predicates, but functions represent one unique object.
Predicates can have multiple terms as arguments. In addition the logic provides quantifiers ($\forall, \exists$) and connectives ($\wedge, \vee, \Rightarrow$).
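A small illustrative formula, constructed for this paper, that combines these elements is
\[
\exists x\, Restaurant(x) \wedge Near(LocationOf(x), Centro),
\]
where $x$ is a variable, $Centro$ is a constant, $LocationOf(x)$ is a function denoting one unique object, $Restaurant$ and $Near$ are predicates, and $\exists$ and $\wedge$ are a quantifier and a connective.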
Another prerequisite is the ``lambda notation''\footnote{\cite{Jurafsky2009a}, page 593}. A simple example of this notation is an expression of the following form\footnote{examples taken from Jurafsky\cite{Jurafsky2009a}, pp. 593--594}:
\[
\lambda x.P(x)
\]
The $\lambda$ can be reduced in a so-called ``$\lambda$-reduction''\footnote{\cite{Jurafsky2009a}, page 593}. The expression above could be reduced in the following way:
\begin{alignat*}{2}
\lambda x.&P(x)&(A) \\
&P(A)&
\end{alignat*}
Those expressions can be extended to $n$ such $\lambda$s. An example is this expression:
\[
\lambda x.\lambda y.Near(x,y)
\]
This expression can be reduced in multiple steps.
\begin{alignat*}{1}
\lambda x.\lambda y.&Near(x,y)(Bacaro) \\
\lambda y.&Near(Bacaro, y)(Centro) \\
&Near(Bacaro, Centro)
\end{alignat*}
This technique is called ``currying''\footnote{\cite{Jurafsky2009a}, page 594} and is used to convert ``a predicate with multiple arguments into a sequence of single-argument predicates''\footnote{\cite{Jurafsky2009a}, page 594}.
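The same currying behaviour can be mimicked with Python's own lambda expressions; the following sketch, which only builds the formula as a string, is meant purely as an illustration of the mechanism.
\begin{verbatim}
# Currying a two-place predicate with nested lambdas (illustrative only).
near = lambda x: lambda y: f"Near({x}, {y})"

partial = near("Bacaro")    # corresponds to lambda y. Near(Bacaro, y)
print(partial("Centro"))    # prints: Near(Bacaro, Centro)
\end{verbatim}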
Now that the prerequisites have been explained, it is time to start with the actual syntax-driven semantic analysis. It will be shown with an example provided by Jurafsky. Consider the sentence \textit{Every restaurant closed}. ``The target representation for this example should be the following''\footnote{\cite{Jurafsky2009}, page 621}.
\begin{equation}
\label{eq:tarRep}
\forall x \,Restaurant(x) \Rightarrow \exists e \,Closed(e) \wedge ClosedThing(e,x)
\end{equation}
The first step is to determine what the meaning representation of \textit{Every restaurant} should be. \textit{Every} is responsible for the $\forall$ quantifier and \textit{restaurant} specifies the category that is quantified over. This is called the ``restriction''\footnote{\cite{Jurafsky2009}, page 622} of the noun phrase. The meaning representation could be $\forall x\,Restaurant(x)$. It is a valid logical formula but it doesn't make much sense. ``It says that everything is a restaurant.''\footnote{\cite{Jurafsky2009}, page 622} ``Noun phrases like [this] are [usually] embedded in expressions that [say] something about the universally quantified variable. That is, we're probably trying to \textit{say something} about all restaurants. This notion is traditionally referred to as the \textit{NP}'s nuclear scope''\footnote{\cite{Jurafsky2009}, page 622}. In the given example, the nuclear scope is \textit{closed}. To represent this notion in the target representation, a dummy predicate $Q$ is added, which results in this expression:
\[
\forall x\,Restaurant(x) \Rightarrow Q(x)
\]
To replace $Q$ with something meaningful, the $\lambda$ notation is needed.
\[
\lambda Q.\forall x\,Restaurant(x) \Rightarrow Q(x)
\]
After more generalization, this is the result:
\[
\lambda P.\lambda Q.\forall x\,P(x) \Rightarrow Q(x)
\]
What happened? The determiner \textit{every} gets this last expression as its semantic attachment. The noun \textit{restaurant} gets $\lambda x.Restaurant(x)$. When the two are combined, the second expression above is the result. The verb is still missing. Therefore the verb \textit{closed} gets the following expression.
\[
\lambda x.\exists e\,Closed(e) \wedge ClosedThing(e,x)
\]
After combining the formulas of the verb and the noun phrase, the previously shown target representation \eqref{eq:tarRep} is the result.
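To make this composition step concrete, the same attachments can be written with Python lambdas (an illustration only; the formulas are built as plain strings with ASCII stand-ins for the logical symbols):
\begin{verbatim}
# Semantic attachments for "Every restaurant closed" (illustrative only).
every      = lambda P: lambda Q: f"forall x.({P('x')}) => ({Q('x')})"
restaurant = lambda x: f"Restaurant({x})"
closed     = lambda x: f"exists e.Closed(e) & ClosedThing(e,{x})"

noun_phrase = every(restaurant)   # attachment of "every restaurant"
print(noun_phrase(closed))
# forall x.(Restaurant(x)) => (exists e.Closed(e) & ClosedThing(e,x))
\end{verbatim}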
This example is just one of many, but it shows how semantic meaning can be attached to syntactic components. Furthermore, it should now be clear how semantic analysis in a syntax-driven approach works.
\section{Critical discussion}
\label{sec:critDiscussion}
Now that both methods have been presented with one selected approach each, it is time to discuss them critically. The CYK algorithm solves many problems like ambiguity, at least to a certain degree. But it is also problematic because of the restriction to CNF. While in theory every context-free grammar can be converted to CNF, in practice it poses ``some non-trivial problems''\footnote{\cite{Jurafsky2009b}, page 475}. One of these problems can be explored in conjunction with the second presented method (semantic analysis). ``[T]he conversion to CNF will complicate any syntax-driven approach to semantic analysis''\footnote{\cite{Jurafsky2009b}, page 475}. A solution to this problem is some kind of post-processing in which the trees are converted back to the original grammar.\cite{Jurafsky2009b} Another option is to use a more complex dynamic programming algorithm that accepts any kind of context-free grammar. Such an algorithm is the ``Earley Algorithm''\footnote{\cite{Jurafsky2009b}, page 477}.
\begin{itemize}
\item compares syntactic parsing and semantic analysis based on these criteria: meaning, complexity, usability for actual communication
\end{itemize}
\section{Conclusion}
\label{sec:concl}
\begin{itemize}
\item summarizes the results of the critical discussion
\item depending on the results: may give advice on which approach is more useful/easier, etc.
\end{itemize}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%