From 2e7825679033524f386179230161bbbcdaa37261 Mon Sep 17 00:00:00 2001
From: Jim Martens
Date: Wed, 13 Nov 2013 17:50:20 +0100
Subject: [PATCH] ProSem: Added outline, bib file and initial version of the
 paper.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 prosem/Outline.tex     |  85 +++++++++++
 prosem/prosem-ki.bib   | 339 +++++++++++++++++++++++++++++++++++++++++
 prosem/prosempaper.tex | 244 +++++++++++++++++++++++++++++
 3 files changed, 668 insertions(+)
 create mode 100755 prosem/Outline.tex
 create mode 100755 prosem/prosem-ki.bib
 create mode 100755 prosem/prosempaper.tex

diff --git a/prosem/Outline.tex b/prosem/Outline.tex
new file mode 100755
index 0000000..715d83b
--- /dev/null
+++ b/prosem/Outline.tex
@@ -0,0 +1,85 @@
+\documentclass[10pt,a4paper,oneside,english,numbers=noenddot,titlepage]{scrartcl}
+\usepackage[T1]{fontenc}
+\usepackage[utf8]{inputenc}
+\usepackage[english]{babel}
+\usepackage{amsmath}
+\usepackage{amsfonts}
+\usepackage{amssymb}
+\usepackage{paralist}
+\usepackage{gauss}
+\usepackage{pgfplots}
+\usepackage[locale=DE,exponent-product=\cdot,detect-all]{siunitx}
+\usepackage{tikz}
+\usetikzlibrary{matrix,fadings,calc,positioning,decorations.pathreplacing,arrows,decorations.markings}
+\usepackage{polynom}
+\polyset{style=C, div=:,vars=x}
+\pgfplotsset{compat=1.8}
+\pagenumbering{arabic}
+% ensures that paragraphs are separated by empty lines
+\parskip 12pt plus 1pt minus 1pt
+\parindent 0pt
+% define how the sections are rendered
+%\def\thesection{\arabic{section})}
+%\def\thesubsection{\alph{subsection})}
+%\def\thesubsubsection{(\roman{subsubsection})}
+% some matrix magic
+\makeatletter
+\renewcommand*\env@matrix[1][*\c@MaxMatrixCols c]{%
+  \hskip -\arraycolsep
+  \let\@ifnextchar\new@ifnextchar
+  \array{#1}}
+\makeatother
+\addto{\captionsenglish}{\renewcommand{\refname}{Bibliography}}
+
+\begin{document}
+\author{Jim Martens}
+\title{Outline on ``With which methods can we understand natural language to build dialog systems?''}
+%\title{Outline about "Mit welchen Methoden können wir natürliche Sprache verstehen um Dialogsysteme aufzubauen?"}
+\maketitle
+\section*{Abstract}
+	This is a placeholder for the abstract.
+\tableofcontents
+\clearpage
+
+\section{Introduction}
+	\begin{itemize}
+		\item two kinds of natural language: spoken language and written language
+		\item will concentrate on written language
+		\item important method for written language: parsing
+		\item different approaches for the kind of grammar being used
+	\end{itemize}
+\section{Evaluation of approaches}
+	\subsection{CYK, PCFG, lexicalized PCFG, DCG}
+	\begin{itemize}
+		\item presents the context-free approach explained by Norvig and Russell~\cite{Russel2010}
+	\end{itemize}
+	\subsection{Link Grammar}
+	\begin{itemize}
+		\item presents an alternative to PCFGs, referencing Sleator~\cite{Sleator1993}
+	\end{itemize}
+	\subsection{Dependency grammar}
+	\begin{itemize}
+		\item presents dependency grammar, referencing Paskin~\cite{Paskin2001}
+	\end{itemize}
+	\subsection{Categorial grammar}
+	\begin{itemize}
+		\item presents categorial grammars, using Clark and Curran~\cite{Clark2004}
+	\end{itemize}
+
+\section{Critical discussion}
+	\begin{itemize}
+		\item compares the presented grammar approaches with each other
+	\end{itemize}
+
+\section{Conclusion}
+	\begin{itemize}
+		\item summarizes the results of the critical discussion
+		\item depending on the results: may give advice on which approach is more useful or easier to apply
+	\end{itemize}
+
+\clearpage
+
+\bibliography{prosem-ki}
+\bibliographystyle{ieeetr}
+\addcontentsline{toc}{section}{Bibliography}
+\end{document}
\ No newline at end of file
diff --git a/prosem/prosem-ki.bib b/prosem/prosem-ki.bib
new file mode 100755
index 0000000..e27fb01
--- /dev/null
+++ b/prosem/prosem-ki.bib
@@ -0,0 +1,339 @@
+% This file was created with JabRef 2.9b2.
+% Encoding: Cp1252
+
+@INPROCEEDINGS{Brin1998,
+  author = {Brin, Sergey and Page, Lawrence},
+  title = {The Anatomy of a Large-Scale Hypertextual Web Search Engine},
+  booktitle = {Seventh World Wide Web Conference},
+  year = {1998},
+  keywords = {World Wide Web, Search Engines, Information Retrieval, PageRank, Google},
+  owner = {jim},
+  quality = {1},
+  timestamp = {2013.10.29}
+}
+
+@CONFERENCE{Clark2004,
+  author = {Clark, Stephen and Curran, James R.},
+  title = {Parsing the {WSJ} using {CCG} and Log-Linear Models},
+  booktitle = {Proceedings of the 42nd Annual Meeting of the Association for Computational
+	Linguistics},
+  year = {2004},
+  pages = {104-111},
+  owner = {jim},
+  quality = {1},
+  timestamp = {2013.10.29}
+}
+
+@CONFERENCE{Kessler1997,
+  author = {Kessler, Brett and Nunberg, Geoffrey and Schuetze, Hinrich},
+  title = {Automatic Detection of Text Genre},
+  booktitle = {Proceedings of the 35th Annual Meeting of the Association for Computational
+	Linguistics},
+  year = {1997},
+  pages = {32-38},
+  owner = {jim},
+  quality = {1},
+  timestamp = {2013.10.29}
+}
+
+@CONFERENCE{Klein2003,
+  author = {Klein, Dan and Smarr, Joseph and Nguyen, Huy and Manning, Christopher
+	D.},
+  title = {Named Entity Recognition with Character-Level Models},
+  booktitle = {Conference on Natural Language Learning (CoNLL)},
+  year = {2003},
+  pages = {180-183},
+  owner = {jim},
+  quality = {1},
+  timestamp = {2013.10.29}
+}
+
+@TECHREPORT{Paskin2001,
+  author = {Paskin, Mark A.},
+  title = {Cubic-time Parsing and Learning Algorithms for Grammatical Bigram
+	Models},
+  institution = {University of California, Berkeley},
+  year = {2001},
+  number = {UCB/CSD-01-1148},
+  month = {June},
+  abstract = {In Dependency Grammar there are head words and dependents. Each phrase
+	has only one head word.
+	The head word determines how all of its dependents may be syntactically
+	combined with other words to form a sentence. A head word and all of
+	its dependents form a constituent. In every sentence there may be one
+	or more dependency relationships with one head word each.
+
+	Dependents that precede their head are called predependents and
+	dependents that follow their head are called postdependents.
+
+	A dependency parse consists of a set of dependency relationships that
+	satisfies three constraints: 1. Every word except one (the root) is
+	the dependent of exactly one head. 2. The dependency relationships are
+	acyclic; no word is, through a sequence of dependency relationships,
+	dependent on itself. 3. When drawn as a graph above the sentence, no
+	two dependency relations cross - a property known as projectivity or
+	planarity.
+
+	The Grammatical Bigram Probability Model assumes that all the
+	dependents of a head word are independent of one another and of their
+	relative order. This is a strong approximation, as in full English
+	there are argument structure constraints that rely on the order of
+	dependents. This simplification allows for a reduced computational
+	complexity of parsing and learning. The grammar model falls into the
+	class of "bilexical grammars".
+
+	A dependency parse consists of multiple spans. A span contains at
+	least two and at most n words. Spans have one property: no interior
+	word of the span has a parent outside the span. Spans can be joined
+	and closed. To join two spans, one of them has to be connected (its
+	endwords are connected by an edge) and both spans have to share one
+	endword. The new span is connected if both subspans were connected.
+	If that is not the case, it can be closed by adding an edge between
+	the endwords of the new span.
+
+	Every dependency parse has a unique span decomposition. For joining,
+	the left subspan has to be simple. That means it has to have an edge
+	between its endwords or consist of two words only. Relying on this
+	ensures that each span is derived only once.
+
+	Every span has a signature. This signature states the indexes of its
+	endwords, whether it is simple, and whether the left and right
+	endwords have their parents within the span. Spans where both the
+	left and the right endword have their parent within the span have
+	top-level signatures, as such signatures characterize valid parses.
+
+	Parser operations take signatures as input rather than spans. They
+	produce signatures as well. SEED creates an unconnected and simple
+	span of two adjacent words. CLOSE-LEFT adds an edge between the
+	endwords and makes the left endword the parent of the right one.
+	CLOSE-RIGHT does the opposite and makes the right endword the parent
+	of the left one. These operators require that neither the left nor
+	the right endword has a parent within the span.
+
+	JOIN takes two input spans and joins them. It requires that the spans
+	share an endword (1), that the shared endword has one parent (2), and
+	that the left input is simple (3). The JOIN rule applies only if the
+	left span doesn't start the sentence.
+
+	These operators constitute an algebra over span signatures, called
+	the span signature algebra. A derivation D is an expression in this
+	algebra. Like an operation, it evaluates to a span signature. These
+	expressions can be represented as trees where the nodes are
+	operations. There is an isomorphism between dependency parses and
+	their corresponding derivations.
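+
+	The three well-formedness constraints can be checked directly. A
+	minimal sketch (not from the report; the function name and input
+	format are made up for illustration):
+
+	def is_valid_parse(n, edges):
+	    # illustrative sketch; edges: set of (head, dependent) pairs
+	    # over word indexes 0..n-1
+	    heads = {}
+	    for h, d in edges:
+	        if d in heads:      # constraint 1: at most one head per word
+	            return False
+	        heads[d] = h
+	    if len([w for w in range(n) if w not in heads]) != 1:
+	        return False        # constraint 1: exactly one root
+	    for w in range(n):      # constraint 2: no word depends on itself
+	        seen, cur = set(), w
+	        while cur in heads:
+	            if cur in seen:
+	                return False
+	            seen.add(cur)
+	            cur = heads[cur]
+	    for e1 in edges:        # constraint 3: projectivity (no crossing edges)
+	        a1, b1 = sorted(e1)
+	        for e2 in edges:
+	            a2, b2 = sorted(e2)
+	            if a1 < a2 < b1 < b2:
+	                return False
+	    return True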
+
+	An optimal derivation must consist of an operation over the results
+	of optimal sub-derivations. Therefore it is enough to record the
+	parse operation of the most likely derivation of a given signature in
+	order to reconstruct the most likely derivation of the entire
+	sentence.
+
+	The chart-parse algorithm returns the optimal parse. It uses a
+	subprocedure called EXTRACT-OPT-PARSE that constructs the optimal
+	parse by finding the top-level signature (sigma) with maximum optimal
+	probability (pi*). It then backtracks recursively through the optimal
+	derivation defined by (omega*). Whenever CLOSE operations are
+	encountered, edges are recorded in the parse. The algorithm requires
+	O(n³) time and O(n²) space.},
+  owner = {jim},
+  quality = {1},
+  timestamp = {2013.10.29}
+}
+
+@INBOOK{Russel2010,
+  author = {Russell, Stuart J. and Norvig, Peter},
+  title = {Artificial Intelligence: A Modern Approach},
+  booktitle = {Artificial Intelligence: A Modern Approach},
+  year = {2009},
+  date = {December 11},
+  bookauthor = {Russell, Stuart J. and Norvig, Peter},
+  edition = {Third},
+  series = {Prentice-Hall series in artificial intelligence},
+  publisher = {Prentice Hall},
+  chapter = {23},
+  pages = {888-927},
+  abstract = {The first method for understanding natural language is syntactic
+	analysis, or parsing. The goal is to find the phrase structure of a
+	sequence of words according to the rules of the applied grammar.
+
+	Strictly top-down or bottom-up parsing can be inefficient. Given two
+	sentences with the same first 10 words and a difference only from the
+	11th word on, parsing from left to right would force the parser to
+	make a guess about the nature of the sentence. But it doesn't know if
+	its guess is right until the 11th word, and from there it would have
+	to backtrack and reanalyze the sentence.
+
+	To prevent that, dynamic programming is used. Every analyzed
+	substring gets stored for later. Once it is discovered that, for
+	example, "the students in section 2 of Computer Science 101" is a
+	noun phrase, this information can be stored in a structure known as a
+	chart. Algorithms that do such storing are called chart parsers. One
+	of these chart parsers is a bottom-up version called the CYK
+	algorithm, after its inventors John Cocke, Daniel Younger and Tadao
+	Kasami. This algorithm requires a grammar in Chomsky normal form. The
+	algorithm takes O(n²m) space for the P table, with n being the number
+	of words in the sentence and m the number of nonterminal symbols in
+	the grammar. It takes O(n³m) time, where m is constant for a
+	particular grammar. That's why it is commonly described as O(n³).
+	There is no faster algorithm for general context-free grammars.
+
+	The CYK algorithm only computes the probability of the most probable
+	tree. The subtrees are all represented in the P table.
+
+	PCFGs (probabilistic context-free grammars) have many rules, with a
+	probability for each one of them. Learning the grammar from data is
+	better than a knowledge engineering approach. Learning is easiest if
+	we are given a corpus of correctly parsed sentences, commonly known
+	as a treebank. The best known treebank is the Penn Treebank; it
+	consists of 3 million words which have been annotated with part of
+	speech and parse-tree structure. Given such a collection of trees, a
+	PCFG can be created just by counting and smoothing.
+
+	If no treebank is given, it is still possible to learn the grammar,
+	but it is more difficult. In such a case there are actually two
+	problems: first, learning the structure of the grammar rules, and
+	second, learning the probabilities associated with them.
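+
+	As a minimal sketch of the probabilistic CYK table fill (not the
+	book's pseudocode; the grammar representation and start symbol 'S'
+	are assumptions):
+
+	from collections import defaultdict
+
+	def cyk(words, lexicon, rules):
+	    # illustrative sketch; lexicon: (X, word, prob) triples,
+	    # rules: (X, Y, Z, prob) for X -> Y Z in Chomsky normal form
+	    n = len(words)
+	    P = defaultdict(float)  # P[i, length, X]: best probability that
+	                            # X derives words[i:i+length]
+	    for i, w in enumerate(words):
+	        for x, word, prob in lexicon:
+	            if word == w:
+	                P[i, 1, x] = max(P[i, 1, x], prob)
+	    for length in range(2, n + 1):          # span length
+	        for i in range(n - length + 1):     # span start
+	            for split in range(1, length):  # split point
+	                for x, y, z, prob in rules:
+	                    p = prob * P[i, split, y] * P[i + split, length - split, z]
+	                    if p > P[i, length, x]:
+	                        P[i, length, x] = p
+	    return P[0, n, 'S']  # probability of the most probable parse
+
+	The three nested loops over span length, start and split point give
+	the O(n³m) running time; the table P is the O(n²m) space.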
+
+	PCFGs have the problem that they are context-free. Combining a PCFG
+	and a Markov model gets the best of both. This leads ultimately to
+	lexicalized PCFGs. Another problem of PCFGs is their preference for
+	short sentences.
+
+	Lexicalized PCFGs introduce so-called head words. Such words are the
+	most important words in a phrase, and the probabilities are
+	calculated between the head words. Example: in "eat a banana", "eat"
+	is the head of the verb phrase "eat a banana", whereas "banana" is
+	the head of the noun phrase "a banana". The probability P1 now
+	depends on "eat" and "banana", and the result would be very high. If
+	the head of the noun phrase were "bandanna", the result would be
+	significantly lower.
+
+	The next step is definite clause grammars (DCGs). They can be used to
+	parse by means of logical inference and make it possible to reason
+	about languages and strings in many different ways. Furthermore,
+	augmentations allow for distinctions within a single subphrase. For
+	example, the noun phrase (NP) depends on the case of the subject and
+	on person and number. A real-world example would be "to smell": it is
+	"I smell", "you smell", "we smell" and "they smell", but "he/she/it
+	smells". Which form is used depends on person and number.
+
+	Semantic interpretation is used to give sentences a meaning. This is
+	achieved through logical sentences. The semantics can be added to an
+	already augmented grammar (created during the previous step),
+	resulting in multiple augmentations at the same time. CHILL is an
+	inductive logic programming program that can learn to achieve 70% to
+	85% accuracy on various database query tasks.
+
+	But there are several complications, as English is endlessly complex.
+	First there is the time at which things happened (present, past,
+	future). Second, there is the so-called speech act, the speaker's
+	action, which has to be deciphered by the hearer. The hearer has to
+	find out what type of action it is (a statement, a question, an
+	order, a warning, a promise and so on). Then there are so-called
+	long-distance dependencies, and there is ambiguity. Ambiguity ranges
+	from lexical ambiguity, where a word has multiple usages, over
+	syntactic ambiguity, where a sentence has multiple parses, to
+	semantic ambiguity, where one and the same sentence can have
+	different meanings. Last, there is ambiguity between literal and
+	figurative meanings.
+
+	Finally there are four models that need to be combined to do
+	disambiguation properly: the world model, the mental model, the
+	language model and the acoustic model.
+
+	-- not so much an abstract of the specific content of that section
+	as an abstract about speech recognition in general --
+
+	The second method is speech recognition. It has the added difficulty
+	that the words are not clearly separated and every speaker can
+	pronounce the same sentence with the same meaning differently. An
+	example is "The train is approaching". Another written form would be
+	"The train's approaching". Both convey the same meaning in the
+	written language. But if a BBC anchor, a CNN anchor and a German news
+	anchor speak this sentence, it will sound dramatically different.
+	Speech recognition has to deal with that problem to get the written
+	text associated with the spoken words.
+	From the text, the first method can then be used to analyze the words
+	and find a meaning. Finally this meaning can be used to create some
+	kind of action in a dialog system.
+
+	--
+
+	Some problems of speech recognition are segmentation, coarticulation
+	and homophones. Two models used are the acoustic model and the
+	language model. Another major model is the noisy channel model,
+	introduced by Claude Shannon (1948). He showed that the original
+	message can always be recovered in a noisy channel if the original
+	message is encoded in a redundant enough way.
+
+	The acoustic model in particular is used to get to the really
+	interesting parts. It is not interesting how words were spoken but
+	rather what words were spoken. That means that not all available
+	information needs to be stored and a relatively low sampling rate is
+	enough. Frames with a length of about 10 milliseconds (80 samples at
+	8 kHz) are enough for that matter. To distinguish words, so-called
+	phones are used. There are 49 phones used in English. A phoneme is
+	the smallest unit of sound that has a distinct meaning to speakers of
+	a particular language. Back to the frames: every frame is summarized
+	by a vector of features. Features are important aspects of a speech
+	signal. It can be compared to listening to an orchestra and saying
+	"here the French horns are playing loudly and the violins are playing
+	softly". Yet another difficulty is dialect variation.
+
+	The language model should be learned from a corpus of transcripts of
+	spoken language. But this is more difficult than building an n-gram
+	model of text, because it requires a hidden Markov model.
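+
+	For the n-gram part, a minimal sketch (not from the book; the
+	transcript format and names are made up) of a bigram model estimated
+	by counting:
+
+	from collections import defaultdict
+
+	def train_bigram_model(sentences):
+	    # illustrative sketch; sentences: list of word lists,
+	    # <s> and </s> mark sentence boundaries
+	    pair_counts = defaultdict(float)
+	    context_counts = defaultdict(float)
+	    for sentence in sentences:
+	        padded = ['<s>'] + sentence + ['</s>']
+	        for prev, word in zip(padded, padded[1:]):
+	            pair_counts[prev, word] += 1
+	            context_counts[prev] += 1
+	    def prob(prev, word):
+	        # maximum-likelihood P(word | prev); smoothing omitted
+	        if not context_counts[prev]:
+	            return 0.0
+	        return pair_counts[prev, word] / context_counts[prev]
+	    return prob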
+
+	All in all, speech recognition is most effective when used for a
+	specific task against a restricted set of options. A general-purpose
+	system can only work accurately if it creates one model for every
+	speaker. Prominent examples like Apple's Siri are therefore not very
+	accurate.},
+  owner = {jim},
+  timestamp = {2013.10.24}
+}
+
+@INPROCEEDINGS{Sleator1993,
+  author = {Sleator, Daniel D. K. and Temperley, Davy},
+  title = {Parsing English with a Link Grammar},
+  booktitle = {Third Annual Workshop on Parsing Technologies},
+  year = {1993},
+  owner = {jim},
+  quality = {1},
+  timestamp = {2013.10.29}
+}
+
+@CONFERENCE{Smith2008,
+  author = {Smith, David A. and Eisner, Jason},
+  title = {Dependency Parsing by Belief Propagation},
+  booktitle = {Conference on Empirical Methods in Natural Language Processing},
+  year = {2008},
+  date = {October 25 - October 27},
+  pages = {145-156},
+  owner = {jim},
+  quality = {1},
+  timestamp = {2013.10.29}
+}
+
diff --git a/prosem/prosempaper.tex b/prosem/prosempaper.tex
new file mode 100755
index 0000000..4464910
--- /dev/null
+++ b/prosem/prosempaper.tex
@@ -0,0 +1,244 @@
+\documentclass[12pt,twoside]{scrartcl}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Meta information:
+\newcommand{\trauthor}{Jim Martens}
+\newcommand{\trtype}{Proseminar Paper} %{Seminararbeit} %{Proseminararbeit}
+\newcommand{\trcourse}{Proseminar Artificial Intelligence}
+\newcommand{\trtitle}{Methods for understanding natural language}
+\newcommand{\trmatrikelnummer}{6420323}
+\newcommand{\tremail}{2martens@informatik.uni-hamburg.de}
+\newcommand{\trarbeitsbereich}{Knowledge Technology, WTM}
+\newcommand{\trdate}{10.02.2014}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Languages:
+
+% If the paper is written in German:
+% \usepackage[german]{babel}
+% \usepackage[T1]{fontenc}
+% \usepackage[latin1]{inputenc}
+% \usepackage[latin9]{inputenc}
+% \selectlanguage{german}
+
+% If the thesis is written in English:
+\usepackage[english]{babel}
+\selectlanguage{english}
+\addto{\captionsenglish}{\renewcommand{\refname}{Bibliography}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Load packages:
+\usepackage{acronym}                    % Acronyms
+\usepackage{algorithmic}                % Algorithms and Pseudocode
+\usepackage{algorithm}                  % Algorithms and Pseudocode
+\usepackage{amsfonts}                   % AMS Math Packet (Fonts)
+\usepackage{amsmath}                    % AMS Math Packet
+\usepackage{amssymb}                    % Additional mathematical symbols
+\usepackage{amsthm}
+\usepackage{booktabs}                   % Nicer tables
+%\usepackage[font=small,labelfont=bf]{caption} % Numbered captions for figures
+\usepackage{color}                      % Enables defining of colors via \definecolor
+\definecolor{uhhRed}{RGB}{254,0,0}      % Official Uni Hamburg Red
+\definecolor{uhhGrey}{RGB}{122,122,120} % Official Uni Hamburg Grey
+\usepackage{fancybox}                   % Frame equations
+\usepackage{fancyhdr}                   % Package for nicer headers
+%\usepackage{fancyheadings}             % Nicer numbering of headlines
+
+%\usepackage[outer=3.35cm]{geometry}    % Type area (size, margins...) !!!Release version
+%\usepackage[outer=2.5cm]{geometry}     % Type area (size, margins...) !!!Print version
+%\usepackage{geometry}                  % Type area (size, margins...) !!!Proofread version
+\usepackage[outer=3.15cm]{geometry}     % Type area (size, margins...) !!!Draft version
+\geometry{a4paper,body={5.8in,9in}}
+
+\usepackage{graphicx}                   % Inclusion of graphics
+%\usepackage{latexsym}                  % Special symbols
+\usepackage{longtable}                  % Allow tables over several pages
+\usepackage{listings}                   % Nicer source code listings
+\usepackage{multicol}                   % Content of a table over several columns
+\usepackage{multirow}                   % Content of a table over several rows
+\usepackage{rotating}                   % Allows rotating text and objects
+\usepackage[hang]{subfigure}            % Allows multiple (partial) figures in a figure
+%\usepackage[font=footnotesize,labelfont=rm]{subfig} % Pictures in a floating environment
+\usepackage{tabularx}                   % Tables with fixed width but variable rows
+\usepackage{url,xspace,boxedminipage}   % Accurate display of URLs
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Configuration:
+
+\hyphenation{whe-ther}                  % Manually use: "\-" in a word: Staats\-ver\-trag
+
+%\lstloadlanguages{C}                   % Set the default language for listings
+\DeclareGraphicsExtensions{.pdf,.svg,.jpg,.png,.eps} % first try pdf, then eps, png and jpg
+\graphicspath{{./src/}}                 % Path to a folder where all pictures are located
+\pagestyle{fancy}                       % Use nicer header and footer
+
+% Redefine the environments for floating objects:
+\setcounter{topnumber}{3}
+\setcounter{bottomnumber}{2}
+\setcounter{totalnumber}{4}
+\renewcommand{\topfraction}{0.9}        %Standard: 0.7
+\renewcommand{\bottomfraction}{0.5}     %Standard: 0.3
+\renewcommand{\textfraction}{0.1}       %Standard: 0.2
+\renewcommand{\floatpagefraction}{0.8}  %Standard: 0.5
+
+% Tables with a nicer padding:
+\renewcommand{\arraystretch}{1.2}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Additional 'theorem' and 'definition' blocks:
+\theoremstyle{plain}
+\newtheorem{theorem}{Theorem}[section]
+%\newtheorem{theorem}{Satz}[section]    % If written in German.
+\newtheorem{axiom}{Axiom}[section]
+%\newtheorem{axiom}{Fakt}[chapter]      % If written in German.
+%Usage:%\begin{axiom}[optional description]%Main part%\end{axiom}
+
+\theoremstyle{definition}
+\newtheorem{definition}{Definition}[section]
+
+%Additional types of axioms:
+\newtheorem{lemma}[axiom]{Lemma}
+\newtheorem{observation}[axiom]{Observation}
+
+%Additional types of definitions:
+\theoremstyle{remark}
+%\newtheorem{remark}[definition]{Bemerkung} % If written in German.
+\newtheorem{remark}[definition]{Remark}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Provides TODOs within the margin:
+\newcommand{\TODO}[1]{\marginpar{\emph{\small{{\bf TODO: } #1}}}}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Abbreviations and mathematical symbols
+\newcommand{\modd}{\text{ mod }}
+\newcommand{\RS}{\mathbb{R}}
+\newcommand{\NS}{\mathbb{N}}
+\newcommand{\ZS}{\mathbb{Z}}
+\newcommand{\dnormal}{\mathit{N}}
+\newcommand{\duniform}{\mathit{U}}
+
+\newcommand{\erdos}{Erd\H{o}s}
+\newcommand{\renyi}{-R\'{e}nyi}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Document:
+\begin{document}
+\renewcommand{\headheight}{14.5pt}
+
+\fancyhead{}
+\fancyhead[LE]{ \slshape \trauthor}
+\fancyhead[LO]{}
+\fancyhead[RE]{}
+\fancyhead[RO]{ \slshape \trtitle}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Cover Header:
+\begin{titlepage}
+ \begin{flushleft}
+   Universit\"at Hamburg\\
+   Department Informatik\\
+   \trarbeitsbereich\\
+ \end{flushleft}
+ \vspace{3.5cm}
+ \begin{center}
+   \huge \trtitle\\
+ \end{center}
+ \vspace{3.5cm}
+ \begin{center}
+   \normalsize\trtype\\
+   [0.2cm]
+   \Large\trcourse\\
+   [1.5cm]
+   \Large \trauthor\\
+   [0.2cm]
+   \normalsize Matr.Nr. \trmatrikelnummer\\
+   [0.2cm]
+   \normalsize\tremail\\
+   [1.5cm]
+   \Large \trdate
+ \end{center}
+ \vfill
+\end{titlepage}
+
+ % back side of the cover sheet is empty!
+\thispagestyle{empty}
+\hspace{1cm}
+\newpage
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Abstract:
+
+% Abstract gives a brief summary of the main points of a paper:
+\section*{Abstract}
+	Your text here...
+
+% Lists:
+\setcounter{tocdepth}{2}        % depth of the table of contents (for seminars, 2 is recommended)
+\tableofcontents
+\pagenumbering{arabic}
+\clearpage
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Content:
+
+% the actual content, usually separated over a number of sections;
+% each section is assigned a label, in order to be able to put a
+% cross-reference to it
+
+\section{Introduction}
+\label{sec:introduction}
+
+	\begin{itemize}
+		\item two kinds of natural language: spoken language and written language
+		\item will concentrate on written language
+		\item important method for written language: parsing
+		\item different approaches for the kind of grammar being used
+	\end{itemize}
+
+\section{Evaluation of approaches}
+\label{sec:evalApproaches}
+	\subsection{CYK, PCFG, lexicalized PCFG, DCG}
+	\label{subSec:RusselParsing}
+	\begin{itemize}
+		\item presents the context-free approach explained by Norvig and Russell~\cite{Russel2010}
+	\end{itemize}
+	\subsection{Link Grammar}
+	\label{subSec:linkGrammar}
+	\begin{itemize}
+		\item presents an alternative to PCFGs, referencing Sleator~\cite{Sleator1993}
+	\end{itemize}
+	\subsection{Dependency grammar}
+	\label{subSec:dependencyGrammar}
+	\begin{itemize}
+		\item presents dependency grammar, referencing Paskin~\cite{Paskin2001}
+	\end{itemize}
+	\subsection{Categorial grammar}
+	\label{subSec:categorialGrammar}
+	\begin{itemize}
+		\item presents categorial grammars, using Clark and Curran~\cite{Clark2004}
+	\end{itemize}
+
+\section{Critical discussion}
+\label{sec:critDiscussion}
+
+\section{Conclusion}
+\label{sec:concl}
+
+	\begin{itemize}
+		\item summarizes the results of the critical discussion
+		\item depending on the results: may give advice on which approach is more useful or easier to apply
+	\end{itemize}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% here - at the end of the text - the bibliographic references are
+% included
+%
+% the actual entries are stored in the file
+% ``prosem-ki.bib''
+%
+\clearpage
+\bibliography{prosem-ki}
+\bibliographystyle{plain}
+\addcontentsline{toc}{section}{Bibliography}% Add to the TOC
+
+\end{document}