mirror of https://github.com/2martens/uni.git
ProSem: Added outline, bib file, and initial version of the paper.
This commit is contained in:
parent
1fe6f198e6
commit
2e78256790
@ -0,0 +1,85 @@
\documentclass[10pt,a4paper,oneside,english,numbers=noenddot,titlepage]{scrartcl}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{paralist}
\usepackage{gauss}
\usepackage{pgfplots}
\usepackage[locale=DE,exponent-product=\cdot,detect-all]{siunitx}
\usepackage{tikz}
\usetikzlibrary{matrix,fadings,calc,positioning,decorations.pathreplacing,arrows,decorations.markings}
\usepackage{polynom}
\polyset{style=C, div=:,vars=x}
\pgfplotsset{compat=1.8}
\pagenumbering{arabic}
% ensures that paragraphs are separated by empty lines
\parskip 12pt plus 1pt minus 1pt
\parindent 0pt
% define how the sections are rendered
%\def\thesection{\arabic{section})}
%\def\thesubsection{\alph{subsection})}
%\def\thesubsubsection{(\roman{subsubsection})}
% some matrix magic
\makeatletter
\renewcommand*\env@matrix[1][*\c@MaxMatrixCols c]{%
\hskip -\arraycolsep
\let\@ifnextchar\new@ifnextchar
\array{#1}}
\makeatother
\addto{\captionsenglish}{\renewcommand{\refname}{Bibliography}}

\begin{document}
\author{Jim Martens}
\title{Outline about ``With what methods can we understand natural language to build dialog systems?''}
%\title{Outline about "Mit welchen Methoden können wir natürliche Sprache verstehen um Dialogsysteme aufzubauen?"}
\maketitle
\section*{Abstract}
This is a placeholder for the abstract.
\tableofcontents
\clearpage

\section{Introduction}
\begin{itemize}
\item two kinds of natural language: spoken language and written language
\item will concentrate on written language
\item important method for written language: parsing
\item different approaches for the kind of grammar being used
\end{itemize}
\section{Evaluation of approaches}
\subsection{CYK, PCFG, lexicalized PCFG, DCG}
\begin{itemize}
\item presents the context-free approach explained by Norvig and Russell~\cite{Russel2010}
\end{itemize}
\subsection{Link Grammar}
\begin{itemize}
\item presents an alternative to PCFGs; referencing Sleator here~\cite{Sleator1993}
\end{itemize}
\subsection{Dependency grammar}
\begin{itemize}
\item presents dependency grammar here, referencing Paskin~\cite{Paskin2001}
\end{itemize}
\subsection{Categorial grammar}
\begin{itemize}
\item presents categorial grammars, using Clark~\cite{Clark2004} here
\end{itemize}

\section{Critical discussion}
\begin{itemize}
\item compares the presented grammar approaches with each other
\end{itemize}

\section{Conclusion}
\begin{itemize}
\item summarizes the results of the critical discussion
\item depending on the results: may give advice on which approach is more useful or easier to apply
\end{itemize}

\clearpage

\bibliography{prosem-ki}
\bibliographystyle{ieeetr}
\addcontentsline{toc}{section}{Bibliography}
\end{document}
@ -0,0 +1,339 @@
% This file was created with JabRef 2.9b2.
% Encoding: Cp1252

@INPROCEEDINGS{Brin1998,
  author = {Brin, Sergey and Page, Lawrence},
  title = {The Anatomy of a Large-Scale Hypertextual Web Search Engine},
  booktitle = {Seventh World Wide Web Conference},
  year = {1998},
  keywords = {World Wide Web, Search Engines, Information Retrieval, PageRank, Google},
  owner = {jim},
  quality = {1},
  timestamp = {2013.10.29}
}

@CONFERENCE{Clark2004,
  author = {Clark, Stephen and Curran, James R.},
  title = {Parsing the {WSJ} using {CCG} and Log-Linear Models},
  booktitle = {Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics},
  year = {2004},
  pages = {104--111},
  owner = {jim},
  quality = {1},
  timestamp = {2013.10.29}
}

@CONFERENCE{Kessler1997,
  author = {Kessler, Brett and Nunberg, Geoffrey and Schuetze, Hinrich},
  title = {Automatic Detection of Text Genre},
  booktitle = {Proceedings of the 35th Annual Meeting of the Association for Computational Linguistics},
  year = {1997},
  pages = {32--38},
  owner = {jim},
  quality = {1},
  timestamp = {2013.10.29}
}

@CONFERENCE{Klein2003,
  author = {Klein, Dan and Smarr, Joseph and Nguyen, Huy and Manning, Christopher D.},
  title = {Named Entity Recognition with Character-Level Models},
  booktitle = {Conference on Natural Language Learning (CoNLL)},
  year = {2003},
  pages = {180--183},
  owner = {jim},
  quality = {1},
  timestamp = {2013.10.29}
}

@TECHREPORT{Paskin2001,
  author = {Paskin, Mark A.},
  title = {Cubic-time Parsing and Learning Algorithms for Grammatical Bigram Models},
  institution = {University of California},
  year = {2001},
  number = {UCB/CSD-01-1148},
  month = {June},
  abstract = {In Dependency Grammar there are head words and dependents. Each phrase has only
  one head word. The head word determines how all of its dependents may be syntactically
  combined with other words to form a sentence. A head word and all of its dependents form a
  constituent. In every sentence there may be one or more dependency relationships with one
  head word each.

  Dependents that precede their head are called predependents and dependents that follow
  their head are called postdependents.

  A dependency parse consists of a set of dependency relationships that satisfies three
  constraints: 1. Every word except one (the root) is dependent to exactly one head.
  2. The dependency relationships are acyclic; no word is, through a sequence of dependency
  relationships, dependent to itself. 3. When drawn as a graph above the sentence, no two
  dependency relations cross - a property known as projectivity or planarity.

  The Grammatical Bigram Probability Model assumes that all the dependents of a head word
  are independent of one another and of their relative order. This is a strong approximation,
  as in full English there are argument structure constraints that rely on the order of
  dependents. This simplification allows for a reduced computational complexity for parsing
  and learning. The grammar model falls into the class of "Bilexical grammars".

  A dependency parse consists of multiple spans. A span has at least two and up to n words.
  Spans have one property: no word in the span has a parent outside the span. Spans can be
  joined and closed. To join two spans, one of them has to be connected (both end words are
  connected with an edge) and both spans have to share one endword. The new span will be
  connected if both subspans were connected. If that is not the case, it can be closed by
  adding an edge between the endwords of the new span.

  Every dependency parse has a unique span decomposition. For joining, the left subspan has
  to be simple. That means it has to have an edge between its endwords or consist of two
  words only. Relying on this ensures that each span is derived only once.

  Every span has a signature. This signature states the indexes of its endwords, whether it
  is simple, and whether the left or right endword has a parent within the span. Spans where
  both the left and right endword have their parent within the span are called top-level
  signatures, as such signatures characterize valid parses.

  Parser operations take signatures as input rather than spans. They produce signatures as
  well. SEED creates an unconnected and simple span with two adjacent words. CLOSE-LEFT adds
  an edge between the endwords and makes the left endword the parent of the right one.
  CLOSE-RIGHT does the opposite and makes the right endword the parent of the left one.
  These operators require that neither the left nor the right endword has a parent within
  the span.

  JOIN takes two input spans and joins them. It requires that the spans share an endword (1),
  that the shared endword has one parent (2), and that the left input is simple (3). The
  JOIN rule applies only if the left span doesn't start the sentence.

  These operators constitute an algebra over span signatures called the span signature
  algebra. A derivation D is an expression in this algebra. Like operations, it evaluates to
  span signatures. These expressions can be represented as trees where the nodes are
  operations. There is an isomorphism between dependency parses and their corresponding
  derivations.

  An optimal derivation must consist of an operation over the results of optimal
  sub-derivations. Therefore it is enough to record the parse operation with the most likely
  derivation of a given signature in order to reconstruct the most likely derivation of the
  entire sentence.

  The chart-parse algorithm returns the optimal parse. It uses a subprocedure called
  EXTRACT-OPT-PARSE that constructs the optimal parse by finding the top-level signature
  (sigma) with maximum optimal probability (pi*). It then backtracks recursively through the
  optimal derivation defined by (omega*). If CLOSE operations are encountered, edges are
  recorded in the parse. The algorithm requires O(n³) time and O(n²) space.},
  owner = {jim},
  quality = {1},
  timestamp = {2013.10.29}
}
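A minimal sketch of the three well-formedness constraints listed in the abstract above (illustrative only; the encoding of a parse as a child-to-head dictionary and the function name is_valid_dependency_parse are assumptions of this example, not Paskin's notation):

# Illustrative sketch, not from the cited report: check the three constraints on a
# dependency parse given in the abstract above. A parse is encoded (assumption for this
# example only) as a dict mapping each word position to its head position; None marks the root.
def is_valid_dependency_parse(heads):
    roots = [w for w, h in heads.items() if h is None]
    if len(roots) != 1:                  # constraint 1: exactly one root, every other word has one head
        return False
    for start in heads:                  # constraint 2: acyclicity of head links
        seen, w = set(), start
        while heads[w] is not None:
            if w in seen:
                return False
            seen.add(w)
            w = heads[w]
    edges = [(min(w, h), max(w, h)) for w, h in heads.items() if h is not None]
    for a, b in edges:                   # constraint 3: projectivity (no two edges cross)
        for c, d in edges:
            if a < c < b < d:
                return False
    return True

# "the dog barks" with "barks" as root: valid
print(is_valid_dependency_parse({0: 1, 1: 2, 2: None}))        # True
# crossing edges 0->2 and 1->3: not projective
print(is_valid_dependency_parse({0: 2, 1: 3, 2: None, 3: 2}))  # False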

@INBOOK{Russel2010,
  author = {Russell, Stuart J. and Norvig, Peter},
  title = {Artificial Intelligence: A Modern Approach},
  booktitle = {Artificial Intelligence: A Modern Approach},
  year = {2009},
  date = {December 11},
  bookauthor = {Russell, Stuart J. and Norvig, Peter},
  edition = {Third},
  series = {Prentice-Hall series in artificial intelligence},
  publisher = {Prentice Hall},
  chapter = {23},
  pages = {888--927},
  abstract = {The first method for understanding natural language is syntactic analysis or
  parsing. The goal is to find the phrase structure of a sequence of words according to the
  rules of the applied grammar.

  Strict top-down or bottom-up parsing can be inefficient. Given two sentences with the same
  first 10 words and a difference only from the 11th word on, parsing from left to right
  would force the parser to make a guess about the nature of the sentence. But it doesn't
  know if it's right until the 11th word, and from there it would have to backtrack and
  reanalyze the sentence.

  To prevent that, dynamic programming is used. Every analyzed substring gets stored for
  later. Once it is discovered that, for example, "the students in section 2 of Computer
  Science 101" is a noun phrase, this information can be stored in a structure known as a
  chart. Algorithms that do such storing are called chart parsers. One of these chart parsers
  is a bottom-up version called the CYK algorithm, after its inventors John Cocke, Daniel
  Younger and Tadao Kasami. This algorithm requires a grammar in Chomsky Normal Form. The
  algorithm takes O(n²m) space for the P table, with n being the number of words in the
  sentence and m the number of nonterminal symbols in the grammar. It takes O(n³m) time,
  where m is constant for a particular grammar. That's why it is commonly described as O(n³).
  There is no faster algorithm for general context-free grammars.

  The CYK algorithm only computes the probability of the most probable tree. The subtrees
  are all represented in the P table.

  PCFGs (probabilistic context-free grammars) have many rules with a probability for each
  one of them. Learning the grammar from data is better than a knowledge engineering
  approach. Learning is easiest if we are given a corpus of correctly parsed sentences,
  commonly known as a treebank. The best-known treebank is the Penn Treebank, as it consists
  of 3 million words which have been annotated with part of speech and parse-tree structure.
  Given a number of trees, a PCFG can be created just by counting and smoothing.

  If no treebank is given, it is still possible to learn the grammar but it is more
  difficult. In such a case there are actually two problems: first, learning the structure
  of the grammar rules and, second, learning the probabilities associated with them.

  PCFGs have the problem that they are context-free. Combining a PCFG and a Markov model
  will get the best of both. This leads ultimately to lexicalized PCFGs. But another problem
  of PCFGs is their preference for short sentences.

  Lexicalized PCFGs introduce so-called head words. Such words are the most important words
  in a phrase and the probabilities are calculated between the head words. Example: in
  "eat a banana", "eat" is the head of the verb phrase "eat a banana", whereas "banana" is
  the head of the noun phrase "a banana". Probability P1 now depends on "eat" and "banana"
  and the result would be very high. If the head of the noun phrase were "bandanna", the
  result would be significantly lower.

  The next step is definite clause grammars. They can be used to parse by means of logical
  inference and make it possible to reason about languages and strings in many different
  ways. Furthermore, augmentations allow for distinctions in a single subphrase. For example,
  the noun phrase (NP) depends on the subject case as well as person and number. A
  real-world example would be "to smell". It is "I smell", "you smell", "we smell",
  "you smell" and "they smell" but "he/she/it smells". The person determines which form is
  used.

  Semantic interpretation is used to give sentences a meaning. This is achieved through
  logical sentences. The semantics can be added to an already augmented grammar (created
  during the previous step), resulting in multiple augmentations at the same time. CHILL is
  an inductive logic programming program that can learn to achieve 70% to 85% accuracy on
  various database query tasks.

  But there are several complications, as English is endlessly complex. First, there is the
  time at which things happened (present, past, future). Second, you have the so-called
  speech act, which is the speaker's action that has to be deciphered by the hearer. The
  hearer has to find out what type of action it is (a statement, a question, an order, a
  warning, a promise and so on). Then there are so-called long-distance dependencies and
  ambiguity. The ambiguity ranges from lexical ambiguity, where a word has multiple usages,
  over syntactic ambiguity, where a sentence has multiple parses, up to semantic ambiguity,
  where the meaning of one and the same sentence can differ. Lastly, there is ambiguity
  between literal and figurative meanings.

  Finally, there are four models that need to be combined to do disambiguation properly: the
  world model, the mental model, the language model and the acoustic model.

  -- not so much an abstract of the specific content of that section as an abstract about
  speech recognition in general --

  The second method is speech recognition. It has the added difficulty that the words are
  not clearly separated and every speaker can pronounce the same sentence with the same
  meaning differently. An example is "The train is approaching". Another written form would
  be "The train's approaching". Both convey the same meaning in the written language. But if
  a BBC, a CNN and a German news anchor speak this sentence, it will sound dramatically
  different. Speech recognition has to deal with that problem to get the written text
  associated with the spoken words. From the text, the first method can then be used to
  analyze the words and find a meaning. Finally, this meaning can be used to create some
  kind of action in a dialog system.

  --

  Some problems of speech recognition are segmentation, coarticulation and homophones. Two
  models that are used are the acoustic model and the language model. Another major model is
  the noisy channel model, named after Claude Shannon (1948). He showed that the original
  message can always be recovered in a noisy channel if the original message is encoded in
  a sufficiently redundant way.

  The acoustic model in particular is used to get to the really interesting parts. It is not
  interesting how words were spoken but rather what words were spoken. That means that not
  all available information needs to be stored and a relatively low sampling rate is enough.
  A sampling rate of 8 kHz with a frame length of about 10 milliseconds (80 samples per
  frame) is enough for that matter. To distinguish words, so-called phones are used. There
  are 49 phones used in English. A phoneme is the smallest unit of sound that has a distinct
  meaning to speakers of a particular language. Back to the frames: every frame is
  summarized by a vector of features. Features are important aspects of a speech signal. It
  can be compared to listening to an orchestra and saying "here the French horns are playing
  loudly and the violins are playing softly". Yet another difficulty is dialect variation.

  The language model should be learned from a corpus of transcripts of spoken language. But
  such a thing is more difficult than building an n-gram model of text, because it requires
  a hidden Markov model.

  All in all, speech recognition is most effective when used for a specific task against a
  restricted set of options. A general-purpose system can only work accurately if it creates
  one model for every speaker. Prominent examples like Apple's Siri are therefore not very
  accurate.},
  owner = {jim},
  timestamp = {2013.10.24}
}
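A minimal sketch of the probabilistic CYK recursion summarized in the abstract above (illustrative only; the toy CNF grammar, its probabilities, and the function name cyk are assumptions of this example, not taken from the book):

# Illustrative sketch of probabilistic CYK on a grammar in Chomsky Normal Form.
# The toy grammar and rule probabilities below are made up for this example.
from collections import defaultdict

unary = {("D", "a"): 1.0, ("N", "fish"): 0.4, ("V", "fish"): 0.6, ("NP", "fish"): 0.2}
binary = {("NP", ("D", "N")): 0.8, ("VP", ("V", "NP")): 0.7, ("S", ("NP", "VP")): 1.0}

def cyk(words):
    n = len(words)
    # P[i][j][A] = probability of the best parse of words[i:j] with root A
    P = [[defaultdict(float) for _ in range(n + 1)] for _ in range(n + 1)]
    for i, w in enumerate(words):                      # terminal rules
        for (A, word), p in unary.items():
            if word == w:
                P[i][i + 1][A] = max(P[i][i + 1][A], p)
    for length in range(2, n + 1):                     # span length
        for i in range(n - length + 1):                # span start
            j = i + length
            for k in range(i + 1, j):                  # split point
                for (A, (B, C)), p in binary.items():
                    cand = p * P[i][k][B] * P[k][j][C]
                    if cand > P[i][j][A]:
                        P[i][j][A] = cand
    return P[0][n]["S"]                                # probability of the best S parse

# O(n^3 * |rules|) time and O(n^2 * |nonterminals|) space, matching the complexity above
print(cyk("fish fish a fish".split()))                 # ~0.0269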

@INPROCEEDINGS{Sleator1993,
  author = {Sleator, Daniel D. K. and Temperley, Davy},
  title = {Parsing English with a Link Grammar},
  booktitle = {Third Annual Workshop on Parsing Technologies},
  year = {1993},
  owner = {jim},
  quality = {1},
  timestamp = {2013.10.29}
}

@CONFERENCE{Smith2008,
  author = {Smith, David A. and Eisner, Jason},
  title = {Dependency Parsing by Belief Propagation},
  booktitle = {Conference on Empirical Methods in Natural Language Processing},
  year = {2008},
  date = {October 25 - October 27},
  pages = {145--156},
  owner = {jim},
  quality = {1},
  timestamp = {2013.10.29}
}
@ -0,0 +1,244 @@
\documentclass[12pt,twoside]{scrartcl}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Meta information:
\newcommand{\trauthor}{Jim Martens}
\newcommand{\trtype}{Proseminar Paper} %{Seminararbeit} %{Proseminararbeit}
\newcommand{\trcourse}{Proseminar Artificial Intelligence}
\newcommand{\trtitle}{Methods for understanding natural language}
\newcommand{\trmatrikelnummer}{6420323}
\newcommand{\tremail}{2martens@informatik.uni-hamburg.de}
\newcommand{\trarbeitsbereich}{Knowledge Technology, WTM}
\newcommand{\trdate}{10.02.2014}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Languages:

% If the paper is written in German:
% \usepackage[german]{babel}
% \usepackage[T1]{fontenc}
% \usepackage[latin1]{inputenc}
% \usepackage[latin9]{inputenc}
% \selectlanguage{german}

% If the thesis is written in English:
\usepackage[english]{babel}
\selectlanguage{english}
\addto{\captionsenglish}{\renewcommand{\refname}{Bibliography}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Bind packages:
\usepackage{acronym} % Acronyms
\usepackage{algorithmic} % Algorithms and Pseudocode
\usepackage{algorithm} % Algorithms and Pseudocode
\usepackage{amsfonts} % AMS Math Packet (Fonts)
\usepackage{amsmath} % AMS Math Packet
\usepackage{amssymb} % Additional mathematical symbols
\usepackage{amsthm}
\usepackage{booktabs} % Nicer tables
%\usepackage[font=small,labelfont=bf]{caption} % Numbered captions for figures
\usepackage{color} % Enables defining of colors via \definecolor
\definecolor{uhhRed}{RGB}{254,0,0} % Official Uni Hamburg Red
\definecolor{uhhGrey}{RGB}{122,122,120} % Official Uni Hamburg Grey
\usepackage{fancybox} % Boxed equations
\usepackage{fancyhdr} % Package for nicer headers
%\usepackage{fancyheadings} % Nicer numbering of headlines

%\usepackage[outer=3.35cm]{geometry} % Type area (size, margins...) !!!Release version
%\usepackage[outer=2.5cm]{geometry} % Type area (size, margins...) !!!Print version
%\usepackage{geometry} % Type area (size, margins...) !!!Proofread version
\usepackage[outer=3.15cm]{geometry} % Type area (size, margins...) !!!Draft version
\geometry{a4paper,body={5.8in,9in}}

\usepackage{graphicx} % Inclusion of graphics
%\usepackage{latexsym} % Special symbols
\usepackage{longtable} % Allows tables over several pages
\usepackage{listings} % Nicer source code listings
\usepackage{multicol} % Content of a table over several columns
\usepackage{multirow} % Content of a table over several rows
\usepackage{rotating} % Allows rotating text and objects
\usepackage[hang]{subfigure} % Allows using multiple (partial) figures in a figure
%\usepackage[font=footnotesize,labelfont=rm]{subfig} % Pictures in a floating environment
\usepackage{tabularx} % Tables with fixed width but variable rows
\usepackage{url,xspace,boxedminipage} % Accurate display of URLs

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Configuration:

\hyphenation{whe-ther} % Manually use: "\-" in a word: Staats\-ver\-trag

%\lstloadlanguages{C} % Set the default language for listings
\DeclareGraphicsExtensions{.pdf,.svg,.jpg,.png,.eps} % first try pdf, then svg, jpg, png and eps
\graphicspath{{./src/}} % Path to a folder where all pictures are located
\pagestyle{fancy} % Use nicer header and footer

% Redefine the environments for floating objects:
\setcounter{topnumber}{3}
\setcounter{bottomnumber}{2}
\setcounter{totalnumber}{4}
\renewcommand{\topfraction}{0.9} %Standard: 0.7
\renewcommand{\bottomfraction}{0.5} %Standard: 0.3
\renewcommand{\textfraction}{0.1} %Standard: 0.2
\renewcommand{\floatpagefraction}{0.8} %Standard: 0.5

% Tables with a nicer padding:
\renewcommand{\arraystretch}{1.2}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Additional 'theorem' and 'definition' blocks:
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
%\newtheorem{theorem}{Satz}[section] % If written in German.
\newtheorem{axiom}{Axiom}[section]
%\newtheorem{axiom}{Fakt}[chapter] % If written in German.
%Usage:%\begin{axiom}[optional description]%Main part%\end{axiom}

\theoremstyle{definition}
\newtheorem{definition}{Definition}[section]

%Additional types of axioms:
\newtheorem{lemma}[axiom]{Lemma}
\newtheorem{observation}[axiom]{Observation}

%Additional types of definitions:
\theoremstyle{remark}
%\newtheorem{remark}[definition]{Bemerkung} % If written in German.
\newtheorem{remark}[definition]{Remark}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Provides TODOs within the margin:
\newcommand{\TODO}[1]{\marginpar{\emph{\small{{\bf TODO: } #1}}}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Abbreviations and mathematical symbols
\newcommand{\modd}{\text{ mod }}
\newcommand{\RS}{\mathbb{R}}
\newcommand{\NS}{\mathbb{N}}
\newcommand{\ZS}{\mathbb{Z}}
\newcommand{\dnormal}{\mathit{N}}
\newcommand{\duniform}{\mathit{U}}

\newcommand{\erdos}{Erd\H{o}s}
\newcommand{\renyi}{-R\'{e}nyi}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Document:
\begin{document}
\renewcommand{\headheight}{14.5pt}

\fancyhead{}
\fancyhead[LE]{ \slshape \trauthor}
\fancyhead[LO]{}
\fancyhead[RE]{}
\fancyhead[RO]{ \slshape \trtitle}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Cover Header:
\begin{titlepage}
\begin{flushleft}
Universit\"at Hamburg\\
Department Informatik\\
\trarbeitsbereich\\
\end{flushleft}
\vspace{3.5cm}
\begin{center}
\huge \trtitle\\
\end{center}
\vspace{3.5cm}
\begin{center}
\normalsize\trtype\\
[0.2cm]
\Large\trcourse\\
[1.5cm]
\Large \trauthor\\
[0.2cm]
\normalsize Matr.Nr. \trmatrikelnummer\\
[0.2cm]
\normalsize\tremail\\
[1.5cm]
\Large \trdate
\end{center}
\vfill
\end{titlepage}

% back side of cover sheet is empty!
\thispagestyle{empty}
\hspace{1cm}
\newpage

%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Abstract:

% Abstract gives a brief summary of the main points of a paper:
\section*{Abstract}
Your text here...

% Lists:
\setcounter{tocdepth}{2} % depth of the table of contents (for seminar papers a depth of 2 is recommended)
\tableofcontents
\pagenumbering{arabic}
\clearpage

%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Content:

% the actual content, usually separated over a number of sections
% each section is assigned a label, in order to be able to put a
% cross-reference to it

\section{Introduction}
\label{sec:introduction}

\begin{itemize}
\item two kinds of natural language: spoken language and written language
\item will concentrate on written language
\item important method for written language: parsing
\item different approaches for the kind of grammar being used
\end{itemize}

\section{Evaluation of approaches}
\label{sec:evalApproaches}
\subsection{CYK, PCFG, lexicalized PCFG, DCG}
\label{subSec:RusselParsing}
\begin{itemize}
\item presents the context-free approach explained by Norvig and Russell~\cite{Russel2010}
\end{itemize}
\subsection{Link Grammar}
\label{subSec:linkGrammar}
\begin{itemize}
\item presents an alternative to PCFGs; referencing Sleator here~\cite{Sleator1993}
\end{itemize}
\subsection{Dependency grammar}
\label{subSec:dependencyGrammar}
\begin{itemize}
\item presents dependency grammar here, referencing Paskin~\cite{Paskin2001}
\end{itemize}
\subsection{Categorial grammar}
\label{subSec:categorialGrammar}
\begin{itemize}
\item presents categorial grammars, using Clark~\cite{Clark2004} here
\end{itemize}

\section{Critical discussion}
\label{sec:critDiscussion}

\section{Conclusion}
\label{sec:concl}

\begin{itemize}
\item summarizes the results of the critical discussion
\item depending on the results: may give advice on which approach is more useful or easier to apply
\end{itemize}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The bibliographic references are included here, at the end of the text.
%
% The actual entries are stored in the file ``prosem-ki.bib''.
%
\clearpage
\bibliography{prosem-ki}
\bibliographystyle{plain}
\addcontentsline{toc}{section}{Bibliography}% Add to the TOC

\end{document}