mirror of
https://github.com/2martens/uni.git
synced 2026-05-06 11:26:25 +02:00
ProSem: Outline, Bib-Datei und Ausgangsversion Paper hinzugefügt.
This commit is contained in:
339
prosem/prosem-ki.bib
Executable file
339
prosem/prosem-ki.bib
Executable file
@ -0,0 +1,339 @@
|
||||
% This file was created with JabRef 2.9b2.
|
||||
% Encoding: Cp1252
|
||||
|
||||
% Brin and Page, 1998: conference paper on a large-scale hypertextual
% Web search engine (keywords list PageRank and Google).
@inproceedings{Brin1998,
  author    = {Brin, Sergey and Page, Lawrence},
  title     = {The Anatomy of a Large-Scale Hypertextual Web Search Engine},
  booktitle = {Seventh World Wide Web Conference},
  year      = {1998},
  keywords  = {World Wide Web, Search Engines, Information Retrieval, PageRank, Google},
  owner     = {jim},
  quality   = {1},
  timestamp = {2013.10.29}
}
|
||||
|
||||
% Clark and Curran, 2004: ACL paper on parsing the WSJ with CCG and
% log-linear models.
@inproceedings{Clark2004,
  author    = {Clark, Stephen and Curran, James R.},
  title     = {Parsing the {WSJ} using {CCG} and Log-Linear Models},
  booktitle = {Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics},
  year      = {2004},
  pages     = {104--111},
  owner     = {jim},
  quality   = {1},
  timestamp = {2013.10.29}
}
|
||||
|
||||
% Kessler, Nunberg and Schuetze, 1997: ACL paper on automatic detection
% of text genre. The third author's surname is stored with a LaTeX umlaut
% escape so that classic BibTeX sorts and prints it correctly.
@inproceedings{Kessler1997,
  author    = {Kessler, Brett and Nunberg, Geoffrey and Sch{\"u}tze, Hinrich},
  title     = {Automatic Detection of Text Genre},
  booktitle = {Proceedings of the 35th Annual Meeting of the Association for Computational Linguistics},
  year      = {1997},
  pages     = {32--38},
  owner     = {jim},
  quality   = {1},
  timestamp = {2013.10.29}
}
|
||||
|
||||
% Klein, Smarr, Nguyen and Manning, 2003: CoNLL paper on named entity
% recognition with character-level models.
@inproceedings{Klein2003,
  author    = {Klein, Dan and Smarr, Joseph and Nguyen, Huy and Manning, Christopher D.},
  title     = {Named Entity Recognition with Character-Level Models},
  booktitle = {Conference on Natural Language Learning ({CoNLL})},
  year      = {2003},
  pages     = {180--183},
  owner     = {jim},
  quality   = {1},
  timestamp = {2013.10.29}
}
|
||||
|
||||
% Paskin, 2001: technical report UCB/CSD-01-1148 on cubic-time parsing and
% learning for grammatical bigram models. The abstract field holds a
% free-form summary of the report.
@techreport{Paskin2001,
  author      = {Paskin, Mark A.},
  title       = {Cubic-time Parsing and Learning Algorithms for Grammatical Bigram Models},
  institution = {University of California, Berkeley},
  year        = {2001},
  number      = {UCB/CSD-01-1148},
  month       = jun,
  abstract    = {In Dependency Grammar there are head words and dependents. Each phrase
    has only one head word. The head word determines how all of its dependents
    may be syntactically combined with other words to form a sentence.
    A head word and all of its dependents form a constituent. In every
    sentence there may be one or more dependency relationships with one
    head word each.

    Dependents that precede their head are called predependents and dependents
    that follow their head are called postdependents.

    A dependency parse consists of a set of dependency relationships that
    satisfies three constraints: 1. Every word except one (the root)
    is dependent on exactly one head. 2. The dependency relationships
    are acyclic; no word is, through a sequence of dependency relationships,
    dependent on itself. 3. When drawn as a graph above the sentence,
    no two dependency relations cross -- a property known as projectivity
    or planarity.

    The Grammatical Bigram Probability Model assumes that all the dependents
    of a head word are independent of one another and their relative
    order. This is a strong approximation as in full English there are
    argument structure constraints that rely on the order of dependents.
    This simplification allows for a reduced computational complexity
    for parsing and learning. The grammar model falls into the class
    of "Bilexical grammars".

    A dependency parse consists of multiple spans. A span has at least
    two words up to n words. Spans have one property: No word in the
    span has a parent outside the span. Spans can be joined and closed.
    To join two spans, one of them has to be connected (both end words
    are connected with an edge) and both spans have to share one endword.
    The new span will be connected if both subspans were connected. If
    that is not the case, it can be closed by adding an edge between
    the endwords of the new span.

    Every dependency parse has a unique span decomposition. For joining,
    the left subspan has to be simple. That means it has to have an edge
    between its endwords or consist of two words only. Relying on this
    ensures that each span is derived only once.

    Every span has a signature. This signature states the indexes of its
    endwords, if it is simple and whether the left or right endword have
    parents within the span. Spans where both the left and right endword
    have the parent within the string are called top-level signatures
    as such signatures characterize valid parses.

    Parser operations take signatures as input rather than spans. They
    produce signatures as well. SEED creates an unconnected and simple
    span with two adjacent words. CLOSE-LEFT adds an edge between the
    endwords and makes the left endword the parent of the right one.
    CLOSE-RIGHT does the opposite and makes the right endword the parent
    of the left one. These operators require that neither the left nor
    the right endword have a parent within the span.

    JOIN takes two input spans and joins them. It requires that the spans
    share an endword (1.), the shared endword has one parent (2.) and
    the left input is simple (3.). The JOIN rule applies only if the
    left span doesn't start the sentence.

    These operators constitute an algebra over span signatures called
    span signature algebra. A derivation D is an expression in this algebra.
    Like operations it evaluates to span signatures. These expressions
    can be represented as trees where the nodes are operations. There
    is an isomorphism between dependency parses and their corresponding
    derivations.

    An optimal derivation must consist of an operation over the results of
    optimal sub-derivations. Therefore it is enough to record the parse
    operation with the most likely derivation of a given signature in
    order to reconstruct the most likely derivation of the entire sentence.

    The chart-parse algorithm returns the optimal parse. It uses a subprocedure
    called EXTRACT-OPT-PARSE that constructs the optimal parse by finding
    the top-level signature (sigma) with maximum optimal probability
    (pi*). It then backtracks recursively through the optimal derivation
    defined by (omega*). If CLOSE operations are encountered, edges are
    recorded in the parse. The algorithm requires $O(n^3)$ time and $O(n^2)$
    space.},
  owner       = {jim},
  quality     = {1},
  timestamp   = {2013.10.29}
}
|
||||
|
||||
% Russell and Norvig: chapter 23 (pp. 888--927) of the third edition of
% "Artificial Intelligence: A Modern Approach". The citation key keeps its
% historical spelling so existing citations continue to resolve. The
% abstract field holds a free-form summary covering parsing and speech
% recognition.
@inbook{Russel2010,
  author     = {Russell, Stuart J. and Norvig, Peter},
  title      = {Artificial Intelligence: A Modern Approach},
  booktitle  = {Artificial Intelligence: A Modern Approach},
  year       = {2009},
  month      = dec,
  bookauthor = {Russell, Stuart J. and Norvig, Peter},
  edition    = {Third},
  series     = {Prentice-Hall series in artificial intelligence},
  publisher  = {Prentice Hall},
  chapter    = {23},
  pages      = {888--927},
  abstract   = {The first method for understanding natural language is syntactic analysis
    or parsing. The goal is to find the phrase structure of a sequence
    of words according to the rules of the applied grammar.

    A strict top-to-bottom or bottom-to-top parsing can be inefficient.
    Given two sentences with the same first 10 words and a difference
    only from the 11th word on, parsing from left-to-right would force
    the parser to make a guess about the nature of the sentence. But
    it doesn't know if it's right until the 11th word. From there it
    has to backtrack and reanalyze the sentence.

    To prevent that, dynamic programming is used. Every analyzed substring
    gets stored for later. Once it is discovered that for example "the
    students in section 2 of Computer Science 101" is a noun phrase,
    this information can be stored in a structure known as a chart. Algorithms
    that do such storing are called chart parsers. One of these chart
    parsers is a bottom-up version called the CYK algorithm after its inventors
    John Cocke, Daniel Younger and Tadao Kasami. This algorithm requires
    a grammar in the Chomsky Normal Form. The algorithm takes $O(n^2 m)$
    space for the P table with n being the number of words in the sentence
    and m the number of nonterminal symbols in the grammar. It takes
    $O(n^3 m)$ time where m is constant for a particular grammar. That's
    why it is commonly described as $O(n^3)$. There is no faster algorithm
    for general context-free grammars.

    The CYK algorithm only computes the probability of the most probable
    tree. The subtrees are all represented in the P table.

    PCFGs (Probabilistic context free grammars) have many rules with a
    probability for each one of them. Learning the grammar from data
    is better than a knowledge engineering approach. Learning is easiest
    if we are given a corpus of correctly parsed sentences; commonly
    known as a treebank. The best known treebank is the Penn Treebank
    as it consists of 3 million words which have been annotated with
    part of speech and parse-tree structure. Given an amount of trees,
    a PCFG can be created just by counting and smoothing.

    If no treebank is given it is still possible to learn the grammar
    but it is more difficult. In such a case there are actually two problems:
    First learning the structure of the grammar rules and second learning
    the probabilities associated with them.

    PCFGs have the problem that they are context-free. Combining a PCFG
    and Markov model will get the best of both. This leads ultimately
    to lexicalized PCFGs. But another problem of PCFGs is their preference
    for short sentences.

    Lexicalized PCFGs introduce so-called head words. Such words are the
    most important words in a phrase and the probabilities are calculated
    between the head words. Example: in "eat a banana", "eat" is the head
    of the verb phrase "eat a banana", whereas "banana" is the head of
    the noun phrase "a banana". Probability P1 now depends on "eat" and
    "banana" and the result would be very high. If the head of the noun
    phrase were "bandanna", the result would be significantly lower.

    The next step are definite clause grammars. They can be used to parse
    in a way of logical inference and make it possible to reason about
    languages and strings in many different ways. Furthermore augmentations
    allow for distinctions in a single subphrase. For example the noun
    phrase (NP) depends on the subject case and the person and number
    of persons. A real world example would be "to smell". It is "I smell",
    "you smell", "we smell", "you smell" and "they smell" but "he/she/it
    smells". It depends on the person what version is taken.

    Semantic interpretation is used to give sentences a meaning. This
    is achieved through logical sentences. The semantics can be added
    to an already augmented grammar (created during the previous step),
    resulting in multiple augmentations at the same time. Chill is an
    inductive logic programming program that can learn to achieve 70\%
    to 85\% accuracy on various database query tasks.

    But there are several complications as English is endlessly complex.
    First there is the time at which things happened (present, past,
    future). Second you have the so-called speech act which is the speaker's
    action that has to be deciphered by the hearer. The hearer has to
    find out what type of action it is (a statement, a question, an order,
    a warning, a promise and so on). Then there are so-called long-distance
    dependencies and ambiguity. The ambiguity can reach from lexical
    ambiguity where a word has multiple usages, over syntactic ambiguity
    where a sentence has multiple parses up to semantic ambiguity where
    the meaning of one and the same sentence can be different. Last there
    is ambiguity between literal meaning and figurative meanings.

    Finally there are four models that need to be combined to do disambiguation
    properly: the world model, the mental model, the language model and
    the acoustic model.

    -- not so much an abstract of the specific content of that section
    as an abstract about speech recognition in general --

    The second method is speech recognition. It has the added difficulty
    that the words are not clearly separated and every speaker can pronounce
    the same sentence with the same meaning differently. An example is
    "The train is approaching". Another written form would be "The train's
    approaching". Both convey the same meaning in the written language.
    But if a BBC, a CNN and a German news anchor speak this sentence
    it will sound dramatically different. Speech recognition has to deal
    with that problem to get the written text associated with the spoken
    words. From the text the first method can then be used to analyze
    the words and find a meaning. Finally this meaning can be used to
    create some kind of action in a dialog system.

    --

    Some problems of speech recognition are segmentation, coarticulation
    and homophones. Two used models are the acoustic model and the language
    model. Another major model is the noisy channel model, named after
    Claude Shannon (1948). He showed that the original message can always
    be recovered in a noisy channel if the original message is encoded
    in a redundant enough way.

    The acoustic model in particular is used to get to the really interesting
    parts. It is not interesting how words were spoken but more which
    words were spoken. That means that not all available information
    needs to be stored and a relatively low sample rate is enough. 80 samples
    at 8kHz with a frame length of about 10 milliseconds is enough for
    that matter. To distinguish words so-called phones are used. There
    are 49 phones used in English. A phoneme is the smallest unit of
    sound that has a distinct meaning to speakers of a particular language.
    Back to the frames: every frame is summarized by a vector of features.
    Features are important aspects of a speech signal. It can be compared
    to listening to an orchestra and saying "here the French horns are
    playing loudly and the violins are playing softly". Yet another difficulty
    are dialect variations.

    The language model should be learned from a corpus of transcripts
    of spoken language. But such a thing is more difficult than building
    an n-gram model of text, because it requires a hidden Markov model.

    All in all speech recognition is most effective when used for a specific
    task against a restricted set of options. A general purpose system
    can only work accurately if it creates one model for every speaker.
    Prominent examples like Apple's Siri are therefore not very accurate.},
  owner      = {jim},
  timestamp  = {2013.10.24}
}
|
||||
|
||||
% Sleator and Temperley, 1993: workshop paper on parsing English with a
% link grammar.
@inproceedings{Sleator1993,
  author    = {Sleator, Daniel D. K. and Temperley, Davy},
  title     = {Parsing {English} with a Link Grammar},
  booktitle = {Third International Workshop on Parsing Technologies},
  year      = {1993},
  owner     = {jim},
  quality   = {1},
  timestamp = {2013.10.29}
}
|
||||
|
||||
% Smith and Eisner, 2008: EMNLP paper on dependency parsing by belief
% propagation. The conference dates are kept in ISO form in eventdate,
% which classic BibTeX styles ignore and biblatex understands.
@inproceedings{Smith2008,
  author    = {Smith, David A. and Eisner, Jason},
  title     = {Dependency Parsing by Belief Propagation},
  booktitle = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2008},
  month     = oct,
  eventdate = {2008-10-25/2008-10-27},
  pages     = {145--156},
  owner     = {jim},
  quality   = {1},
  timestamp = {2013.10.29}
}
|
||||
|
||||
Reference in New Issue
Block a user