\documentclass[12pt,twoside]{scrartcl}
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Meta informations:
|
|
\newcommand{\trauthor}{Jim Martens}
|
|
\newcommand{\trtype}{Seminar Paper} %{Seminararbeit} %{Proseminararbeit}
|
|
\newcommand{\trcourse}{Neural Networks}
|
|
\newcommand{\trtitle}{Outline - Second Environmental Feedback Loop}
|
|
\newcommand{\trmatrikelnummer}{6420323}
|
|
\newcommand{\tremail}{2martens@informatik.uni-hamburg.de}
|
|
\newcommand{\trarbeitsbereich}{Knowledge Technology, WTM}
|
|
\newcommand{\trdate}{26.04.2018}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Languages:
|
|
|
|
% Falls die Ausarbeitung in Deutsch erfolgt:
|
|
% \usepackage[german]{babel}
|
|
% \usepackage[T1]{fontenc}
|
|
% \usepackage[latin1]{inputenc}
|
|
% \usepackage[latin9]{inputenc}
|
|
% \selectlanguage{german}
|
|
|
|
% If the thesis is written in English:
|
|
\usepackage[spanish,english]{babel}
|
|
\selectlanguage{english}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Bind packages:
|
|
\usepackage[utf8]{inputenc} % Unicode funktioniert unter Windows, Linux und Mac
|
|
\usepackage[T1]{fontenc}
|
|
\usepackage{acronym} % Acronyms
|
|
\usepackage{algorithmic} % Algorithms and Pseudocode
|
|
\usepackage{algorithm} % Algorithms and Pseudocode
|
|
\usepackage{amsfonts} % AMS Math Packet (Fonts)
|
|
\usepackage{amsmath} % AMS Math Packet
|
|
\usepackage{amssymb} % Additional mathematical symbols
|
|
\usepackage{amsthm}
|
|
\usepackage{booktabs} % Nicer tables
|
|
%\usepackage[font=small,labelfont=bf]{caption} % Numbered captions for figures
|
|
\usepackage{color} % Enables defining of colors via \definecolor
|
|
\definecolor{uhhRed}{RGB}{254,0,0} % Official Uni Hamburg Red
|
|
\definecolor{uhhGrey}{RGB}{122,122,120} % Official Uni Hamburg Grey
|
|
\usepackage{fancybox} % Gleichungen einrahmen
|
|
%\usepackage{fancyhdr} % Packet for nicer headers
|
|
\usepackage[automark]{scrlayer-scrpage}
|
|
\usepackage[hidelinks]{hyperref}\urlstyle{rm}
|
|
%\usepackage{fancyheadings} % Nicer numbering of headlines
|
|
|
|
%\usepackage[outer=3.35cm]{geometry} % Type area (size, margins...) !!!Release version
|
|
%\usepackage[outer=2.5cm]{geometry} % Type area (size, margins...) !!!Print version
|
|
%\usepackage{geometry} % Type area (size, margins...) !!!Proofread version
|
|
\usepackage[outer=3.15cm]{geometry} % Type area (size, margins...) !!!Draft version
|
|
\geometry{a4paper,body={5.8in,9in}}
|
|
|
|
\usepackage{graphicx} % Inclusion of graphics
|
|
%\usepackage{latexsym} % Special symbols
|
|
\usepackage{longtable}                   % Allow tables over several pages
|
|
\usepackage{listings} % Nicer source code listings
|
|
\usepackage{multicol} % Content of a table over several columns
|
|
\usepackage{multirow} % Content of a table over several rows
|
|
\usepackage{rotating}                    % Allows to rotate text and objects
|
|
\usepackage[hang]{subfigure} % Allows to use multiple (partial) figures in a fig
|
|
%\usepackage[font=footnotesize,labelfont=rm]{subfig} % Pictures in a floating environment
|
|
\usepackage{tabularx} % Tables with fixed width but variable rows
|
|
\usepackage{url,xspace,boxedminipage} % Accurate display of URLs
|
|
|
|
\usepackage{csquotes}
|
|
\usepackage[
|
|
backend=biber,
|
|
bibstyle=ieee,
|
|
citestyle=ieee,
|
|
minnames=1,
|
|
maxnames=2
|
|
]{biblatex}
|
|
|
|
\addbibresource{bib.bib}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Configurations:
|
|
|
|
\hyphenation{whe-ther} % Manually use: "\-" in a word: Staats\-ver\-trag
|
|
|
|
%\lstloadlanguages{C} % Set the default language for listings
|
|
\DeclareGraphicsExtensions{.pdf,.svg,.jpg,.png,.eps} % first try pdf, then eps, png and jpg
|
|
\graphicspath{{./src/}} % Path to a folder where all pictures are located
|
|
%\pagestyle{fancy} % Use nicer header and footer
|
|
\pagestyle{scrheadings}
|
|
|
|
% Redefine the environments for floating objects:
|
|
\setcounter{topnumber}{3}
|
|
\setcounter{bottomnumber}{2}
|
|
\setcounter{totalnumber}{4}
|
|
\renewcommand{\topfraction}{0.9} %Standard: 0.7
|
|
\renewcommand{\bottomfraction}{0.5} %Standard: 0.3
|
|
\renewcommand{\textfraction}{0.1} %Standard: 0.2
|
|
\renewcommand{\floatpagefraction}{0.8} %Standard: 0.5
|
|
|
|
% Tables with a nicer padding:
|
|
\renewcommand{\arraystretch}{1.2}
|
|
\MakeOuterQuote{"}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Additional 'theorem' and 'definition' blocks:
|
|
\theoremstyle{plain}
|
|
\newtheorem{theorem}{Theorem}[section]
|
|
%\newtheorem{theorem}{Satz}[section] % Wenn in Deutsch geschrieben wird.
|
|
\newtheorem{axiom}{Axiom}[section]
|
|
%\newtheorem{axiom}{Fakt}[chapter] % Wenn in Deutsch geschrieben wird.
|
|
%Usage:%\begin{axiom}[optional description]%Main part%\end{axiom}
|
|
|
|
\theoremstyle{definition}
|
|
\newtheorem{definition}{Definition}[section]
|
|
|
|
%Additional types of axioms:
|
|
\newtheorem{lemma}[axiom]{Lemma}
|
|
\newtheorem{observation}[axiom]{Observation}
|
|
|
|
%Additional types of definitions:
|
|
\theoremstyle{remark}
|
|
%\newtheorem{remark}[definition]{Bemerkung} % Wenn in Deutsch geschrieben wird.
|
|
\newtheorem{remark}[definition]{Remark}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Provides TODOs within the margin:
|
|
\newcommand{\TODO}[1]{\marginpar{\emph{\small{{\bf TODO: } #1}}}}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Abbreviations and mathematical symbols
|
|
\newcommand{\modd}{\text{ mod }}
|
|
\newcommand{\RS}{\mathbb{R}}
|
|
\newcommand{\NS}{\mathbb{N}}
|
|
\newcommand{\ZS}{\mathbb{Z}}
|
|
\newcommand{\dnormal}{\mathit{N}}
|
|
\newcommand{\duniform}{\mathit{U}}
|
|
|
|
\newcommand{\erdos}{Erd\H{o}s}
|
|
\newcommand{\renyi}{-R\'{e}nyi}
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Document:
|
|
\begin{document}
|
|
\renewcommand{\headheight}{14.5pt}
|
|
|
|
%\fancyhead{}
|
|
%\fancyhead[LE]{ \slshape \trauthor}
|
|
%\fancyhead[LO]{}
|
|
%\fancyhead[RE]{}
|
|
%\fancyhead[RO]{ \slshape \trtitle}
|
|
\lehead{\slshape \trauthor}
|
|
\rohead{\slshape \trtitle}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Cover Header:
|
|
\begin{titlepage}
|
|
\begin{flushleft}
|
|
Universit\"at Hamburg\\
|
|
Department Informatik\\
|
|
\trarbeitsbereich\\
|
|
\end{flushleft}
|
|
\vspace{3.5cm}
|
|
\begin{center}
|
|
\huge \trtitle\\
|
|
\end{center}
|
|
\vspace{3.5cm}
|
|
\begin{center}
|
|
\normalsize\trtype\\
|
|
[0.2cm]
|
|
\Large\trcourse\\
|
|
[1.5cm]
|
|
\Large \trauthor\\
|
|
[0.2cm]
|
|
\normalsize Matr.Nr. \trmatrikelnummer\\
|
|
[0.2cm]
|
|
\normalsize\tremail\\
|
|
[1.5cm]
|
|
\Large \trdate
|
|
\end{center}
|
|
\vfill
|
|
\end{titlepage}
|
|
|
|
%backsite of cover sheet is empty!
|
|
\thispagestyle{empty}
|
|
\hspace{1cm}
|
|
\newpage
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Abstract:
|
|
|
|
% Abstract gives a brief summary of the main points of a paper:
|
|
\section*{Abstract}
|
|
Catastrophic forgetting is a huge problem for neural networks, in particular
|
|
for autonomous systems. This paper will showcase three approaches using
|
|
diffusion-based neuromodulation and compare them with respect to catastrophic
|
|
forgetting. The results of the comparison are that modulated random search
is not useful to combat catastrophic forgetting, that modulated gaussian walk is
significantly better on that front, and that the localized learning approach of
Velez and Clune overcomes catastrophic forgetting for small networks.
|
|
|
|
% Lists:
|
|
\setcounter{tocdepth}{2} % depth of the table of contents (for Seminars 2 is recommented)
|
|
\tableofcontents
|
|
\pagenumbering{arabic}
|
|
\clearpage
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Content:
|
|
|
|
% the actual content, usually separated over a number of sections
|
|
% each section is assigned a label, in order to be able to put a
|
|
% crossreference to it
|
|
|
|
\section{Introduction}
|
|
\label{sec:introduction}
|
|
|
|
Autonomous robots need to adapt to new situations. They have a need to learn
|
|
for an entire life. In order to do this they need a second environmental feedback
|
|
loop that tells them when to learn~\cite{Toutounji2016}.
|
|
|
|
The learning itself is also described as plasticity. In the context of this paper
|
|
the definition of synaptic plasticity given by Citri~\cite{Citri2008} will be used.
|
|
In short the process of learning itself, changing the weights, is already
|
|
considered plasticity. This can occur throughout the lifetime of a network or
|
|
during the training phase of networks using for example supervised learning
|
|
and backpropagation.
|
|
|
|
When a network has to adapt to new situations, it has to learn new tasks. Usually
|
|
the previously learned weights are largely forgotten. This phenomenon is called
|
|
catastrophic forgetting~\cite{French1999,McCloskey1989}.
|
|
|
|
Since catastrophic forgetting is a key problem for autonomous learning, it is
|
|
crucial to overcome it. In this paper I will present some approaches for
|
|
learning in an autonomous setup to analyse which of them if any can overcome
|
|
catastrophic forgetting.
|
|
|
|
The next section will go into more detail about the history of research about
|
|
catastrophic forgetting. Afterwards the three approaches will be explained.
|
|
A comparison of the three approaches with respect to catastrophic forgetting
|
|
will follow, before a conclusion wraps up this paper.
|
|
|
|
\section{Catastrophic Forgetting}
|
|
\label{sec:catastrophicforgetting}
|
|
|
|
This section presents the major research developments related to catastrophic
|
|
forgetting and explains what it actually is. It follows the review of French~\cite{French1999}.
|
|
|
|
McCloskey and Cohen~\cite{McCloskey1989} originally discovered the problem of
|
|
catastrophic forgetting, which was referred to as catastrophic interference. This
|
|
discovery of a fundamental limitation of the classic neural network was as
|
|
important as the work of Minsky and Papert~\cite{Minsky1969}, who described the
|
|
limitations of a perceptron twenty years prior. The key discovery of McCloskey
|
|
and Cohen was that previously learned patterns were completely forgotten after
|
|
a few training cycles of learning a new pattern. The reason behind this
|
|
behaviour was the real problem. They identified the single set of shared
|
|
weights as responsible for it.
|
|
|
|
If one thinks a few minutes about it this makes absolute sense. The classic
|
|
backpropagation algorithm works by modifying the weights that have contributed
|
|
the most to a bad outcome. When the set of targets is changing then the network
|
|
will perform badly for the new pattern. In order to rectify this the backpropagation
|
|
algorithm will change many of the weights so that the network is delivering a
|
|
good result for the new pattern. This on the other hand results in an increasingly
|
|
worse performance on previously learned patterns. If this worsening was gradual
|
|
it would still be unfortunate but understandable. It is called catastrophic
|
|
because this performance change is not gradual but rather abrupt. Even small
|
|
changes in the weights can have a huge impact on the output.
|
|
|
|
In fact catastrophic forgetting is only a very radical example of a more general
|
|
problem for all models of memory, the so-called "stability-plasticity" problem~\cite{Grossberg1982}.
|
|
This problem, sometimes called dilemma, basically puts up the question how to
|
|
design a system in such a way that it is both sensitive to new input and not
|
|
radically disrupted by it. In other words: That it can learn new things
|
|
without largely or completely forgetting already learned things.
|
|
|
|
Early attempts to alleviate or overcome catastrophic forgetting required a more
|
|
sparse representation. This means that not every weight is responsible for all
|
|
possible inputs. The downside is a worse ability to generalize to new input
|
|
and overall a worse ability to discriminate. In an extreme form this can
|
|
lead to catastrophic remembering~\cite{Sharkey1995}.
|
|
The idea here is that a
|
|
network learns the function describing the inputs too well and therefore
|
|
loses its ability to differentiate between new and already learned input.
|
|
This can be understood well with the example given by French~\cite{French1999},
|
|
where a network has the task to reproduce the input at the output. It can detect
|
|
a new input if the output is diverging by large margin. It has learned too well
|
|
if it learned the identity function and is therefore able to reproduce any
|
|
input perfectly at the output and hence loses the ability to detect new input.
|
|
|
|
Significant improvements were made by rehearsing previously learned input.
|
|
Robins~\cite{Robins1995} found a way to rehearse prior input if it is no longer
|
|
available and called it "pseudo-patterns". The idea being that the weights
|
|
of the trained network resemble a function. A random input and the predicted
|
|
output together somewhat describe this function and are such a pattern. Robins
|
|
used a bunch of them interleaved with new input and the results were promising
|
|
as the forgetting became more gradual. This insight together with
|
|
the findings of McClelland~\cite{McClelland1995} resulted in the development
|
|
of dual-network models.
|
|
In short, one network would model the hippocampus and be able to quickly learn new
|
|
information without disrupting previously learned regularities. This network
|
|
would then serve as teacher for the second network which models the neocortex
|
|
and is responsible for generalizing.
|
|
|
|
In the time between 1999 and 2018 more work was done with regards to catastrophic
|
|
forgetting. Most recently the work of Kirkpatrick~\cite{Kirkpatrick2017},
Velez~\cite{Velez2017} and Shmelkov~\cite{Shmelkov2017} has to be named. The main
|
|
focus here was a brief introduction of catastrophic forgetting and therefore
|
|
these newest approaches will not be described here.
|
|
|
|
\section{Plasticity}
|
|
\label{sec:plasticity}
|
|
|
|
Every neural network involves a learning aspect and hence plasticity, given our
|
|
definition of it. In this section three approaches for plasticity using diffusion-based
|
|
neuromodulation are presented in more detail. Modulated Random Search and
|
|
Modulated Gaussian Walk are using linearly modulated neural networks. They
|
|
are taken from Toutounji and Pasemann~\cite{Toutounji2016}. The third approach
was introduced by Velez and Clune~\cite{Velez2017} and uses diffusion-based neuromodulation
for localized learning, hence the name of the subsection here.
|
|
|
|
\subsection{Modulated Random Search}
|
|
\label{subsec:mrs}
|
|
|
|
\subsubsection*{Modulated Neural Network}
|
|
|
|
Since both approaches from Toutounji and Pasemann are using linearly-modulated
|
|
neural networks the structure of these networks is described first. Linearly-modulated
|
|
neural networks (LMNN) are a specific variant of modulated neural
|
|
networks (MNN). Any artificial neural network (ANN) or simply neural network
|
|
in the context of Computer Science can become a modulated neural network by
|
|
adding a neuromodulator layer.
|
|
|
|
Toutounji and Pasemann describe a variant of this layer that uses neuromodulator
|
|
cells (NMCs). Each NMC produces a specific type of neuromodulator (NM) and
|
|
saves its own concentration level of it. The network wide concentration level at
|
|
a certain point in space and time can be obtained by summing up all concentration
|
|
levels saved in NMCs at the point in space. Produced neuromodulators usually
|
|
impact nearby network parts. This type of spatial impact requires a spatial
|
|
representation in the network where all network elements have a location in
|
|
the space.
|
|
|
|
There are a production and a reduction mode for the NMCs. During the production
|
|
mode the concentration of neuromodulator can be increased and during reduction
|
|
mode it can be decreased. A cell can enter production mode if it was stimulated
|
|
for some time while it falls back to reduction mode when this stimulation
|
|
does not happen for some time.
|
|
|
|
\subsubsection*{Linearly-Modulated Neural Network}
|
|
|
|
A linearly-modulated neural network uses discrete time and stimulates NMCs
|
|
with a simple linear model. Each NMC is connected to a carrier cell or neuron
|
|
which itself is part of a modulatory subnetwork. The NMC is stimulated if the
|
|
output of the carrier neuron is within a specified range
|
|
(\(\text{S}^{\text{min}}\), \(\text{S}^{\text{max}}\)). In every time step
it is checked whether the output of the carrier cell is high enough to stimulate the
NMC. If it is, the stimulation level of the NMC increases. Otherwise it decreases.
|
|
Once the stimulation level reaches the threshold \(\text{T}^{\text{prod}}\)
|
|
the cell goes into the production mode. If it falls below \(\text{T}^{\text{red}}\)
|
|
the cell goes back into reduction mode.
|
|
Over time the neuromodulator diffuses to the surrounding control subnetwork
|
|
where it initiates plasticity that is dependent on the concentration of it at
|
|
the respective synapse.
|
|
|
|
\subsubsection*{Modulated Random Search}
|
|
|
|
\begin{table}
|
|
\begin{tabular}{l|l}
|
|
\textbf{Parameter} & \textbf{Description} \\
|
|
\(Type\) & The neuromodulator type the synapse is sensitive to \\
|
|
\(W\) & Weight change probability \\
|
|
\(D\) & Disable / enable probability \\
|
|
\(W^{min}, W^{max}\) & Minimum and maximum weight of synapse \\
|
|
\(M\) & Maximum neuromodulator sensitivity limit of the synapse
|
|
\end{tabular}
|
|
\caption{Parameters stored for each synapse.
|
|
Replication of Table 1 in Toutounji and Pasemann~\cite{Toutounji2016}.}
|
|
\label{tab:mrs-synapse}
|
|
\end{table}
|
|
|
|
Modulated random search means essentially random weight changes. Each synapse
|
|
has some parameters that are used (see table \ref{tab:mrs-synapse}). The weight
|
|
change probability is the product of the intrinsic weight change probability
|
|
and the concentration of the neuromodulator the synapse is sensitive to
|
|
at its location. Additionally the maximum neuromodulator sensitivity is
|
|
the ceiling for the second part of that product. This means there is a maximum
|
|
weight change probability for each synapse. Should a weight change occur a new
|
|
weight is chosen randomly from the range of values described by the minimum and
|
|
maximum weight of the synapse.
|
|
|
|
Moreover a synapse can disable or enable itself. The actual disable/enable
|
|
probability is the product of the intrinsic value saved as parameter and
|
|
the neuromodulator concentration. The concentration is again ceiled by the
|
|
maximum sensitivity limit given as parameter. This means there is a maximum
|
|
disable/enable probability as well. A disabled synapse is treated as having
|
|
weight 0 but the actual value is stored so that it can be restored when the
|
|
synapse is enabled again.
|
|
|
|
Given a so called neural network structure or substrate this makes it easier
|
|
to find different network topologies (structure and weights combined).
|
|
|
|
\subsection{Modulated Gaussian Walk}
|
|
\label{subsec:mgw}
|
|
|
|
Toutounji and Pasemann introduce the modulated gaussian walk. The key differences
|
|
start with the parameters. There is no maximum sensitivity for the neuromodulator
|
|
concentration. When a weight change occurs the new weight is not chosen randomly
|
|
but rather the difference to be added to the current weight is sampled from a
|
|
normal distribution with a mean of zero and \(\sigma^2\)-variance. The sampled
|
|
value could be infinitely large and hence the new weight outside of the given
|
|
bounds for it. Therefore the value is sampled until the sum of the
current weight and the sampled value is within the range.
|
|
|
|
Toutounji and Pasemann implemented a mechanism for disabling synapses
|
|
in the modulated gaussian walk as well but did not make use of it later and
|
|
therefore they did not describe how it works.
|
|
|
|
\subsection{Localized learning}
|
|
\label{subsec:diffusion}
|
|
|
|
Velez and Clune are using a small network to solve the foraging task. The network
|
|
represents an agent that has a lifetime of three years. Each year consists of
|
|
the seasons summer and winter. During each season the agent is presented with
|
|
food and has to either eat the food or not. Half of the food is nutritious
|
|
and the other poisonous. The target is a fitness value which is best if the
|
|
agent eats all nutritious food and none of the poisonous. The associations of
|
|
nutritious and poisonous are different between summer and winter but within
|
|
a season remain the same during the lifetime. Therefore a nutritious food in
|
|
summer will always be nutritious.
|
|
This setup makes it easy to measure if the agent is able to remember the learned
|
|
associations from the previous seasons.
|
|
|
|
The initial weights of the network are derived from an evolutionary algorithm.
|
|
All later learning uses neuromodulation. The neurons of the network are spatially
|
|
located and there are two sources of neuromodulators in the network - one on either
|
|
side. The sources are only active in their respective season and encode whether
|
|
the previously eaten food was nutritious (1) or poisonous (-1). If they are
|
|
not active their value is zero. As soon as the sources are activated the neuromodulators
|
|
fill a space within a radius of 1.5 units of distance from the source and potentially
|
|
trigger weight changes of neurons inside the radius. The strength of the neuromodulators
|
|
is decreasing with further distance from the source.
|
|
|
|
This explanation should suffice for the general understanding of their method.
|
|
The neurons within the vicinity of these sources only update their weights
|
|
in one of the seasons. Therefore they only learn for one season and are unaffected
|
|
by the other season. This results in a localized learning.
|
|
|
|
\section{Comparison regarding catastrophic forgetting}
|
|
\label{sec:comparison}
|
|
|
|
In this section the three presented approaches for plasticity are compared with
|
|
regard to their ability to mitigate or overcome catastrophic forgetting. For both
|
|
the modulated random search and the modulated gaussian walk this aspect was
|
|
analyzed in the experiments conducted by Toutounji and Pasemann~\cite{Toutounji2016}.
|
|
Therefore the results of their work will be utilized for this comparison.
|
|
Velez and Clune~\cite{Velez2017} introduced the presented approach of localized
|
|
learning to analyze its capability with respect to overcoming catastrophic
|
|
forgetting. Hence their results will be used for the comparison in this section.
|
|
|
|
Over multiple experiments of increasing difficulty the performance of modulated
|
|
random search and modulated gaussian walk were tested. The difficulty ranged
|
|
from a positive light-tropism task in the first experiment (E1) over an
|
|
obstacle-avoidance task in the second experiment (E2), a combination of E1 and E2
|
|
in the third experiment (E3) to a more difficult variant of E3 in the fourth
|
|
experiment (E4). The fifth experiment (E5) was a pendulum experiment.
|
|
In each experiment a robot had to learn the task from scratch. A pre-designed
|
|
LMNN was given in each case and defined the boundaries in which the learning
|
|
took place. If a temporary solution was discarded the learning started again.
|
|
|
|
Modulated random search was able to find successful behaviours in almost all
|
|
cases in E1 despite a short training time of only two hours. The slightly
|
|
longer training time for E2 of four hours however was apparently far too short
|
|
to find consistently good solutions. Both in E3 and E4 the number of intermediate
|
|
temporary solutions is significantly higher than the final number of solutions.
|
|
The pendulum experiment was an easier task and therefore many successful
|
|
behaviours were found.
|
|
|
|
Toutounji and Pasemann note that even almost stable networks are destroyed
|
|
if they have the slightest weakness. Therefore modulated random search
|
|
does not help at all against catastrophic forgetting.
|
|
|
|
Modulated gaussian walk contrary to the random search tends to improve temporary
|
|
solutions when they have weaknesses. For E3 the random search resulted in 34
|
|
temporary solutions which lasted longer than five minutes, averaging at \(5.7\)
|
|
minutes per solution. The gaussian walk found roughly twice that many temporary
|
|
solutions and averaged at \(12.5\) minutes per solution. This indicates that
|
|
gaussian walk mitigates catastrophic forgetting although it does not completely
|
|
remove it.
|
|
|
|
The experiment setup for the localized learning approach was already mentioned.
|
|
After performing some tests Velez and Clune discovered that two functional
|
|
modules formed. One set of connections is learning during summer and the other
|
|
during winter. The connections learning in summer do not change in winter and
|
|
vice versa. This completely removes catastrophic forgetting.
|
|
|
|
If catastrophic forgetting is the only measurement then localized learning
|
|
seems to be the supreme solution to the problem. But Velez and Clune only
|
|
showed that it works in a very bespoke setup with a priori information about
the linear separability of the learning areas and the correct solution. It has yet
|
|
to be shown that localized learning can be generalized to larger problems.
|
|
Modulated random search can be completely discarded as a potential solution.
|
|
Modulated gaussian walk is a clear improvement compared to the random search
|
|
in the analyzed experiments.
|
|
|
|
While all three approaches used diffusion-based neuromodulation the first two and
|
|
the third are quite different in their setup. For the future it would be interesting
|
|
to combine localized learning with gaussian walk on the experiments of Toutounji
|
|
and Pasemann. In particular the combined experiment might benefit from this as
|
|
localized learning could separate the learning for one task from the other
|
|
and gaussian walk could then improve the particular part that was problematic.
|
|
|
|
\section{Conclusion}
|
|
\label{sec:concl}
|
|
|
|
A second environmental feedback loop is important to tell autonomous systems
|
|
when to learn. But the method to learn is important as well to be of any use
|
|
in a practical environment. The comparison has shown that localized learning
|
|
can overcome catastrophic forgetting for small networks in a very restricted
|
|
setup. Furthermore the comparison revealed that modulated random search is
|
|
not part of a solution to catastrophic forgetting and modulated gaussian
|
|
walk is significantly better in that regard.
|
|
|
|
Future work should look into the effects of localized learning on the kind of
|
|
autonomous robot experiments that were conducted by Toutounji and Pasemann and
|
|
in general research the applicability to bigger problems for example in the area
|
|
of deep neural networks.
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% hier werden - zum Ende des Textes - die bibliographischen Referenzen
|
|
% eingebunden
|
|
%
|
|
% Insbesondere stehen die eigentlichen Informationen in der Datei
|
|
% ``bib.bib''
|
|
%
|
|
\newpage
|
|
\printbibliography
|
|
\addcontentsline{toc}{section}{Bibliography}% Add to the TOC
|
|
|
|
\end{document}
|