% Workshop paper: tutorial on Q-learning with extensions (Cybenko, Gray, Moizumi, 1997).
% The fields group, later, keyword, abstract and copyright are not standard
% BibTeX fields; standard styles ignore them, so they act as annotations only.
% NOTE(review): address appears to hold the workshop location rather than the
% publisher's city -- confirm before moving it into booktitle or a venue field.
@inproceedings{cybenko:qlearn,
  author    = {Cybenko, George and Gray, Robert and Moizumi, Katsuhiro},
  title     = {{Q-Learning}: A Tutorial and Extensions},
  booktitle = {Proceedings of the Workshop on Mathematics of Artificial Neural
               Networks},
  year      = {1997},
  month     = jul,
  publisher = {Kluwer Academic Publishers},
  copyright = {Kluwer Academic Publishers},
  address   = {Oxford University, England},
  group     = {agents},
  later     = {cybenko:q-learning},
  url       = {http://agent.cs.dartmouth.edu/papers/cybenko:qlearn.ps.gz},
  keyword   = {neural networks, q-learning, information processing},
  abstract  = {In the past decade, research in neurocomputing has been divided
    into two relatively well-defined tracks: one track dealing with cognition and
    the other with behavior. Cognition deals with organizing, classifying and
    recognizing sensory stimuli. Behavior is more dynamic, involving sequences of
    actions and changing interactions with an external environment. The
    mathematical techniques that apply to these areas, at least from the point of
    neurocomputing, appear to have been quite separate as well. The purpose of
    this paper is to give an overview of some recent powerful mathematical
    results in behavioral neurocomputing, specifically the concept of Q-learning
    due to C.~Watkins, and some new extensions. Finally, we propose ways in which
    the mathematics of cognition and the mathematics of behavior can move closer
    to build more unified systems of information processing and action.},
}