% Workshop paper: a tutorial on Q-learning with extensions (Cybenko, Gray, Moizumi, 1997).
% The fields group, later, keyword, copyright and abstract are non-standard annotations;
% standard bibliography styles ignore them.
% NOTE(review): address appears to hold the workshop venue rather than the
% publisher's city -- confirm before moving it into booktitle or a venue field.
@inproceedings{cybenko:qlearn,
  author    = {Cybenko, George and Gray, Robert and Moizumi, Katsuhiro},
  title     = {{Q-Learning}: A Tutorial and Extensions},
  booktitle = {Proceedings of the Workshop on Mathematics of Artificial Neural Networks},
  year      = {1997},
  month     = jul,
  publisher = {Kluwer Academic Publishers},
  copyright = {Kluwer Academic Publishers},
  address   = {Oxford University, England},
  group     = {agents},
  later     = {cybenko:q-learning},
  url       = {http://agent.cs.dartmouth.edu/papers/cybenko:qlearn.ps.gz},
  keyword   = {neural networks, q-learning, information processing},
  abstract  = {In the past decade, research in neurocomputing has been divided into two relatively well-defined tracks: one track dealing with cognition and the other with behavior. Cognition deals with organizing, classifying and recognizing sensory stimuli. Behavior is more dynamic, involving sequences of actions and changing interactions with an external environment. The mathematical techniques that apply to these areas, at least from the point of neurocomputing, appear to have been quite separate as well. The purpose of this paper is to give an overview of some recent powerful mathematical results in behavioral neurocomputing, specifically the concept of Q-learning due to C.~Watkins, and some new extensions. Finally, we propose ways in which the mathematics of cognition and the mathematics of behavior can move closer to build more unified systems of information processing and action.},
}