@inproceedings{0b0365c201f04cbdab73627e9e081798,
  title     = {Hypervolume-Based Multi-Objective Reinforcement Learning},
  abstract  = {Indicator-based evolutionary algorithms are amongst the best performing methods for solving multi-objective optimization (MOO) problems. In reinforcement learning (RL), introducing a quality indicator in an algorithm's decision logic was not attempted before. In this paper, we propose a novel on-line multi-objective reinforcement learning (MORL) algorithm that uses the hypervolume indicator as an action selection strategy. We call this algorithm the hypervolume-based MORL algorithm or HB-MORL and conduct an empirical study of the performance of the algorithm using multiple quality assessment metrics from multi-objective optimization. We compare the hypervolume-based learning algorithm on different environments to two multi-objective algorithms that rely on scalarization techniques, such as the linear scalarization and the weighted Chebyshev function. We conclude that HB-MORL significantly outperforms the linear scalarization method and performs similarly to the Chebyshev algorithm without requiring any user-specified emphasis on particular objectives.},
  keywords  = {multi-objective optimization, hypervolume unary indicator, reinforcement learning},
  author    = {Van Moffaert, Kristof and Drugan, Madalina M. and Now{\'e}, Ann},
  year      = {2013},
  doi       = {10.1007/978-3-642-37140-0_28},
  language  = {English},
  isbn      = {978-3-642-37139-4},
  series    = {Lecture Notes in Computer Science},
  volume    = {7811},
  publisher = {Springer},
  pages     = {352--366},
  editor    = {Purshouse, Robin C. and Fleming, Peter J. and Fonseca, Carlos M. and Greco, Salvatore and Shaw, Jane},
  booktitle = {Evolutionary Multi-Criterion Optimization},
  address   = {Berlin, Heidelberg},
}