@inproceedings{99d886d7196a4cdcbf6da00a55cb2657,
  author    = {Drugan, M. M.},
  title     = {Scalarized lower upper confidence bound algorithm},
  booktitle = {Learning and Intelligent Optimization},
  editor    = {Dhaenens, Cl. and Jourdan, L. and Marmion, M.-E.},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Germany},
  year      = {2015},
  pages     = {229--235},
  doi       = {10.1007/978-3-319-19084-6_21},
  isbn      = {978-3-319-19083-9},
  language  = {English},
  abstract  = {Multi-objective evolutionary optimisation algorithms and stochastic multi-armed bandits techniques are combined in designing stochastic multi-objective multi-armed bandits (MOMAB) with an efficient exploration and exploitation trade-off. Lower upper confidence bound (LUCB) focuses on sampling the arms that are most probable to be misclassified (i.e., optimal or suboptimal arms) in order to identify the set of best arms aka the Pareto front. Our scalarized multi-objective LUCB (sMO-LUCB) is an adaptation of LUCB to reward vectors. Preliminary empirical results show good performance of the proposed algorithm on a bi-objective environment.},
}