% Memorandum in the COSOR report series of Technische Hogeschool Eindhoven.
% Recorded as a technical report; no report number is available in this record.
@techreport{277f3ddfca454b219b09e6a1a0c309ab,
  author      = {van der Wal, J.},
  title       = {A stopping time-based policy iteration algorithm for average reward {Markov} decision processes},
  type        = {Memorandum COSOR},
  institution = {Technische Hogeschool Eindhoven},
  year        = {1978},
  language    = {English},
  abstract    = {We consider Howard's policy iteration algorithm for multichained finite state and action Markov decision processes at the criterion of average reward per unit time. Using stopping times as has been done by Wessels in the total reward case we obtain a set of policy improvement steps, among which Gauss-Seidel, which as we show give convergent algorithms and produce average optimal strategies.},
}