i try to create a simplified rl4j example based on the existing Gym and Malmo examples. Given is a sine wave and the AI should say if we are on top of the wave, on bottom or somewhere else(noop).
The SineRider is the "Game", State is the value of the sine function(Just one double)
The problem is it never calls the step function in SineRider to get a reward. What do i wrong?
Kotlin:
package aiexample
import org.deeplearning4j.gym.StepReply
import org.deeplearning4j.rl4j.learning.sync.qlearning.QLearning
import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.QLearningDiscreteDense
import org.deeplearning4j.rl4j.mdp.MDP
import org.deeplearning4j.rl4j.network.dqn.DQNFactoryStdDense
import org.deeplearning4j.rl4j.space.DiscreteSpace
import org.deeplearning4j.rl4j.space.Encodable
import org.deeplearning4j.rl4j.space.ObservationSpace
import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4j.linalg.factory.Nd4j
import org.nd4j.linalg.learning.config.Adam
import kotlin.math.sin
object Example {
var ql: QLearning.QLConfiguration = QLearning.QLConfiguration(
123, //Random seed
1000, //Max step By epoch
8000, //Max step
1000, //Max size of experience replay
32, //size of batches
100, //target update (hard)
0, //num step noop warmup
0.05, //reward scaling
0.99, //gamma
10.0, //td-error clipping
0.1f, //min epsilon
2000, //num step for eps greedy anneal
true //double DQN
)
var net: DQNFactoryStdDense.Configuration = DQNFactoryStdDense.Configuration.builder()
.l2(0.01).updater(Adam(1e-2)).numLayer(3).numHiddenNodes(16).build()
@JvmStatic
fun main(args: Array<String>) {
simpleSine()
}
private fun simpleSine() {
val mdp = Env.create()
val dql = QLearningDiscreteDense(mdp, net, ql)
dql.train()
mdp.close()
}
}
class Action(val name:String) {
companion object {
val noop = Action("noop")
val top = Action("top")
val bottom = Action("bottom")
}
}
class State(private val inputs: DoubleArray): Encodable {
override fun toArray(): DoubleArray {
return inputs
}
}
class SineObservationSpace: ObservationSpace<State> {
override fun getLow(): INDArray {
return Nd4j.create(doubleArrayOf(-1.0))
}
override fun getHigh(): INDArray {
return Nd4j.create(doubleArrayOf(1.0))
}
override fun getName(): String {
return "Discrete"
}
override fun getShape(): IntArray {
return intArrayOf(1)
}
}
class SineRider{
companion object {
val actions = mapOf(
0 to Action.noop,
1 to Action.top,
2 to Action.bottom)
}
var i = 0.0
fun step(action:Int): Double{
val act = actions[action]
if(act == Action.top){
return if(i > 0.9) 1.0 else -1.0
}
if(act == Action.bottom){
return if(i < -0.9) 1.0 else -1.0
}
if(act == Action.noop){
return if(i < 0.9 && i > -0.9) 1.0 else -1.0
}
return 0.0
}
fun reset(){
}
fun next(){
i += 0.1
}
fun state(): State {
val sine = sin(i)
next()
return State(arrayOf(sine).toDoubleArray())
}
}
class Env(private val sineRider: SineRider) : MDP<State, Int, DiscreteSpace> {
private val actionSpace = DiscreteSpace(3)
private var done = false
override fun getObservationSpace(): ObservationSpace<State> {
return SineObservationSpace()
}
override fun getActionSpace(): DiscreteSpace {
return actionSpace
}
override fun step(action: Int): StepReply<State> {
val reward = sineRider.step(action)
val state = sineRider.state()
return StepReply(state, reward, true, null)
}
override fun isDone(): Boolean {
return true
}
override fun reset(): State? {
done = false
sineRider.reset()
return sineRider.state()
}
override fun close() {
}
override fun newInstance(): Env {
return create()
}
companion object {
fun create() : Env {
val sinRider = SineRider()
return Env(sinRider)
}
}
}
The problem was the isDone() function. It say always the game is over.
Code changes:
class Env...
var stepCount = 0
override fun isDone(): Boolean {
return stepCount > 1000
}
override fun reset(): State? {
stepCount = 0
...
}
Thanks to Paul Dubs -> https://community.konduit.ai/t/simplified-example/621