Changed reward to be in the range [0, 1]
parent
df6a3c546a
commit
9c9bf47abe
|
@ -101,6 +101,10 @@ public class MCTSAI implements MagicAI {
|
||||||
C * Math.sqrt(Math.log(parent.getNumSim()) / child.getNumSim());
|
C * Math.sqrt(Math.log(parent.getNumSim()) / child.getNumSim());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private double selectMOSS(final MCTSGameTree parent, final MCTSGameTree child) {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
private double selectRatio(final MCTSGameTree parent, final MCTSGameTree child) {
|
private double selectRatio(final MCTSGameTree parent, final MCTSGameTree child) {
|
||||||
return (parent.sign() * child.getScore() + 10)/(child.getNumSim() + 10);
|
return (parent.sign() * child.getScore() + 10)/(child.getNumSim() + 10);
|
||||||
}
|
}
|
||||||
|
@ -362,6 +366,7 @@ public class MCTSAI implements MagicAI {
|
||||||
return path;
|
return path;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
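// Returns a reward in the range [0, 1]; a finished game yields 0.0 when the score player lost and 1.0 otherwise. NOTE(review): the no-loser case still returns 0, the same as a loss on this scale - confirm whether 0.5 was intended.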
|
||||||
private double randomPlay(final MCTSGameTree node, final MagicGame game) {
|
private double randomPlay(final MCTSGameTree node, final MagicGame game) {
|
||||||
//terminal node, no need for random play
|
//terminal node, no need for random play
|
||||||
if (game.isFinished()) {
|
if (game.isFinished()) {
|
||||||
|
@ -369,7 +374,7 @@ public class MCTSAI implements MagicAI {
|
||||||
|
|
||||||
if (game.getLosingPlayer() == game.getScorePlayer()) {
|
if (game.getLosingPlayer() == game.getScorePlayer()) {
|
||||||
node.setAILose(0);
|
node.setAILose(0);
|
||||||
return -1.0;
|
return 0.0;
|
||||||
} else {
|
} else {
|
||||||
node.setAIWin(0);
|
node.setAIWin(0);
|
||||||
return 1.0;
|
return 1.0;
|
||||||
|
@ -387,9 +392,9 @@ public class MCTSAI implements MagicAI {
|
||||||
if (game.getLosingPlayer() == null) {
|
if (game.getLosingPlayer() == null) {
|
||||||
return 0;
|
return 0;
|
||||||
} else if (game.getLosingPlayer() == game.getScorePlayer()) {
|
} else if (game.getLosingPlayer() == game.getScorePlayer()) {
|
||||||
return -(1.0 - actions/((double)MAX_ACTIONS));
|
return actions/(2.0 * MAX_ACTIONS);
|
||||||
} else {
|
} else {
|
||||||
return 1.0 - actions/((double)MAX_ACTIONS);
|
return 1.0 - actions/(2.0 * MAX_ACTIONS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue