-
Notifications
You must be signed in to change notification settings - Fork 4
/
simulatePOMDP.wppl
93 lines (70 loc) · 3.15 KB
/
simulatePOMDP.wppl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
/**
 * Build a function that simulates a POMDP agent acting in a world.
 *
 * @param {string} beliefOrBeliefDelay - Either 'belief' or 'beliefDelay'.
 *   With 'beliefDelay', a delay argument (always 0 here) is additionally
 *   passed to agent.updateBelief and agent.act.
 * @returns {function} simulate(startState, world, agent, outputType), which
 *   returns an array with one entry per visited state. The entry shape is
 *   selected by outputType:
 *     'states'                 -> state
 *     'actions'                -> action
 *     'stateAction'            -> [state, action]
 *     'stateBelief'            -> [state, belief]
 *     'stateObservationAction' -> {state, observation, action}
 *   The simulation fails an assertion when beliefOrBeliefDelay or outputType
 *   is not one of the accepted values, or when the start state has
 *   timeLeft <= 1 without terminateAfterAction being set.
 */
var getSimulateFunction = function(beliefOrBeliefDelay) {
  //TODO add test that agent's prior contains the true startState
  assert.ok(_.includes(['belief', 'beliefDelay'], beliefOrBeliefDelay),
            'getSimulateFunction invalid input: ' + beliefOrBeliefDelay);
  var useDelays = beliefOrBeliefDelay === 'beliefDelay';

  return function(startState, world, agent, outputType) {
    var useManifestLatent = agent.POMDPFunctions.useManifestLatent;
    // timeLeft / terminateAfterAction live on manifestState when the agent
    // uses the manifest/latent state representation, else on the state itself.
    var timingState = useManifestLatent ? startState.manifestState : startState;
    var totalTime = timingState.timeLeft;
    //assert.ok(isPOMDPState(startState), 'simulateBeliefDelayAgent args');

    var validOutputTypes = ['states', 'actions', 'stateAction',
                            'stateBelief', 'stateObservationAction'];
    assert.ok(_.includes(validOutputTypes, outputType),
              "outputType '" + outputType + "' not in " +
              JSON.stringify(validOutputTypes));

    var terminateAfterAction = timingState.terminateAfterAction;
    // Use the representation-aware flag: reading startState.manifestState
    // directly would crash for states that have no manifestState.
    assert.ok(totalTime > 1 || terminateAfterAction,
              'if totalTime==1, must also terminateAfterAction');

    // agent methods
    var agentAct = agent.act;
    var agentUpdateBelief = agent.updateBelief;
    var priorBelief = agent.params.priorBelief;

    // world methods
    var transition = world.transition;
    var observe = agent.POMDPFunctions.observe;
    var shouldTerminate = agent.POMDPFunctions.shouldTerminate;

    // Pick the per-step record that corresponds to the requested outputType.
    var selectOutput = function(state, observation, action, belief) {
      var table = {
        states: state,
        actions: action,
        stateAction: [state, action],
        stateBelief: [state, belief],
        stateObservationAction: {
          state: state,
          observation: observation,
          action: action
        }
      };
      return table[outputType];
    };

    // Recursively roll out the agent: observe, update the belief using the
    // previous action, sample a new action, record the step, then either stop
    // or transition and continue from the next state.
    var sampleSequence = function(state, previousBelief, prevAction) {
      var observation = observe(state);
      var delay = 0;
      var belief = (useDelays ?
                    agentUpdateBelief(previousBelief, observation,
                                      prevAction, delay) :
                    agentUpdateBelief(previousBelief, observation, prevAction));
      var action = (useDelays ?
                    sample(agentAct(belief, delay)) :
                    sample(agentAct(belief)));
      var output = [selectOutput(state, observation, action, belief)];

      if (shouldTerminate(state)) {
        return output;
      } else {
        var nextState = transition(state, action);
        return output.concat(sampleSequence(nextState, belief, action));
      }
    };

    // No action has been taken before the first belief update.
    var startAction = 'noAction';
    return sampleSequence(startState, priorBelief, startAction);
  };
};
// Simulator that calls agent.updateBelief / agent.act without a delay argument.
var simulatePOMDPAgent = getSimulateFunction('belief');
// Simulator that additionally passes a delay (fixed at 0) to
// agent.updateBelief / agent.act.
var simulatePOMDPAgentDelay = getSimulateFunction('beliefDelay');
/**
 * Simulate a POMDP agent, dispatching on the agent's parameters.
 *
 * When isOptimalPOMDPAgent(agent.params) holds, the run is delegated to
 * simulatePOMDPAgent; otherwise it is delegated to simulatePOMDPAgentDelay.
 * All four arguments are forwarded unchanged and the delegate's result is
 * returned as-is.
 */
var simulatePOMDP = function(startState, world, agent, outputType) {
  var agentParams = agent.params;
  return (isOptimalPOMDPAgent(agentParams) ?
          simulatePOMDPAgent(startState, world, agent, outputType) :
          simulatePOMDPAgentDelay(startState, world, agent, outputType));
};