# mujoco_inverted_pendulum.ink
inkling "2.0"
# `using Goal` is optional here: the curriculum below uses reward/terminal functions, not goals.
using Goal
using Math
using Number
# Action space: Box(-1.0, 1.0, (1,), float32)
#   0 - force applied on the cart - min -1, max 1 - name "slider", joint "slide" - Force (N)
# Observation space: shape (11,)
#   High: [ inf  inf  inf  inf  inf  inf  inf  inf  inf  inf  inf]
#   Low:  [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
#0 - position of the cart along the linear surface - position (m)
#1 - sine of the angle between the cart and the first pole - sin(hinge)
#2 - sine of the angle between the two poles - sin(hinge2)
#3 - cosine of the angle between the cart and the first pole - cos(hinge)
#4 - cosine of the angle between the two poles - cos(hinge2)
#5 - velocity of the cart - velocity (m/s)
#6 - angular velocity of the angle between the cart and the first pole - angular velocity (rad/s)
#7 - angular velocity of the angle between the two poles - angular velocity (rad/s)
#8 - constraint force - 1 - Force (N) - see https://mujoco.readthedocs.io/en/latest/computation.html
#9 - constraint force - 2 - Force (N)
#10 - constraint force - 3 - Force (N)
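# Source environment: gym.make("InvertedDoublePendulum-v4")
# (note: this is the *double* pendulum environment, even though the file is named "inverted_pendulum").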
# Reward function used by the curriculum.
# Forwards, unchanged, the value the simulator state carries in its
# `_gym_reward` field.
function Reward(gs: MujocoState) {
    return gs._gym_reward
}
# Terminal function used by the curriculum.
# Forwards, unchanged, the value the simulator state carries in its
# `_gym_terminal` field.
function Terminal(gs: MujocoState) {
    return gs._gym_terminal
}
# Magnitude limits for the state quantities, with their units.
# NOTE(review): none of these constants is referenced in the visible part of
# this file (the state types below are declared without range constraints) —
# confirm whether they were meant to bound the Number.Float32 fields.
const max_position = 100 # m
const max_speed = 100 # m/s
const max_ang_speed = 100 # rad/s
const max_constraint = 100 # N
# State record produced by the Mujoco simulator (see `simulator Mujoco`).
# The first eleven fields follow, in order, the observation list (indices
# 0-10) in the comments at the top of this file; the two `_gym_*` fields are
# read by the Reward and Terminal functions.
type MujocoState {
    # 0: position of the cart along the linear surface (m)
    pos: Number.Float32,

    # 1-2: sines of the cart/first-pole angle and of the pole/pole angle
    sin_hinge1: Number.Float32,
    sin_hinge2: Number.Float32,

    # 3-4: cosines of the same two angles
    cos_hinge1: Number.Float32,
    cos_hinge2: Number.Float32,

    # 5: velocity of the cart (m/s)
    velocity: Number.Float32,

    # 6-7: angular velocities of the two angles (rad/s)
    ang_velocity1: Number.Float32,
    ang_velocity2: Number.Float32,

    # 8-10: constraint forces (N)
    constraint1: Number.Float32,
    constraint2: Number.Float32,
    constraint3: Number.Float32,

    # Returned as-is by Reward()
    _gym_reward: number,

    # Returned as-is by Terminal()
    _gym_terminal: number
}
# Input type of the graph: the eleven observation fields of MujocoState,
# without the `_gym_reward` / `_gym_terminal` bookkeeping fields.
# Produced from a MujocoState by TransformState.
type ObservableState {
    # Cart position
    pos: Number.Float32,

    # Sines of the two hinge angles
    sin_hinge1: Number.Float32,
    sin_hinge2: Number.Float32,

    # Cosines of the two hinge angles
    cos_hinge1: Number.Float32,
    cos_hinge2: Number.Float32,

    # Cart velocity
    velocity: Number.Float32,

    # Angular velocities of the two hinge angles
    ang_velocity1: Number.Float32,
    ang_velocity2: Number.Float32,

    # Constraint forces
    constraint1: Number.Float32,
    constraint2: Number.Float32,
    constraint3: Number.Float32,
}
# Action record accepted by the Mujoco simulator: a one-element array whose
# single entry lies in [-1, 1]. Built from a SimAction by TransformAction.
type MujocoAction {
    action: Number.Float32<-1 .. 1>[1],
}
# Output type of the graph: a single scalar in [-1, 1].
# TransformAction wraps this scalar into the array form of MujocoAction.
type SimAction {
    input_force: Number.Float32<-1 .. 1>,
}
# Configuration record passed to the Mujoco simulator; lessons set its
# fields in their `scenario` block.
# NOTE(review): the meaning of `deque_size` is defined by the simulator and
# is not visible in this file — confirm against the simulator integration.
type SimConfig {
    deque_size: Number.UInt8
}
# State transform used by the curriculum (`state TransformState`).
# Copies the eleven observation fields of the simulator state one-to-one
# into an ObservableState; `_gym_reward` and `_gym_terminal` are not copied.
function TransformState(State: MujocoState): ObservableState {
    return {
        # cart position
        pos: State.pos,

        # hinge angles, as sine / cosine pairs
        sin_hinge1: State.sin_hinge1,
        sin_hinge2: State.sin_hinge2,
        cos_hinge1: State.cos_hinge1,
        cos_hinge2: State.cos_hinge2,

        # linear and angular velocities
        velocity: State.velocity,
        ang_velocity1: State.ang_velocity1,
        ang_velocity2: State.ang_velocity2,

        # constraint forces
        constraint1: State.constraint1,
        constraint2: State.constraint2,
        constraint3: State.constraint3,
    }
}
# Action transform used by the curriculum (`action TransformAction`).
# Wraps the scalar `input_force` of the graph's output in a one-element
# array, which is the form MujocoAction declares for its `action` field.
# Note: the parameter is named `State` but it carries a SimAction.
function TransformAction(State: SimAction): MujocoAction {
    # One-element array holding the force value, typed to match MujocoAction.action.
    var force_vector: Number.Float32<-1 .. 1>[1] = [State.input_force]

    return {
        action: force_vector
    }
}
# Simulator declaration referenced by the curriculum (`source Mujoco`).
# It takes a MujocoAction and a SimConfig and yields a MujocoState.
# The body is intentionally left empty in this file.
simulator Mujoco(action: MujocoAction, config: SimConfig): MujocoState {
}
# Graph with a single learned concept, StayUp, mapping an ObservableState
# to a SimAction.
graph (input: ObservableState): SimAction {
    concept StayUp(input): SimAction {
        curriculum {
            # Train against the Mujoco simulator declared above.
            source Mujoco

            # Reward and termination are read straight from the simulator
            # state (see Reward / Terminal).
            reward Reward
            terminal Terminal

            # Convert between the simulator's types and the graph's types.
            state TransformState
            action TransformAction

            # Single lesson; its scenario sets the only SimConfig field.
            lesson first_lesson {
                scenario {
                    deque_size: 2
                }
            }
        }
    }
}