main.py
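"""Train a DDPG agent to run in the osim-rl RunEnv.

Each episode starts from a random action, then the agent acts on preprocessed
observations while the reward is shaped with small bonuses for staying upright
and penalties for letting the head fall behind the pelvis or keeping the legs
straight (see the comments in the loop below).
"""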
from osim.env import RunEnv
import numpy as np
import time
from Preprocessing import Preprocessing
import sys
#sys.path.insert(0, sys.path[0] + '/DDPG/DDPG.py')
from DDPG.DDPG import DDPG
#env = RunEnv(visualize=False)
env = RunEnv(visualize=True)
observation = env.reset(difficulty=0)
episodes = 100000
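# DDPG agent; the positional arguments are assumed to be discount factor (0.9), replay
# memory size (2000), state dimension (54), action dimension (18) and learning rate (1e-4).
# Actor/critic weights are loaded from and saved to the given paths.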
agent = DDPG(0.9, 2000, 54, 18, 0.0001, criticpath='critic', actorpath='actor')
for episode in range(0, episodes):
    #env.step(action)
    # action is a list of length 18, values between [0, 1]
    ## specifics: 9 muscles per leg, 2 legs = 18.
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    observation = np.array(observation)
    Preprocess = Preprocessing(observation, delta=0.01)
    prevState = Preprocess.GetState(observation)
    agent.step = 0
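    # Presumably resets the agent's Ornstein-Uhlenbeck exploration noise for this episode
    # (the arguments look like sigma, theta, mu).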
    agent.OUprocess(.312, 0.15, 0.0)
    for i in range(1, 1000):
        if i > 1:
            pelvis_y = observation[2]
        observation, reward, done, info = env.step(action)
        if i > 1:
            reward += (observation[2] - pelvis_y) * 0.01  # penalty for pelvis going down
        reward += env.current_state[4] * 0.01  # reward for forward pelvis velocity
        reward += 0.01  # small reward for still standing
        reward += min(0, env.current_state[22] - env.current_state[1]) * 0.1  # penalty for head behind pelvis
        reward -= sum([max(0.0, k - 0.1) for k in [env.current_state[7], env.current_state[10]]]) * 0.02  # penalty for straight legs
        observation = np.array(observation)
        # done here means it didn't go the full simulation
        if done:
            reward = 0
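        # Store the (prev state, action, reward, new state) transition in replay memory;
        # once enough samples are collected, run a learning update and refresh the target networks.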
        state = Preprocess.GetState(observation)
        s, a, r, sp = Preprocess.ConvertToTensor(prevState, action, reward, state)
        agent.addToMemory(s, a, r, sp)
        if agent.primedToLearn():
            agent.PerformUpdate(16)
            agent.UpdateTargetNetworks()
        # env.render()
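        # On early termination: reset the environment, save the actor/critic, and move on to the next episode.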
        if done:
            env.reset(difficulty=0, seed=None)  # resets the environment if done is true
            agent.saveActorCritic()
            print("resetting environment " + str(episode))
            break
        action = agent.selectAction(sp)  # pick the next action from the current state tensor
        action = action.numpy()
        prevState = state