From 18e7419a575e2db429a12c4248e8a6110d402b64 Mon Sep 17 00:00:00 2001 From: CovERUshKA Date: Sat, 24 Aug 2024 11:17:28 +0300 Subject: [PATCH] Add skip ticks, critic and actor learning rate --- src/engine/server/NN/ModelManager.cpp | 117 +++++++++--- src/engine/server/NN/ModelManager.h | 6 +- src/engine/server/NN/Models.h | 68 +++++-- .../server/NN/ProximalPolicyOptimization.h | 47 +++-- src/engine/server/server.cpp | 175 +++++++++++------- src/game/server/player.cpp | 8 +- 6 files changed, 293 insertions(+), 128 deletions(-) diff --git a/src/engine/server/NN/ModelManager.cpp b/src/engine/server/NN/ModelManager.cpp index 5504904..9d9a513 100644 --- a/src/engine/server/NN/ModelManager.cpp +++ b/src/engine/server/NN/ModelManager.cpp @@ -11,9 +11,14 @@ #include int64_t n_in = 3345; // 78 + 1089 * 3 +int64_t n_scalar_in = 78; +int64_t n_grid_channels = 3; int64_t n_out = 9; double stdrt = 2e-2; -double learning_rate = 5e-5; // Default: 1e-3 +double learning_rate = 3e-5; // Default: 5e-5 +double actor_learning_rate = 5e-5; // Default: 5e-5 +double critic_learning_rate = 2e-4; // Default: 1e-4 +//double weight_decay = 0.0001; int64_t mini_batch_size = 8000; // 4096, 8192, 16384, 32768 int64_t ppo_epochs = 2; // Default: 4 @@ -24,6 +29,8 @@ float lambda = 0.95f; ActorCritic ac(n_in, n_out, stdrt); std::shared_ptr opt; //(ac->parameters(), 1e-2); +//std::shared_ptr actor_opt; +//std::shared_ptr ocritic; std::shared_ptr scheduler; VT states; @@ -63,7 +70,7 @@ void generate_random_hyperparameters() std::uniform_int_distribution<> epochs_dist(0, epochs_set.size() - 1); // Epochs range std::uniform_int_distribution<> mini_batch_size_dist(0, mini_batch_sizes_set.size() - 1); // Batch size range - learning_rate = lr_set[lr_dist(gen)]; + //learning_rate = lr_set[lr_dist(gen)]; //gamma = gamma_set[gamma_dist(gen)]; //dbeta = beta_set[beta_dist(gen)]; //clip_param = clip_set[clip_dist(gen)]; @@ -84,22 +91,83 @@ ModelManager::ModelManager(size_t batch_size, size_t count_players) : ac->to(precision); //ac->normal(0., stdrt); //ac->eval(); - //learning_rate = 2e-5; - opt = std::make_shared(ac->parameters(), learning_rate); - //torch::load(ac, "train\\1723320877699\\models\\best_model.pt"); - //torch::load(*opt, "train\\1723320877699\\models\\best_optimizer.pt"); - scheduler = std::make_shared(*opt, /* mode */ torch::optim::ReduceLROnPlateauScheduler::max, /* factor */ 0.5, /* patience */ 20); + //learning_rate = 1e-6; + //torch::optim::AdamOptions opts(learning_rate); + //opts.weight_decay(weight_decay); + //std::vector options = {torch::optim::AdamOptions(actor_learning_rate), torch::optim::AdamOptions(critic_learning_rate)}; + //std::vector params; + // Create parameter groups + //printf("1\n"); + //torch::optim::OptimizerOptions critic_options; + //critic_options.set_lr(critic_learning_rate); + //torch::optim::AdamOptions options; + //torch::optim::OptimizerParamGroup actor_group(ac->actor_network->parameters()); + //torch::optim::OptimizerParamGroup critic_group(ac->critic_network->parameters()); + //printf("1\n"); + // Create the first parameter group with drive_db_ + //std::vector params1 = {drive_db_}; + + // Initialize the Adam optimizer with the parameter group + std::vector param_groups; + + param_groups.push_back(torch::optim::OptimizerParamGroup({ac->actor_network->parameters()}, + std::make_unique(actor_learning_rate))); + param_groups.push_back(torch::optim::OptimizerParamGroup({ac->critic_network->parameters()}, + std::make_unique(critic_learning_rate))); + 
param_groups.push_back(torch::optim::OptimizerParamGroup({ac->log_std_}, + std::make_unique(actor_learning_rate))); + + // Set different learning rates for each group + //static_cast(actor_group.options()).lr(actor_learning_rate); + //actor_group.options().set_lr(actor_learning_rate); + //printf("1\n"); + //static_cast(critic_group.options()).lr(critic_learning_rate); + //critic_group.options().set_lr(critic_learning_rate); + //printf("1\n"); + + // Create a vector of parameter groups + //std::vector param_groups = {actor_group, critic_group}; + //printf("1\n"); + + // Create the optimizer with parameter groups + /*torch::optim::Adam optimizer({actor_group, + critic_group});*/ + /*params.push_back(ac->actor_network->parameters()); + params.push_back(ac->critic_network->parameters());*/ + //actor_opt = std::make_shared(ac->actor_parameters(), actor_learning_rate); + //critic_opt = std::make_shared(ac->critic_parameters(), critic_learning_rate); + //opt = std::make_shared(ac->parameters(), learning_rate); + opt = std::make_shared(param_groups); + //torch::load(ac, "train\\1724427150860\\models\\last_model.pt"); + //torch::load(*opt, "train\\1724427150860\\models\\last_optimizer.pt"); + //scheduler = std::make_shared(*opt, /* mode */ torch::optim::ReduceLROnPlateauScheduler::max, /* factor */ 0.5, /* patience */ 20); /*for(auto ¶m_group : opt->param_groups()) { - param_group.options().set_lr(learning_rate); + if(param_group.options().get_lr() == 5e-5) + { + printf("Setting\n"); + param_group.options().set_lr(1e-5); + printf("Setted\n"); + } + + if(param_group.options().get_lr() == 1e-4) + { + printf("Setting\n"); + param_group.options().set_lr(2e-5); + printf("Setted\n"); + } }*/ - cout << "Learning rate: " << learning_rate << " Gamma: " << gamma << " Beta: " << dbeta << " clip_param: " << clip_param << " Epochs: " << ppo_epochs << " Mini batch size: " << mini_batch_size << endl; //Sleep(7000); ac->to(device); - ac->presample_normal(iReplaysPerBot, count_bots); //Sleep(7000); // opt(ac->parameters(), 1e-3); - PPO::Initilize(batch_size, count_bots); + //ac->eval(); + if(ac->is_training()) + { + PPO::Initilize(batch_size, count_bots); + ac->presample_normal(iReplaysPerBot, count_bots); + cout << "Learning rate: " << learning_rate << " Gamma: " << gamma << " Beta: " << dbeta << " clip_param: " << clip_param << " Epochs: " << ppo_epochs << " Mini batch size: " << mini_batch_size << endl; + } //at::cuda::setCurrentCUDAStream(myStream); } @@ -124,11 +192,9 @@ std::vector ModelManager::Decide( //std::memcpy(state.data_ptr(), &(input), sizeof(input)); auto blocks_input_gpu = blocks_input_cpu.to(device, true); auto state_inputs_gpu = state_inputs_cpu.to(device, true); - - //printf("1.1\n"); - auto one_hotted_blocks = torch::one_hot(blocks_input_gpu, 3); + auto one_hotted_blocks = torch::one_hot(blocks_input_gpu, n_grid_channels); //printf("1.2\n"); one_hotted_blocks = one_hotted_blocks.to(precision); //printf("1.3\n"); @@ -139,14 +205,14 @@ std::vector ModelManager::Decide( //states.push_back(state); // Play. 
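// Hedged sketch of the tensor shapes assembled above (the flatten/concat lines live in
// unchanged context outside this hunk, so the exact variable names below are assumptions):
// 78 scalar features plus 1089 grid cells, one-hot encoded into n_grid_channels = 3 classes,
// gives 78 + 1089 * 3 = 3345 = n_in.
//
//   auto one_hot = torch::one_hot(blocks_input_gpu, n_grid_channels)   // [batch, 1089, 3]
//                      .to(precision)
//                      .view({blocks_input_gpu.size(0), -1});          // [batch, 3267]
//   auto state_forward = torch::cat({state_inputs_gpu, one_hot}, 1);   // [batch, 3345]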
//cout << state_forward.sizes() << endl; - at::cuda::getCurrentCUDAStream().synchronize(); + //at::cuda::getCurrentCUDAStream().synchronize(); auto now = std::chrono::high_resolution_clock::now(); time_pre_forward = std::chrono::duration(now - decide_time).count() * 1000.; //std::cout << "Time to allocate and transfer: " << std::chrono::duration(now - decide_time).count() << std::endl; decide_time = std::chrono::high_resolution_clock::now(); auto av = ac->actor_forward(state_forward); - at::cuda::getCurrentCUDAStream().synchronize(); + //at::cuda::getCurrentCUDAStream().synchronize(); now = std::chrono::high_resolution_clock::now(); time_forward = std::chrono::duration(now - decide_time).count() * 1000.; @@ -155,7 +221,7 @@ std::vector ModelManager::Decide( //printf("2.1\n"); av = ac->normal_actor(av); - at::cuda::getCurrentCUDAStream().synchronize(); + //at::cuda::getCurrentCUDAStream().synchronize(); now = std::chrono::high_resolution_clock::now(); time_normal = std::chrono::duration(now - decide_time).count() * 1000.; @@ -216,7 +282,7 @@ std::vector ModelManager::Decide( //tValues = tValues.to(torch::kCPU); //auto now = std::chrono::high_resolution_clock::now(); //std::cout << "Time to .to: " << std::chrono::duration(now - decide_time).count() << std::endl; - at::cuda::getCurrentCUDAStream().synchronize(); + //at::cuda::getCurrentCUDAStream().synchronize(); now = std::chrono::high_resolution_clock::now(); time_to_cpu = std::chrono::duration(now - decide_time).count() * 1000.; decide_time = std::chrono::high_resolution_clock::now(); @@ -515,7 +581,7 @@ void ModelManager::SaveReplays() return; } -void ModelManager::Update(double avg_reward, double& avg_training_loss) +void ModelManager::Update(double avg_reward, double &avg_training_loss, double &avg_actor_loss, double &avg_critic_loss) { // Update. 
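// Update() now reports the loss breakdown through the three reference parameters instead of
// returning a single value. Minimal caller sketch (it mirrors the server loop further down in
// this patch; the variable names are illustrative):
//
//   double avg_training_loss = 0, avg_actor_loss = 0, avg_critic_loss = 0;
//   model_manager.Update(avg_dist, avg_training_loss, avg_actor_loss, avg_critic_loss);
//   logger << avg_training_loss << "," << avg_actor_loss << "," << avg_critic_loss;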
//printf("Updating the network.\n"); @@ -545,20 +611,20 @@ void ModelManager::Update(double avg_reward, double& avg_training_loss) //printf("UPDATING111\n"); try { - avg_training_loss = PPO::update(ac, opt, rewards.size(), ppo_epochs, mini_batch_size, dbeta, gamma, lambda, device, clip_param); + PPO::update(ac, opt, rewards.size(), ppo_epochs, mini_batch_size, dbeta, gamma, lambda, device, avg_training_loss, avg_actor_loss, avg_critic_loss, clip_param); } catch(const std::exception &e) { std::cout << "PPO::update crashed with reason: " << e.what() << std::endl; exit(1); } - scheduler->step(avg_reward); + //scheduler->step(avg_reward); ac->presample_normal(iReplaysPerBot, count_bots); - for(auto &group : opt->param_groups()) + /*for(auto &group : opt->param_groups()) { auto lr = group.options().get_lr(); std::cout << "Current learning rate: " << lr << std::endl; - } + }*/ //printf("UPDATed\n"); //printf("4"); // c = 0; @@ -602,3 +668,8 @@ int64_t ModelManager::GetCountPPOEpochs() { return ppo_epochs; } + +bool ModelManager::IsTraining() +{ + return ac->is_training(); +} diff --git a/src/engine/server/NN/ModelManager.h b/src/engine/server/NN/ModelManager.h index d33ed58..396137b 100644 --- a/src/engine/server/NN/ModelManager.h +++ b/src/engine/server/NN/ModelManager.h @@ -37,7 +37,7 @@ struct ModelInputInputs // Old hook angle according to tee vec2 hook_old_angle; - // Path of the tee + // Path towards the finish containing 30 steps(blocks) vec2 path[30]; }; @@ -81,10 +81,12 @@ struct ModelManager void Reward(float reward, bool done); void SaveReplays(); - void Update(double avg_reward, double &avg_training_loss); + void Update(double avg_reward, double &avg_training_loss, double &avg_actor_loss, double &avg_critic_loss); void Save(std::string filename); + bool IsTraining(); + size_t GetCountOfReplays(); // Return starting learning rate double GetLearningRate(); diff --git a/src/engine/server/NN/Models.h b/src/engine/server/NN/Models.h index f5403f5..b1f677d 100644 --- a/src/engine/server/NN/Models.h +++ b/src/engine/server/NN/Models.h @@ -12,6 +12,7 @@ struct ActorCriticImpl : public torch::nn::Module { int64_t n_in, n_out, used_presamples; + // Actor. 
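// Hedged reading of the policy head declared below (the body of normal_actor() is outside the
// hunks shown, so the sampling lines are an assumption): the actor now emits an unbounded mean
// -- the final Tanh was commented out in this patch -- and "log_std" is a separate learned,
// state-independent parameter that receives the actor learning rate in the optimizer param
// groups above. Sampling is then the usual reparameterized Normal draw, roughly:
//
//   auto sigma  = torch::exp(log_std_).expand_as(mu);
//   auto action = mu + sigma * torch::randn_like(mu);
//
// presample_normal() appears to pre-generate those random draws per bot and replay slot so
// Decide() does not have to call the RNG every decision tick.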
//torch::nn::Linear a_lin1_, a_lin2_, /*a_lin3_,*/ a_lin4_; torch::nn::Sequential actor_network; @@ -32,38 +33,64 @@ struct ActorCriticImpl : public torch::nn::Module actor_network(torch::nn::Sequential( torch::nn::Linear(n_in, 2048), torch::nn::ReLU(), - torch::nn::Linear(2048, 1024), + torch::nn::Linear(2048, 1024), torch::nn::ReLU(), //torch::nn::Dropout(0.2), torch::nn::Linear(1024, 512), torch::nn::ReLU(), - //torch::nn::Dropout(0.2), + // torch::nn::Dropout(0.2), torch::nn::Linear(512, 256), - torch::nn::ReLU(), + torch::nn::ReLU(), //torch::nn::Dropout(0.2), torch::nn::Linear(256, 128), torch::nn::ReLU(), - torch::nn::Linear(128, n_out), - torch::nn::Tanh())), + //torch::nn::Dropout(0.2), + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + // torch::nn::Dropout(0.2), + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + // torch::nn::Dropout(0.2), + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + // torch::nn::Dropout(0.2), + //torch::nn::Linear(256, 128), + //torch::nn::ReLU(), + torch::nn::Linear(128, n_out)/*, + torch::nn::Tanh()*/)), mu_(torch::full(n_out, 0.)), log_std_(torch::full(n_out, std)), critic_network(torch::nn::Sequential( - torch::nn::Linear(n_in, 1024), + torch::nn::Linear(n_in, 2048), torch::nn::ReLU(), - torch::nn::Linear(1024, 512), + //torch::nn::Dropout(0.2), + torch::nn::Linear(2048, 1024), torch::nn::ReLU(), //torch::nn::Dropout(0.2), - torch::nn::Linear(512, 256), + torch::nn::Linear(1024, 512), torch::nn::ReLU(), + //torch::nn::Dropout(0.2), + torch::nn::Linear(512, 256), + torch::nn::ReLU(), //torch::nn::Dropout(0.2), torch::nn::Linear(256, 128), torch::nn::ReLU(), - torch::nn::Linear(128, 64), - torch::nn::ReLU(), //torch::nn::Dropout(0.2), - torch::nn::Linear(64, n_out), - torch::nn::Tanh(), - torch::nn::Linear(n_out, 1) + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + //torch::nn::Dropout(0.2), + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + //torch::nn::Dropout(0.2), + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + // torch::nn::Dropout(0.2), + //torch::nn::Linear(128, 64), + //torch::nn::ReLU(), + //torch::nn::Dropout(0.2), + //torch::nn::Linear(64, n_out), + //torch::nn::ReLU(), + torch::nn::Linear(128, 1) )) // Critic @@ -73,6 +100,8 @@ struct ActorCriticImpl : public torch::nn::Module // c_lin4_(torch::nn::Linear(32, n_out)), // c_val_(torch::nn::Linear(n_out, 1)) { + //register_module("conv_layers", conv_layers); + //register_module("scalar_fc_layers", scalar_fc_layers); register_module("actor_network", actor_network); // Register the modules. // register_module("a_lin1", a_lin1_); @@ -81,6 +110,7 @@ struct ActorCriticImpl : public torch::nn::Module //register_module("a_lin4", a_lin4_); register_parameter("log_std", log_std_); register_module("critic_network", critic_network); + // register_module("c_lin1", c_lin1_); // register_module("c_lin2", c_lin2_); @@ -118,6 +148,18 @@ struct ActorCriticImpl : public torch::nn::Module return val; } + // Forward pass. + auto actor_parameters() + { + return actor_network->parameters(); + } + + // Forward pass. + auto critic_parameters() + { + return critic_network->parameters(); + } + // Forward pass. 
auto normal_actor(torch::Tensor x) -> torch::Tensor { diff --git a/src/engine/server/NN/ProximalPolicyOptimization.h b/src/engine/server/NN/ProximalPolicyOptimization.h index 007ca71..062930f 100644 --- a/src/engine/server/NN/ProximalPolicyOptimization.h +++ b/src/engine/server/NN/ProximalPolicyOptimization.h @@ -30,7 +30,7 @@ class PPO static auto Initilize(size_t batch_size, size_t count_players) -> void; static auto update(ActorCritic& ac, std::shared_ptr &opt, - uint steps, uint epochs, uint mini_batch_size, double beta, float gamma, float lambda, c10::DeviceType device, double clip_param = .2) -> double; + uint steps, uint epochs, uint mini_batch_size, double beta, float gamma, float lambda, c10::DeviceType device, double &avg_training_loss, double &avg_actor_loss, double &avg_critic_loss, double clip_param = .2) -> void; static auto save_replay(torch::Tensor &state, torch::Tensor &action, torch::Tensor &log_prob, @@ -180,10 +180,10 @@ class ReplayBuffer // advantages_concat = torch::cat(rewards, 1); //printf("1\n"); - states_concatenated_reshaped = states_concatenated.reshape({states_concatenated.sizes()[0] * states_concatenated.sizes()[1], states_concatenated.sizes()[2]}); - actions_concat_reshaped = actions_concat.reshape({actions_concat.sizes()[0] * actions_concat.sizes()[1], actions_concat.sizes()[2]}); + states_concatenated_reshaped = states_concatenated.view({states_concatenated.sizes()[0] * states_concatenated.sizes()[1], states_concatenated.sizes()[2]}); + actions_concat_reshaped = actions_concat.view({actions_concat.sizes()[0] * actions_concat.sizes()[1], actions_concat.sizes()[2]}); //printf("1\n"); - log_probs_concat_reshaped = log_probs_concat.reshape({log_probs_concat.sizes()[0] * log_probs_concat.sizes()[1], log_probs_concat.sizes()[2]}); + log_probs_concat_reshaped = log_probs_concat.view({log_probs_concat.sizes()[0] * log_probs_concat.sizes()[1], log_probs_concat.sizes()[2]}); //printf("1\n"); // std::cout << "Rewards size: " << rewards.sizes() << " " << rewards.size(0) << std::endl; //rewards_concat = rewards_concat.reshape({rewards_concat.numel(), 1}); @@ -338,9 +338,9 @@ torch::Tensor calculate_returns(std::vector &rewards, std::vector & { float delta = 0; if(i == rewards.size() - 1) - delta = rewards[i] + gamma * vValues[i] * (1 - dones[i]) - vValues[i]; + delta = (rewards[i] / 200.f) + gamma * vValues[i] * (1 - dones[i]) - vValues[i]; else - delta = rewards[i] + gamma * vValues[i + 1] * (1 - dones[i]) - vValues[i]; + delta = (rewards[i] / 200.f) + gamma * vValues[i + 1] * (1 - dones[i]) - vValues[i]; gae = delta + gamma * lambda * (1 - dones[i]) * gae; // printf("FINNNN1.4\n"); @@ -455,9 +455,11 @@ auto PPO::count_of_replays() -> size_t auto PPO::update(ActorCritic &ac, std::shared_ptr &opt, - uint steps, uint epochs, uint mini_batch_size, double beta, float gamma, float lambda, c10::DeviceType device, double clip_param) -> double + uint steps, uint epochs, uint mini_batch_size, double beta, float gamma, float lambda, c10::DeviceType device, double &avg_training_loss, double &avg_actor_loss, double &avg_critic_loss, double clip_param) -> void { torch::Tensor total_loss_tensor = torch::zeros({}, torch::kCUDA); // Initialize tensor to accumulate loss + torch::Tensor total_actor_loss_tensor = torch::zeros({}, torch::kCUDA); // Initialize tensor to accumulate loss + torch::Tensor total_critic_loss_tensor = torch::zeros({}, torch::kCUDA); // Initialize tensor to accumulate loss { std::deque states, actions, values, log_probs; @@ -466,7 +468,7 @@ auto 
PPO::update(ActorCritic &ac, // Wait for all log probs to come to cpu at::cuda::getCurrentCUDAStream().synchronize(); //at::cuda::stream_synchronize(at::cuda::getCurrentCUDAStream()); - + //printf("1\n"); //printf("1\n"); for(size_t i = 0; i < replay_buffer->capacity() / mini_batch_size /*&& i < epochs*/; i++) { @@ -486,7 +488,7 @@ auto PPO::update(ActorCritic &ac, torch::Tensor cpy_inputs = state.index({"...", torch::indexing::Slice(0, 78)}); //printf("UPDATING0.3\n"); //std::cout << state.sizes() << std::endl; - torch::Tensor cpy_blocks = torch::one_hot(state.index({"...", torch::indexing::Slice(78, 1167)}).to(torch::kInt64), 3).to(torch::kF32).view({state.size(0), -1}); + torch::Tensor cpy_blocks = torch::one_hot(state.index({"...", torch::indexing::Slice(78, 1167)}).to(torch::kInt64), 3).to(torch::kF32).view({(long long)mini_batch_size, -1}); //printf("UPDATING0.4\n"); auto cpy_state_forward = torch::cat({cpy_inputs, cpy_blocks}, 1); //printf("UPDATING0.5\n"); @@ -508,6 +510,7 @@ auto PPO::update(ActorCritic &ac, //std::cout << mini_batch_size << std::endl; //std::cout << replay_buffer->size() / mini_batch_size << std::endl; //printf("2\n"); + //Sleep(5000); replay_buffer->clear(); //printf("CHECK\n"); @@ -522,6 +525,7 @@ auto PPO::update(ActorCritic &ac, { for(size_t i = 0; i < replay_buffer->capacity() / mini_batch_size; i++) { + //c10::cuda::CUDACachingAllocator::emptyCache(); //auto decide_time = std::chrono::high_resolution_clock::now(); torch::Tensor states_cpy = states[i]; @@ -557,7 +561,7 @@ auto PPO::update(ActorCritic &ac, torch::Tensor cpy_inputs = cpy_sta.index({"...", torch::indexing::Slice(0, 78)}); // printf("UPDATING0.3\n"); - torch::Tensor cpy_blocks = torch::one_hot(cpy_sta.index({"...", torch::indexing::Slice(78, 1167)}).to(torch::kInt64), 3).to(torch::kF32).view({cpy_sta.size(0), -1}); + torch::Tensor cpy_blocks = torch::one_hot(cpy_sta.index({"...", torch::indexing::Slice(78, 1167)}).to(torch::kInt64), 3).to(torch::kF32).view({(long long)mini_batch_size, -1}); // printf("UPDATING0.4\n"); cpy_sta = torch::cat({cpy_inputs, cpy_blocks}, 1); @@ -573,13 +577,15 @@ auto PPO::update(ActorCritic &ac, // printf("UPDATING0.1.3.2\n"); // std::cout << dones_cpy.sizes() << std::endl; // std::cout << dones_cpy << std::endl; - + //printf("3\n"); + //Sleep(7000); + //std::cout << cpy_values << std::endl; auto returnsee = calculate_returns(rewards[i], dones[i], cpy_values, gamma, lambda); //auto now = std::chrono::high_resolution_clock::now(); //std::cout << "Time to prepare: " << (float)(std::chrono::duration_cast(now - decide_time).count()) << std::endl; // std::cout << returnsee.sizes() << std::endl; - // std::cout << returnsee << std::endl; + //std::cout << returnsee << std::endl; // auto decide_time = std::chrono::high_resolution_clock::now(); torch::Tensor cpy_ret = returnsee; // normalize_rewards(returnsee); @@ -603,6 +609,8 @@ auto PPO::update(ActorCritic &ac, // printf("UPDATING1.1\n"); auto action = ac->actor_forward(cpy_sta); + //printf("4\n"); + //Sleep(7000); // printf("33.0\n"); // std::cout << action.sizes() << std::endl; // std::cout << cpy_act.sizes() << std::endl; @@ -625,8 +633,11 @@ auto PPO::update(ActorCritic &ac, // printf("UPDATING1.5.1\n"); auto surr2 = torch::clamp(ratio, 1. - clip_param, 1. 
+ clip_param) * cpy_adv; // printf("UPDATING1.6\n"); - + //printf("4.9\n"); + //Sleep(7000); auto val = ac->critic_forward(cpy_sta); + //printf("5\n"); + //Sleep(7000); auto actor_loss = -torch::min(surr1, surr2).mean(); // printf("UPDATING1.7\n"); auto critic_loss = torch::nn::functional::mse_loss(val, cpy_ret); //(cpy_ret - val).pow(2).mean(); @@ -653,7 +664,11 @@ auto PPO::update(ActorCritic &ac, // printf("UPDATING1.12\n"); // total_loss += loss.item(); + total_actor_loss_tensor += actor_loss; + total_critic_loss_tensor += critic_loss; total_loss_tensor += loss; + //printf("Pre next\n"); + //Sleep(5000); // printf("Chillin\n"); // Sleep(10000); @@ -673,12 +688,14 @@ auto PPO::update(ActorCritic &ac, double avg_loss = 0; //auto decide_time = std::chrono::high_resolution_clock::now(); - avg_loss = total_loss_tensor.item() / (epochs * replay_buffer->capacity() / mini_batch_size); + avg_training_loss = total_loss_tensor.item() / (epochs * replay_buffer->capacity() / mini_batch_size); + avg_actor_loss = total_actor_loss_tensor.item() / (epochs * replay_buffer->capacity() / mini_batch_size); + avg_critic_loss = total_critic_loss_tensor.item() / (epochs * replay_buffer->capacity() / mini_batch_size); //auto now = std::chrono::high_resolution_clock::now(); //std::cout << "Time to calculate loss: " << (float)(std::chrono::duration_cast(now - decide_time).count()) << std::endl; //std::cout << "Average training Loss: " << avg_loss << std::endl; //c10::cuda::CUDACachingAllocator::emptyCache(); - return avg_loss; + return; } diff --git a/src/engine/server/server.cpp b/src/engine/server/server.cpp index fbb07ea..b701cc0 100644 --- a/src/engine/server/server.cpp +++ b/src/engine/server/server.cpp @@ -2991,7 +2991,8 @@ int CServer::Run() dbg_msg("server", "+-------------------------+"); } - int update_tick = 1000; + int skip_tick = 4; + int update_tick = 1000 * skip_tick; std::random_device rd; std::mt19937 gen(rd()); @@ -3047,6 +3048,36 @@ int CServer::Run() AStar astar(pathfinding_grid, vFinishPoses); printf("Pathfinder created.\n"); + printf("Creating train directory with folders...\n"); + + static auto dir_name = to_string(std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count()); + + if(fs_makedir("train") != 0) + { + cout << "Can't make train directory" << endl; + exit(1); + } + + if(fs_makedir((string("train\\") + dir_name).c_str()) != 0) + { + cout << "Can't make dir for this learning directory" << endl; + exit(1); + } + + if(fs_makedir(string("train\\" + dir_name + "\\models").c_str()) != 0) + { + cout << "Can't make models directory" << endl; + exit(1); + } + + if(fs_makedir(string("train\\" + dir_name + "\\demos").c_str()) != 0) + { + cout << "Can't make demos directory" << endl; + exit(1); + } + + printf("Train directory with folders created.\n"); + //std::pair start = {4, 4}; //std::pair goal = {4, 62}; @@ -3134,44 +3165,18 @@ int CServer::Run() vBotBestDistance[i] = {astar.distanceToGoal(spawn_point_pos), 0}; // auto tr = std::thread(RunNNForward, &model_manager, i, &vEvents, &vFinishEvents, &vInputs, &vOutputs); // tr.detach(); + //char aFilename[IO_MAX_PATH_LENGTH]; + //str_format(aFilename, sizeof(aFilename), "%s_%s_%d_%llu.demo", m_aCurrentMap, name.c_str(), m_NetServer.Address().port, time_get()); + //string path_demo = "train/" + dir_name + "/demos/" + aFilename; + //int ret = m_aDemoRecorder[i].Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], 
"server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]); } } printf("Bots added\n"); printf("Initializing neural model...\n"); - ModelManager model_manager(count_bots * update_tick, count_bots); + ModelManager model_manager(count_bots * update_tick / skip_tick, count_bots); printf("Model initialized.\n"); - printf("Creating train directory with folders...\n"); - - static auto dir_name = to_string(std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count()); - - if(fs_makedir("train") != 0) - { - cout << "Can't make train directory" << endl; - exit(1); - } - - if(fs_makedir((string("train\\") + dir_name).c_str()) != 0) - { - cout << "Can't make dir for this learning directory" << endl; - exit(1); - } - - if(fs_makedir(string("train\\" + dir_name + "\\models").c_str()) != 0) - { - cout << "Can't make models directory" << endl; - exit(1); - } - - if(fs_makedir(string("train\\" + dir_name + "\\demos").c_str()) != 0) - { - cout << "Can't make demos directory" << endl; - exit(1); - } - - printf("Train directory with folders created.\n"); - printf("Creating data.csv file for statistics...\n"); std::ofstream logger; { @@ -3183,7 +3188,7 @@ int CServer::Run() std::cout << update_tick << std::endl;*/ sprintf_s(aFilename, sizeof(aFilename), "lr%.1embs%lldppoe%lldbots%drpb%d.csv", model_manager.GetLearningRate(), model_manager.GetMiniBatchSize(), model_manager.GetCountPPOEpochs(), count_bots, update_tick); logger.open("train\\" + dir_name + "\\" + aFilename); - logger << "Step,Average reward,TPS,Dies,Average distance,Training loss,Learning rate,Time since start,Time to decide,Time to tick,Time rest,Time pre forward,Time forward,Time normal,Time to cpu,Time process last" << endl; + logger << "Step,Average reward,TPS,Dies,Average distance,Training loss,Actor loss,Critic loss,Learning rate,Time since start,Time to decide,Time to tick,Time rest,Time pre forward,Time forward,Time normal,Time to cpu,Time process last" << endl; } printf("data.csv file created and initialized.\n"); @@ -3192,7 +3197,7 @@ int CServer::Run() char aFilename[IO_MAX_PATH_LENGTH]; str_format(aFilename, sizeof(aFilename), "%s_%d_%llu.demo", m_aCurrentMap, m_NetServer.Address().port, time_get()); path_demo = "train/" + dir_name + "/demos/" + aFilename; - int ret = m_aDemoRecorder[MAX_CLIENTS].Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], "server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]); + int ret = m_aDemoRecorder[0].Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], "server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]); } // start game @@ -3351,6 +3356,7 @@ int CServer::Run() static double cumulative_time_process_last = 0; // Handle bots + if(m_CurrentGameTick % skip_tick == 0) { auto gamelayer = gamecontext->Layers()->GameLayer(); const CTile *pTiles = static_cast(Kernel()->RequestInterface()->GetData(gamelayer->m_Data)); @@ -3363,7 +3369,7 @@ int CServer::Run() // Rewards static float checkpoint_reward = 100.f / 32.f; - static float die_reward = -500.f / 32.f; // -100.f / 32.f + static float die_reward = -250.f / 32.f; // -500.f / 32.f static float finish_reward = 1000.f / 32.f; static float step_reward = -0.01f; @@ -3418,7 +3424,19 @@ int CServer::Run() } else if(finished) { + 
//m_aDemoRecorder[bot->GetCID()].Stop(); + + /*char aNewFilename[IO_MAX_PATH_LENGTH]; + str_format(aNewFilename, sizeof(aNewFilename), "average_dist_%.2f_rew_%.2f_%s_%llu.demo", avg_dist, avg_reward, m_aCurrentMap, time_get_impl()); + path_demo = "train/" + dir_name + "/demos/" + aNewFilename; + Storage()->RenameFile(m_aDemoRecorder[bot->GetCID()].GetCurrentFilename(), path_demo.c_str(), IStorage::TYPE_ABSOLUTE);*/ + bot->KillCharacter(); + + /*char aFilename[IO_MAX_PATH_LENGTH]; + str_format(aFilename, sizeof(aFilename), "%s_%s_%d_%llu.demo", m_aCurrentMap, name.c_str(), m_NetServer.Address().port, time_get()); + string path_demo = "train/" + dir_name + "/demos/" + aFilename; + int ret = m_aDemoRecorder[i].Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], "server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]);*/ } //decide_time = time_get_impl(); @@ -3500,7 +3518,9 @@ int CServer::Run() } - auto long_stay_penalty = -0.0003f * (m_CurrentGameTick - vBotBestDistance[i].second); + auto tick_diff = m_CurrentGameTick - vBotBestDistance[i].second; + + auto long_stay_penalty = -0.0003f * tick_diff; //cout << reward << endl; @@ -3569,7 +3589,7 @@ int CServer::Run() float avg_dist = ((float)moved_distance / (float)(dies + count_bots)); rewards.clear(); - if(m_CurrentGameTick % 20000 == 0) + if(m_CurrentGameTick % (20000 * skip_tick) == 0 && model_manager.IsTraining()) { printf("UPDATING\n"); std::vector vAverageDistancePerSpawn(vSpawnCumulativeReward.size()); @@ -3628,9 +3648,9 @@ int CServer::Run() }*/ } - auto demo_recorder = &m_aDemoRecorder[MAX_CLIENTS]; + auto demo_recorder = &m_aDemoRecorder[0]; - if(demo_recorder->IsRecording()) + if(demo_recorder->IsRecording() && model_manager.IsTraining()) { demo_recorder->Stop(); char aNewFilename[IO_MAX_PATH_LENGTH]; @@ -3639,12 +3659,12 @@ int CServer::Run() Storage()->RenameFile(demo_recorder->GetCurrentFilename(), path_demo.c_str(), IStorage::TYPE_ABSOLUTE); } //printf("111\n"); - if(m_CurrentGameTick % 20000 == 0) + if(m_CurrentGameTick % (20000 * skip_tick) == 0 && model_manager.IsTraining()) { model_manager.Save("train\\" + dir_name + "\\models\\last"); } //printf("222\n"); - if(avg_dist > best_average) + if(avg_dist > best_average && model_manager.IsTraining()) { best_average = avg_dist; model_manager.Save("train\\" + dir_name + "\\models\\best"); // best" + to_string(average) @@ -3670,15 +3690,19 @@ int CServer::Run() //printf("ret: %i\n", ret); //printf("start_u\n"); //int64_t update_time = time_get_impl(); - double avg_loss = 0; - model_manager.Update(avg_dist, avg_loss); + double avg_training_loss = 0; + double avg_actor_loss = 0; + double avg_critic_loss = 0; + model_manager.Update(avg_dist, avg_training_loss, avg_actor_loss, avg_critic_loss); //cout << "Time update: " << (float)(time_get_impl() - decide_time) / (float)time_freq() << endl; logger << m_CurrentGameTick / update_tick << "," << avg_reward << "," << ticks_per_second << "," << dies << "," << avg_dist - << "," << avg_loss + << "," << avg_training_loss + << "," << avg_actor_loss + << "," << avg_critic_loss << "," << model_manager.GetCurrentLearningRate() << "," << (float)time_get_impl() / (float)time_freq() << "," << (cumulative_time_to_decide / (float)update_tick) @@ -3690,7 +3714,7 @@ int CServer::Run() << "," << (cumulative_time_to_cpu / (float)update_tick) << "," << (cumulative_time_process_last / (float)update_tick) << endl; - cout << "Avg. 
reward: " << avg_reward << " TPS: " << ticks_per_second << " Avg. Training Loss: " << avg_loss + cout << "Avg. reward: " << avg_reward << " TPS: " << ticks_per_second << " Avg. Training Loss: " << avg_training_loss << " Dies: " << dies << " Avg. distance: " << avg_dist << endl; dies \ = moved_distance \ @@ -3707,12 +3731,12 @@ int CServer::Run() = 0; //cout << "Time to update: " << (float)(time_get_impl() - update_time) / (float)time_freq() << endl; //printf("end\n"); - if(m_CurrentGameTick % 20000 == 0) + if(m_CurrentGameTick % 20000 == 0 && model_manager.IsTraining()) { char aFilename[IO_MAX_PATH_LENGTH]; str_format(aFilename, sizeof(aFilename), "%s_%d_%llu.demo", m_aCurrentMap, m_NetServer.Address().port, time_get()); path_demo = "train\\" + dir_name + "\\demos\\" + aFilename; - //int ret = demo_recorder->Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], "server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]); + int ret = demo_recorder->Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], "server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]); } decide_time = std::chrono::high_resolution_clock::now(); } @@ -3731,7 +3755,7 @@ int CServer::Run() auto bot_character_core = bot_character->Core(); // auto bot_2_character = gamecontext->GetPlayerChar(bot_2->GetCID()); //printf("HAHHA3.3\n"); - if(m_CurrentGameTick != 0 && m_CurrentGameTick % update_tick == 0) + if(m_CurrentGameTick != 0 && m_CurrentGameTick % update_tick == 0 && model_manager.IsTraining()) { // Add to cumulative spawn distance vector //int iOldSpawnPoint = vBotsSpawnPos[i]; @@ -3990,28 +4014,29 @@ int CServer::Run() // //printf(buf); //} //printf("HAHHA4\n"); + started = true; + auto now = std::chrono::high_resolution_clock::now(); + cumulative_time_rest += std::chrono::duration_cast>(now - decide_time).count() * 1000.f; + // cout << "Time rest: " << std::chrono::duration_cast>(now - decide_time).count() << endl; + double time_pre_forward = 0; + double time_forward = 0; + double time_normal = 0; + double time_to_cpu = 0; + double time_process_last = 0; + decide_time = std::chrono::high_resolution_clock::now(); + vOutputs = model_manager.Decide(vInputInputs, vInputBlocks, time_pre_forward, time_forward, time_normal, time_to_cpu, time_process_last); + now = std::chrono::high_resolution_clock::now(); + cumulative_time_to_decide += std::chrono::duration_cast>(now - decide_time).count() * 1000.f; + cumulative_time_pre_forward += time_pre_forward; + cumulative_time_forward += time_forward; + cumulative_time_normal += time_normal; + cumulative_time_to_cpu += time_to_cpu; + cumulative_time_process_last += time_process_last; + // cout << "Time to decide: " << std::chrono::duration_cast>(now - decide_time).count() << endl; + decide_time = std::chrono::high_resolution_clock::now(); } //printf("13\n"); - started = true; - auto now = std::chrono::high_resolution_clock::now(); - cumulative_time_rest += std::chrono::duration_cast>(now - decide_time).count() * 1000.f; - //cout << "Time rest: " << std::chrono::duration_cast>(now - decide_time).count() << endl; - double time_pre_forward = 0; - double time_forward = 0; - double time_normal = 0; - double time_to_cpu = 0; - double time_process_last = 0; - decide_time = std::chrono::high_resolution_clock::now(); - vOutputs = 
model_manager.Decide(vInputInputs, vInputBlocks, time_pre_forward, time_forward, time_normal, time_to_cpu, time_process_last); - now = std::chrono::high_resolution_clock::now(); - cumulative_time_to_decide += std::chrono::duration_cast>(now - decide_time).count() * 1000.f; - cumulative_time_pre_forward += time_pre_forward; - cumulative_time_forward += time_forward; - cumulative_time_normal += time_normal; - cumulative_time_to_cpu += time_to_cpu; - cumulative_time_process_last += time_process_last; - //cout << "Time to decide: " << std::chrono::duration_cast>(now - decide_time).count() << endl; - decide_time = std::chrono::high_resolution_clock::now(); + //printf("KEK\n"); //printf("14\n"); for(int c = 0; c < MAX_CLIENTS; c++) @@ -4089,7 +4114,15 @@ int CServer::Run() model_angle = returned_model.angle * 299.f; model_direction = returned_model.direction; model_hook = returned_model.hook; - model_jump = returned_model.jump; + + if(m_CurrentGameTick % skip_tick != 1) + { + model_jump = 0; + } + else + { + model_jump = returned_model.jump; + } // printf("HAHHA33.3\n"); // printf("HAHHA34\n"); // static ModelManager model_manager; @@ -4265,7 +4298,7 @@ int CServer::Run() break; } //printf("21\n"); - now = std::chrono::high_resolution_clock::now(); + auto now = std::chrono::high_resolution_clock::now(); cumulative_time_to_tick += std::chrono::duration_cast>(now - decide_time).count() * 1000.f; //cout << "Time to tick: " << (float)(now - decide_time) / (float)time_freq() << endl; decide_time = std::chrono::high_resolution_clock::now(); @@ -4285,7 +4318,7 @@ int CServer::Run() // snap game if(NewTicks) { - if((Config()->m_SvHighBandwidth || (m_CurrentGameTick % 2) == 0) && m_aDemoRecorder[MAX_CLIENTS].IsRecording()) + if((Config()->m_SvHighBandwidth || (m_CurrentGameTick % 2) == 0) && m_aDemoRecorder[0].IsRecording()) DoSnapshot(); UpdateClientRconCommands(); diff --git a/src/game/server/player.cpp b/src/game/server/player.cpp index 7678d27..e73a7a0 100644 --- a/src/game/server/player.cpp +++ b/src/game/server/player.cpp @@ -247,8 +247,8 @@ void CPlayer::Tick() m_pCharacter = 0; } } - else if(m_Spawning && !m_WeakHookSpawn) - TryRespawn(); + /*else if(m_Spawning && !m_WeakHookSpawn) + TryRespawn();*/ } else { @@ -307,8 +307,8 @@ void CPlayer::PostPostTick() if(!Server()->ClientIngame(m_ClientID)) return; - if(!GameServer()->m_World.m_Paused && !m_pCharacter && m_Spawning && m_WeakHookSpawn) - TryRespawn(); + /*if(!GameServer()->m_World.m_Paused && !m_pCharacter && m_Spawning && m_WeakHookSpawn) + TryRespawn();*/ } void CPlayer::Snap(int SnappingClient)
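
The PPO step itself is spread across several hunks in ProximalPolicyOptimization.h above; for reference, it condenses to roughly the sketch below. The GAE recursion, the reward / 200 scaling and the clipped surrogate follow the visible hunks; the return/advantage bookkeeping, the entropy bonus and the final actor/critic weighting are not shown in this patch, so those parts (and the helper names gae_returns / ppo_minibatch_loss) are assumptions.

#include <torch/torch.h>
#include <vector>

// GAE-style returns as in calculate_returns(): rewards are divided by 200 before the
// temporal-difference term, and the recursion is cut whenever dones[i] == 1.
torch::Tensor gae_returns(const std::vector<float> &rewards, const std::vector<float> &dones,
                          const std::vector<float> &values, float gamma, float lambda)
{
    std::vector<float> returns(rewards.size());
    float gae = 0.f;
    for(int i = (int)rewards.size() - 1; i >= 0; --i)
    {
        // The last step bootstraps with V(s_t) itself, exactly as the patched loop does.
        float next_value = (i == (int)rewards.size() - 1) ? values[i] : values[i + 1];
        float delta = (rewards[i] / 200.f) + gamma * next_value * (1.f - dones[i]) - values[i];
        gae = delta + gamma * lambda * (1.f - dones[i]) * gae;
        returns[i] = gae + values[i]; // assumption: returns = GAE advantage + value baseline
    }
    return torch::tensor(returns).unsqueeze(1);
}

// Clipped-surrogate PPO loss for one minibatch, mirroring the actor_loss / critic_loss
// computation in PPO::update(); the 0.5 weighting of the critic term is an assumption.
torch::Tensor ppo_minibatch_loss(const torch::Tensor &new_log_prob, const torch::Tensor &old_log_prob,
                                 const torch::Tensor &advantage, const torch::Tensor &value,
                                 const torch::Tensor &returns, double clip_param = 0.2)
{
    auto ratio = torch::exp(new_log_prob - old_log_prob);
    auto surr1 = ratio * advantage;
    auto surr2 = torch::clamp(ratio, 1. - clip_param, 1. + clip_param) * advantage;
    auto actor_loss = -torch::min(surr1, surr2).mean();
    auto critic_loss = torch::nn::functional::mse_loss(value, returns);
    return actor_loss + 0.5 * critic_loss;
}

Accumulating actor_loss and critic_loss into separate tensors, as PPO::update() now does, is what makes the new "Actor loss" and "Critic loss" columns in data.csv possible.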