diff --git a/src/engine/server/NN/ModelManager.cpp b/src/engine/server/NN/ModelManager.cpp index 5504904..9d9a513 100644 --- a/src/engine/server/NN/ModelManager.cpp +++ b/src/engine/server/NN/ModelManager.cpp @@ -11,9 +11,14 @@ #include int64_t n_in = 3345; // 78 + 1089 * 3 +int64_t n_scalar_in = 78; +int64_t n_grid_channels = 3; int64_t n_out = 9; double stdrt = 2e-2; -double learning_rate = 5e-5; // Default: 1e-3 +double learning_rate = 3e-5; // Default: 5e-5 +double actor_learning_rate = 5e-5; // Default: 5e-5 +double critic_learning_rate = 2e-4; // Default: 1e-4 +//double weight_decay = 0.0001; int64_t mini_batch_size = 8000; // 4096, 8192, 16384, 32768 int64_t ppo_epochs = 2; // Default: 4 @@ -24,6 +29,8 @@ float lambda = 0.95f; ActorCritic ac(n_in, n_out, stdrt); std::shared_ptr opt; //(ac->parameters(), 1e-2); +//std::shared_ptr actor_opt; +//std::shared_ptr ocritic; std::shared_ptr scheduler; VT states; @@ -63,7 +70,7 @@ void generate_random_hyperparameters() std::uniform_int_distribution<> epochs_dist(0, epochs_set.size() - 1); // Epochs range std::uniform_int_distribution<> mini_batch_size_dist(0, mini_batch_sizes_set.size() - 1); // Batch size range - learning_rate = lr_set[lr_dist(gen)]; + //learning_rate = lr_set[lr_dist(gen)]; //gamma = gamma_set[gamma_dist(gen)]; //dbeta = beta_set[beta_dist(gen)]; //clip_param = clip_set[clip_dist(gen)]; @@ -84,22 +91,83 @@ ModelManager::ModelManager(size_t batch_size, size_t count_players) : ac->to(precision); //ac->normal(0., stdrt); //ac->eval(); - //learning_rate = 2e-5; - opt = std::make_shared(ac->parameters(), learning_rate); - //torch::load(ac, "train\\1723320877699\\models\\best_model.pt"); - //torch::load(*opt, "train\\1723320877699\\models\\best_optimizer.pt"); - scheduler = std::make_shared(*opt, /* mode */ torch::optim::ReduceLROnPlateauScheduler::max, /* factor */ 0.5, /* patience */ 20); + //learning_rate = 1e-6; + //torch::optim::AdamOptions opts(learning_rate); + //opts.weight_decay(weight_decay); + //std::vector options = {torch::optim::AdamOptions(actor_learning_rate), torch::optim::AdamOptions(critic_learning_rate)}; + //std::vector params; + // Create parameter groups + //printf("1\n"); + //torch::optim::OptimizerOptions critic_options; + //critic_options.set_lr(critic_learning_rate); + //torch::optim::AdamOptions options; + //torch::optim::OptimizerParamGroup actor_group(ac->actor_network->parameters()); + //torch::optim::OptimizerParamGroup critic_group(ac->critic_network->parameters()); + //printf("1\n"); + // Create the first parameter group with drive_db_ + //std::vector params1 = {drive_db_}; + + // Initialize the Adam optimizer with the parameter group + std::vector param_groups; + + param_groups.push_back(torch::optim::OptimizerParamGroup({ac->actor_network->parameters()}, + std::make_unique(actor_learning_rate))); + param_groups.push_back(torch::optim::OptimizerParamGroup({ac->critic_network->parameters()}, + std::make_unique(critic_learning_rate))); + param_groups.push_back(torch::optim::OptimizerParamGroup({ac->log_std_}, + std::make_unique(actor_learning_rate))); + + // Set different learning rates for each group + //static_cast(actor_group.options()).lr(actor_learning_rate); + //actor_group.options().set_lr(actor_learning_rate); + //printf("1\n"); + //static_cast(critic_group.options()).lr(critic_learning_rate); + //critic_group.options().set_lr(critic_learning_rate); + //printf("1\n"); + + // Create a vector of parameter groups + //std::vector param_groups = {actor_group, critic_group}; + //printf("1\n"); 
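The constructor above builds a single Adam optimizer from three torch::optim::OptimizerParamGroup entries so the actor, the critic, and log_std can each learn at their own rate. A minimal standalone sketch of that pattern (not part of the patch; two small placeholder torch::nn::Linear modules stand in for the real actor_network/critic_network):

#include <torch/torch.h>
#include <iostream>
#include <memory>
#include <vector>

int main()
{
	// Placeholder modules standing in for ac->actor_network and ac->critic_network.
	torch::nn::Linear actor(8, 2), critic(8, 1);
	torch::Tensor log_std = torch::full({2}, -2.0, torch::requires_grad());

	// One optimizer, three parameter groups, each with its own AdamOptions.
	std::vector<torch::optim::OptimizerParamGroup> groups;
	groups.emplace_back(actor->parameters(),
		std::make_unique<torch::optim::AdamOptions>(5e-5));
	groups.emplace_back(critic->parameters(),
		std::make_unique<torch::optim::AdamOptions>(2e-4));
	groups.emplace_back(std::vector<torch::Tensor>{log_std},
		std::make_unique<torch::optim::AdamOptions>(5e-5));

	torch::optim::Adam opt(groups);

	// Each group keeps its own learning rate; this is what the commented-out
	// loop in the constructor reads back via options().get_lr().
	for(auto &group : opt.param_groups())
		std::cout << "group lr: " << group.options().get_lr() << std::endl;

	return 0;
}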
+ + // Create the optimizer with parameter groups + /*torch::optim::Adam optimizer({actor_group, + critic_group});*/ + /*params.push_back(ac->actor_network->parameters()); + params.push_back(ac->critic_network->parameters());*/ + //actor_opt = std::make_shared(ac->actor_parameters(), actor_learning_rate); + //critic_opt = std::make_shared(ac->critic_parameters(), critic_learning_rate); + //opt = std::make_shared(ac->parameters(), learning_rate); + opt = std::make_shared(param_groups); + //torch::load(ac, "train\\1724427150860\\models\\last_model.pt"); + //torch::load(*opt, "train\\1724427150860\\models\\last_optimizer.pt"); + //scheduler = std::make_shared(*opt, /* mode */ torch::optim::ReduceLROnPlateauScheduler::max, /* factor */ 0.5, /* patience */ 20); /*for(auto ¶m_group : opt->param_groups()) { - param_group.options().set_lr(learning_rate); + if(param_group.options().get_lr() == 5e-5) + { + printf("Setting\n"); + param_group.options().set_lr(1e-5); + printf("Setted\n"); + } + + if(param_group.options().get_lr() == 1e-4) + { + printf("Setting\n"); + param_group.options().set_lr(2e-5); + printf("Setted\n"); + } }*/ - cout << "Learning rate: " << learning_rate << " Gamma: " << gamma << " Beta: " << dbeta << " clip_param: " << clip_param << " Epochs: " << ppo_epochs << " Mini batch size: " << mini_batch_size << endl; //Sleep(7000); ac->to(device); - ac->presample_normal(iReplaysPerBot, count_bots); //Sleep(7000); // opt(ac->parameters(), 1e-3); - PPO::Initilize(batch_size, count_bots); + //ac->eval(); + if(ac->is_training()) + { + PPO::Initilize(batch_size, count_bots); + ac->presample_normal(iReplaysPerBot, count_bots); + cout << "Learning rate: " << learning_rate << " Gamma: " << gamma << " Beta: " << dbeta << " clip_param: " << clip_param << " Epochs: " << ppo_epochs << " Mini batch size: " << mini_batch_size << endl; + } //at::cuda::setCurrentCUDAStream(myStream); } @@ -124,11 +192,9 @@ std::vector ModelManager::Decide( //std::memcpy(state.data_ptr(), &(input), sizeof(input)); auto blocks_input_gpu = blocks_input_cpu.to(device, true); auto state_inputs_gpu = state_inputs_cpu.to(device, true); - - //printf("1.1\n"); - auto one_hotted_blocks = torch::one_hot(blocks_input_gpu, 3); + auto one_hotted_blocks = torch::one_hot(blocks_input_gpu, n_grid_channels); //printf("1.2\n"); one_hotted_blocks = one_hotted_blocks.to(precision); //printf("1.3\n"); @@ -139,14 +205,14 @@ std::vector ModelManager::Decide( //states.push_back(state); // Play. 
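Decide() widens every tile id of the 1089-cell grid into n_grid_channels one-hot channels, casts to the model precision, and concatenates the result with the 78 scalar inputs to form the 3345-wide state. A standalone sketch of that preprocessing, with a made-up batch size (only 78/1089/3 are taken from the patch):

#include <torch/torch.h>
#include <iostream>

int main()
{
	const int64_t batch = 4, n_scalars = 78, n_tiles = 1089, n_channels = 3;

	// Scalar features plus integer tile ids in [0, n_channels).
	torch::Tensor scalars = torch::rand({batch, n_scalars});
	torch::Tensor tiles = torch::randint(0, n_channels, {batch, n_tiles}, torch::kInt64);

	// one_hot needs an integral tensor and returns kLong; cast to the model
	// precision and flatten the (tiles, channels) pair back into one feature axis.
	torch::Tensor one_hot = torch::one_hot(tiles, n_channels)
	                            .to(torch::kF32)
	                            .view({batch, n_tiles * n_channels});

	torch::Tensor state = torch::cat({scalars, one_hot}, /*dim=*/1);
	std::cout << state.sizes() << std::endl; // [4, 78 + 1089 * 3] = [4, 3345]
	return 0;
}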
//cout << state_forward.sizes() << endl; - at::cuda::getCurrentCUDAStream().synchronize(); + //at::cuda::getCurrentCUDAStream().synchronize(); auto now = std::chrono::high_resolution_clock::now(); time_pre_forward = std::chrono::duration(now - decide_time).count() * 1000.; //std::cout << "Time to allocate and transfer: " << std::chrono::duration(now - decide_time).count() << std::endl; decide_time = std::chrono::high_resolution_clock::now(); auto av = ac->actor_forward(state_forward); - at::cuda::getCurrentCUDAStream().synchronize(); + //at::cuda::getCurrentCUDAStream().synchronize(); now = std::chrono::high_resolution_clock::now(); time_forward = std::chrono::duration(now - decide_time).count() * 1000.; @@ -155,7 +221,7 @@ std::vector ModelManager::Decide( //printf("2.1\n"); av = ac->normal_actor(av); - at::cuda::getCurrentCUDAStream().synchronize(); + //at::cuda::getCurrentCUDAStream().synchronize(); now = std::chrono::high_resolution_clock::now(); time_normal = std::chrono::duration(now - decide_time).count() * 1000.; @@ -216,7 +282,7 @@ std::vector ModelManager::Decide( //tValues = tValues.to(torch::kCPU); //auto now = std::chrono::high_resolution_clock::now(); //std::cout << "Time to .to: " << std::chrono::duration(now - decide_time).count() << std::endl; - at::cuda::getCurrentCUDAStream().synchronize(); + //at::cuda::getCurrentCUDAStream().synchronize(); now = std::chrono::high_resolution_clock::now(); time_to_cpu = std::chrono::duration(now - decide_time).count() * 1000.; decide_time = std::chrono::high_resolution_clock::now(); @@ -515,7 +581,7 @@ void ModelManager::SaveReplays() return; } -void ModelManager::Update(double avg_reward, double& avg_training_loss) +void ModelManager::Update(double avg_reward, double &avg_training_loss, double &avg_actor_loss, double &avg_critic_loss) { // Update. 
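The Decide() hunks also comment out the at::cuda::getCurrentCUDAStream().synchronize() calls that used to bracket the timers. That speeds up the loop but changes what the timings mean: CUDA work is launched asynchronously, so a std::chrono measurement without a synchronize mostly captures launch overhead. A small sketch of the difference, assuming a CUDA-enabled LibTorch build:

#include <torch/torch.h>
#include <ATen/cuda/CUDAContext.h>
#include <chrono>
#include <iostream>

int main()
{
	torch::Tensor a = torch::rand({4096, 4096}, torch::kCUDA);
	torch::Tensor b = torch::rand({4096, 4096}, torch::kCUDA);

	auto t0 = std::chrono::high_resolution_clock::now();
	torch::Tensor c = torch::matmul(a, b); // enqueued, not necessarily finished
	auto t1 = std::chrono::high_resolution_clock::now();

	at::cuda::getCurrentCUDAStream().synchronize(); // wait for the kernel to finish
	auto t2 = std::chrono::high_resolution_clock::now();

	std::cout << "launch only: "
	          << std::chrono::duration<double, std::milli>(t1 - t0).count() << " ms, "
	          << "with sync: "
	          << std::chrono::duration<double, std::milli>(t2 - t0).count() << " ms\n";
	return 0;
}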
//printf("Updating the network.\n"); @@ -545,20 +611,20 @@ void ModelManager::Update(double avg_reward, double& avg_training_loss) //printf("UPDATING111\n"); try { - avg_training_loss = PPO::update(ac, opt, rewards.size(), ppo_epochs, mini_batch_size, dbeta, gamma, lambda, device, clip_param); + PPO::update(ac, opt, rewards.size(), ppo_epochs, mini_batch_size, dbeta, gamma, lambda, device, avg_training_loss, avg_actor_loss, avg_critic_loss, clip_param); } catch(const std::exception &e) { std::cout << "PPO::update crashed with reason: " << e.what() << std::endl; exit(1); } - scheduler->step(avg_reward); + //scheduler->step(avg_reward); ac->presample_normal(iReplaysPerBot, count_bots); - for(auto &group : opt->param_groups()) + /*for(auto &group : opt->param_groups()) { auto lr = group.options().get_lr(); std::cout << "Current learning rate: " << lr << std::endl; - } + }*/ //printf("UPDATed\n"); //printf("4"); // c = 0; @@ -602,3 +668,8 @@ int64_t ModelManager::GetCountPPOEpochs() { return ppo_epochs; } + +bool ModelManager::IsTraining() +{ + return ac->is_training(); +} diff --git a/src/engine/server/NN/ModelManager.h b/src/engine/server/NN/ModelManager.h index d33ed58..396137b 100644 --- a/src/engine/server/NN/ModelManager.h +++ b/src/engine/server/NN/ModelManager.h @@ -37,7 +37,7 @@ struct ModelInputInputs // Old hook angle according to tee vec2 hook_old_angle; - // Path of the tee + // Path towards the finish containing 30 steps(blocks) vec2 path[30]; }; @@ -81,10 +81,12 @@ struct ModelManager void Reward(float reward, bool done); void SaveReplays(); - void Update(double avg_reward, double &avg_training_loss); + void Update(double avg_reward, double &avg_training_loss, double &avg_actor_loss, double &avg_critic_loss); void Save(std::string filename); + bool IsTraining(); + size_t GetCountOfReplays(); // Return starting learning rate double GetLearningRate(); diff --git a/src/engine/server/NN/Models.h b/src/engine/server/NN/Models.h index f5403f5..b1f677d 100644 --- a/src/engine/server/NN/Models.h +++ b/src/engine/server/NN/Models.h @@ -12,6 +12,7 @@ struct ActorCriticImpl : public torch::nn::Module { int64_t n_in, n_out, used_presamples; + // Actor. 
//torch::nn::Linear a_lin1_, a_lin2_, /*a_lin3_,*/ a_lin4_; torch::nn::Sequential actor_network; @@ -32,38 +33,64 @@ struct ActorCriticImpl : public torch::nn::Module actor_network(torch::nn::Sequential( torch::nn::Linear(n_in, 2048), torch::nn::ReLU(), - torch::nn::Linear(2048, 1024), + torch::nn::Linear(2048, 1024), torch::nn::ReLU(), //torch::nn::Dropout(0.2), torch::nn::Linear(1024, 512), torch::nn::ReLU(), - //torch::nn::Dropout(0.2), + // torch::nn::Dropout(0.2), torch::nn::Linear(512, 256), - torch::nn::ReLU(), + torch::nn::ReLU(), //torch::nn::Dropout(0.2), torch::nn::Linear(256, 128), torch::nn::ReLU(), - torch::nn::Linear(128, n_out), - torch::nn::Tanh())), + //torch::nn::Dropout(0.2), + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + // torch::nn::Dropout(0.2), + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + // torch::nn::Dropout(0.2), + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + // torch::nn::Dropout(0.2), + //torch::nn::Linear(256, 128), + //torch::nn::ReLU(), + torch::nn::Linear(128, n_out)/*, + torch::nn::Tanh()*/)), mu_(torch::full(n_out, 0.)), log_std_(torch::full(n_out, std)), critic_network(torch::nn::Sequential( - torch::nn::Linear(n_in, 1024), + torch::nn::Linear(n_in, 2048), torch::nn::ReLU(), - torch::nn::Linear(1024, 512), + //torch::nn::Dropout(0.2), + torch::nn::Linear(2048, 1024), torch::nn::ReLU(), //torch::nn::Dropout(0.2), - torch::nn::Linear(512, 256), + torch::nn::Linear(1024, 512), torch::nn::ReLU(), + //torch::nn::Dropout(0.2), + torch::nn::Linear(512, 256), + torch::nn::ReLU(), //torch::nn::Dropout(0.2), torch::nn::Linear(256, 128), torch::nn::ReLU(), - torch::nn::Linear(128, 64), - torch::nn::ReLU(), //torch::nn::Dropout(0.2), - torch::nn::Linear(64, n_out), - torch::nn::Tanh(), - torch::nn::Linear(n_out, 1) + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + //torch::nn::Dropout(0.2), + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + //torch::nn::Dropout(0.2), + //torch::nn::Linear(1024, 1024), + //torch::nn::ReLU(), + // torch::nn::Dropout(0.2), + //torch::nn::Linear(128, 64), + //torch::nn::ReLU(), + //torch::nn::Dropout(0.2), + //torch::nn::Linear(64, n_out), + //torch::nn::ReLU(), + torch::nn::Linear(128, 1) )) // Critic @@ -73,6 +100,8 @@ struct ActorCriticImpl : public torch::nn::Module // c_lin4_(torch::nn::Linear(32, n_out)), // c_val_(torch::nn::Linear(n_out, 1)) { + //register_module("conv_layers", conv_layers); + //register_module("scalar_fc_layers", scalar_fc_layers); register_module("actor_network", actor_network); // Register the modules. // register_module("a_lin1", a_lin1_); @@ -81,6 +110,7 @@ struct ActorCriticImpl : public torch::nn::Module //register_module("a_lin4", a_lin4_); register_parameter("log_std", log_std_); register_module("critic_network", critic_network); + // register_module("c_lin1", c_lin1_); // register_module("c_lin2", c_lin2_); @@ -118,6 +148,18 @@ struct ActorCriticImpl : public torch::nn::Module return val; } + // Forward pass. + auto actor_parameters() + { + return actor_network->parameters(); + } + + // Forward pass. + auto critic_parameters() + { + return critic_network->parameters(); + } + // Forward pass. 
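In Models.h the actor's trailing Tanh is removed, so actor_forward now emits unsquashed means, and exploration noise comes from the separately registered log_std parameter. A standalone sketch of such a diagonal-Gaussian head with hypothetical sizes and a hand-rolled log-probability (the project's own sampling lives in normal_actor/presample_normal, which this does not reproduce):

#include <torch/torch.h>
#include <iostream>

int main()
{
	const int64_t n_in = 16, n_out = 9;

	torch::nn::Sequential actor(
		torch::nn::Linear(n_in, 64), torch::nn::ReLU(),
		torch::nn::Linear(64, n_out)); // raw means, no trailing Tanh
	torch::Tensor log_std = torch::full({n_out}, -2.0, torch::requires_grad());

	torch::Tensor state = torch::rand({1, n_in});
	torch::Tensor mu = actor->forward(state);
	torch::Tensor sigma = log_std.exp();

	// Reparameterized sample and its diagonal-Gaussian log-probability.
	torch::Tensor action = mu + sigma * torch::randn_like(mu);
	torch::Tensor log_prob =
		(-0.5 * ((action - mu) / sigma).pow(2) - log_std - 0.9189385332046727)
			.sum({1}, /*keepdim=*/true); // 0.91893... = log(sqrt(2 * pi))

	std::cout << action << std::endl << log_prob << std::endl;
	return 0;
}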
auto normal_actor(torch::Tensor x) -> torch::Tensor { diff --git a/src/engine/server/NN/ProximalPolicyOptimization.h b/src/engine/server/NN/ProximalPolicyOptimization.h index 007ca71..062930f 100644 --- a/src/engine/server/NN/ProximalPolicyOptimization.h +++ b/src/engine/server/NN/ProximalPolicyOptimization.h @@ -30,7 +30,7 @@ class PPO static auto Initilize(size_t batch_size, size_t count_players) -> void; static auto update(ActorCritic& ac, std::shared_ptr &opt, - uint steps, uint epochs, uint mini_batch_size, double beta, float gamma, float lambda, c10::DeviceType device, double clip_param = .2) -> double; + uint steps, uint epochs, uint mini_batch_size, double beta, float gamma, float lambda, c10::DeviceType device, double &avg_training_loss, double &avg_actor_loss, double &avg_critic_loss, double clip_param = .2) -> void; static auto save_replay(torch::Tensor &state, torch::Tensor &action, torch::Tensor &log_prob, @@ -180,10 +180,10 @@ class ReplayBuffer // advantages_concat = torch::cat(rewards, 1); //printf("1\n"); - states_concatenated_reshaped = states_concatenated.reshape({states_concatenated.sizes()[0] * states_concatenated.sizes()[1], states_concatenated.sizes()[2]}); - actions_concat_reshaped = actions_concat.reshape({actions_concat.sizes()[0] * actions_concat.sizes()[1], actions_concat.sizes()[2]}); + states_concatenated_reshaped = states_concatenated.view({states_concatenated.sizes()[0] * states_concatenated.sizes()[1], states_concatenated.sizes()[2]}); + actions_concat_reshaped = actions_concat.view({actions_concat.sizes()[0] * actions_concat.sizes()[1], actions_concat.sizes()[2]}); //printf("1\n"); - log_probs_concat_reshaped = log_probs_concat.reshape({log_probs_concat.sizes()[0] * log_probs_concat.sizes()[1], log_probs_concat.sizes()[2]}); + log_probs_concat_reshaped = log_probs_concat.view({log_probs_concat.sizes()[0] * log_probs_concat.sizes()[1], log_probs_concat.sizes()[2]}); //printf("1\n"); // std::cout << "Rewards size: " << rewards.sizes() << " " << rewards.size(0) << std::endl; //rewards_concat = rewards_concat.reshape({rewards_concat.numel(), 1}); @@ -338,9 +338,9 @@ torch::Tensor calculate_returns(std::vector &rewards, std::vector & { float delta = 0; if(i == rewards.size() - 1) - delta = rewards[i] + gamma * vValues[i] * (1 - dones[i]) - vValues[i]; + delta = (rewards[i] / 200.f) + gamma * vValues[i] * (1 - dones[i]) - vValues[i]; else - delta = rewards[i] + gamma * vValues[i + 1] * (1 - dones[i]) - vValues[i]; + delta = (rewards[i] / 200.f) + gamma * vValues[i + 1] * (1 - dones[i]) - vValues[i]; gae = delta + gamma * lambda * (1 - dones[i]) * gae; // printf("FINNNN1.4\n"); @@ -455,9 +455,11 @@ auto PPO::count_of_replays() -> size_t auto PPO::update(ActorCritic &ac, std::shared_ptr &opt, - uint steps, uint epochs, uint mini_batch_size, double beta, float gamma, float lambda, c10::DeviceType device, double clip_param) -> double + uint steps, uint epochs, uint mini_batch_size, double beta, float gamma, float lambda, c10::DeviceType device, double &avg_training_loss, double &avg_actor_loss, double &avg_critic_loss, double clip_param) -> void { torch::Tensor total_loss_tensor = torch::zeros({}, torch::kCUDA); // Initialize tensor to accumulate loss + torch::Tensor total_actor_loss_tensor = torch::zeros({}, torch::kCUDA); // Initialize tensor to accumulate loss + torch::Tensor total_critic_loss_tensor = torch::zeros({}, torch::kCUDA); // Initialize tensor to accumulate loss { std::deque states, actions, values, log_probs; @@ -466,7 +468,7 @@ auto 
PPO::update(ActorCritic &ac, // Wait for all log probs to come to cpu at::cuda::getCurrentCUDAStream().synchronize(); //at::cuda::stream_synchronize(at::cuda::getCurrentCUDAStream()); - + //printf("1\n"); //printf("1\n"); for(size_t i = 0; i < replay_buffer->capacity() / mini_batch_size /*&& i < epochs*/; i++) { @@ -486,7 +488,7 @@ auto PPO::update(ActorCritic &ac, torch::Tensor cpy_inputs = state.index({"...", torch::indexing::Slice(0, 78)}); //printf("UPDATING0.3\n"); //std::cout << state.sizes() << std::endl; - torch::Tensor cpy_blocks = torch::one_hot(state.index({"...", torch::indexing::Slice(78, 1167)}).to(torch::kInt64), 3).to(torch::kF32).view({state.size(0), -1}); + torch::Tensor cpy_blocks = torch::one_hot(state.index({"...", torch::indexing::Slice(78, 1167)}).to(torch::kInt64), 3).to(torch::kF32).view({(long long)mini_batch_size, -1}); //printf("UPDATING0.4\n"); auto cpy_state_forward = torch::cat({cpy_inputs, cpy_blocks}, 1); //printf("UPDATING0.5\n"); @@ -508,6 +510,7 @@ auto PPO::update(ActorCritic &ac, //std::cout << mini_batch_size << std::endl; //std::cout << replay_buffer->size() / mini_batch_size << std::endl; //printf("2\n"); + //Sleep(5000); replay_buffer->clear(); //printf("CHECK\n"); @@ -522,6 +525,7 @@ auto PPO::update(ActorCritic &ac, { for(size_t i = 0; i < replay_buffer->capacity() / mini_batch_size; i++) { + //c10::cuda::CUDACachingAllocator::emptyCache(); //auto decide_time = std::chrono::high_resolution_clock::now(); torch::Tensor states_cpy = states[i]; @@ -557,7 +561,7 @@ auto PPO::update(ActorCritic &ac, torch::Tensor cpy_inputs = cpy_sta.index({"...", torch::indexing::Slice(0, 78)}); // printf("UPDATING0.3\n"); - torch::Tensor cpy_blocks = torch::one_hot(cpy_sta.index({"...", torch::indexing::Slice(78, 1167)}).to(torch::kInt64), 3).to(torch::kF32).view({cpy_sta.size(0), -1}); + torch::Tensor cpy_blocks = torch::one_hot(cpy_sta.index({"...", torch::indexing::Slice(78, 1167)}).to(torch::kInt64), 3).to(torch::kF32).view({(long long)mini_batch_size, -1}); // printf("UPDATING0.4\n"); cpy_sta = torch::cat({cpy_inputs, cpy_blocks}, 1); @@ -573,13 +577,15 @@ auto PPO::update(ActorCritic &ac, // printf("UPDATING0.1.3.2\n"); // std::cout << dones_cpy.sizes() << std::endl; // std::cout << dones_cpy << std::endl; - + //printf("3\n"); + //Sleep(7000); + //std::cout << cpy_values << std::endl; auto returnsee = calculate_returns(rewards[i], dones[i], cpy_values, gamma, lambda); //auto now = std::chrono::high_resolution_clock::now(); //std::cout << "Time to prepare: " << (float)(std::chrono::duration_cast(now - decide_time).count()) << std::endl; // std::cout << returnsee.sizes() << std::endl; - // std::cout << returnsee << std::endl; + //std::cout << returnsee << std::endl; // auto decide_time = std::chrono::high_resolution_clock::now(); torch::Tensor cpy_ret = returnsee; // normalize_rewards(returnsee); @@ -603,6 +609,8 @@ auto PPO::update(ActorCritic &ac, // printf("UPDATING1.1\n"); auto action = ac->actor_forward(cpy_sta); + //printf("4\n"); + //Sleep(7000); // printf("33.0\n"); // std::cout << action.sizes() << std::endl; // std::cout << cpy_act.sizes() << std::endl; @@ -625,8 +633,11 @@ auto PPO::update(ActorCritic &ac, // printf("UPDATING1.5.1\n"); auto surr2 = torch::clamp(ratio, 1. - clip_param, 1. 
+ clip_param) * cpy_adv; // printf("UPDATING1.6\n"); - + //printf("4.9\n"); + //Sleep(7000); auto val = ac->critic_forward(cpy_sta); + //printf("5\n"); + //Sleep(7000); auto actor_loss = -torch::min(surr1, surr2).mean(); // printf("UPDATING1.7\n"); auto critic_loss = torch::nn::functional::mse_loss(val, cpy_ret); //(cpy_ret - val).pow(2).mean(); @@ -653,7 +664,11 @@ auto PPO::update(ActorCritic &ac, // printf("UPDATING1.12\n"); // total_loss += loss.item(); + total_actor_loss_tensor += actor_loss; + total_critic_loss_tensor += critic_loss; total_loss_tensor += loss; + //printf("Pre next\n"); + //Sleep(5000); // printf("Chillin\n"); // Sleep(10000); @@ -673,12 +688,14 @@ auto PPO::update(ActorCritic &ac, double avg_loss = 0; //auto decide_time = std::chrono::high_resolution_clock::now(); - avg_loss = total_loss_tensor.item() / (epochs * replay_buffer->capacity() / mini_batch_size); + avg_training_loss = total_loss_tensor.item() / (epochs * replay_buffer->capacity() / mini_batch_size); + avg_actor_loss = total_actor_loss_tensor.item() / (epochs * replay_buffer->capacity() / mini_batch_size); + avg_critic_loss = total_critic_loss_tensor.item() / (epochs * replay_buffer->capacity() / mini_batch_size); //auto now = std::chrono::high_resolution_clock::now(); //std::cout << "Time to calculate loss: " << (float)(std::chrono::duration_cast(now - decide_time).count()) << std::endl; //std::cout << "Average training Loss: " << avg_loss << std::endl; //c10::cuda::CUDACachingAllocator::emptyCache(); - return avg_loss; + return; } diff --git a/src/engine/server/server.cpp b/src/engine/server/server.cpp index fbb07ea..b701cc0 100644 --- a/src/engine/server/server.cpp +++ b/src/engine/server/server.cpp @@ -2991,7 +2991,8 @@ int CServer::Run() dbg_msg("server", "+-------------------------+"); } - int update_tick = 1000; + int skip_tick = 4; + int update_tick = 1000 * skip_tick; std::random_device rd; std::mt19937 gen(rd()); @@ -3047,6 +3048,36 @@ int CServer::Run() AStar astar(pathfinding_grid, vFinishPoses); printf("Pathfinder created.\n"); + printf("Creating train directory with folders...\n"); + + static auto dir_name = to_string(std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count()); + + if(fs_makedir("train") != 0) + { + cout << "Can't make train directory" << endl; + exit(1); + } + + if(fs_makedir((string("train\\") + dir_name).c_str()) != 0) + { + cout << "Can't make dir for this learning directory" << endl; + exit(1); + } + + if(fs_makedir(string("train\\" + dir_name + "\\models").c_str()) != 0) + { + cout << "Can't make models directory" << endl; + exit(1); + } + + if(fs_makedir(string("train\\" + dir_name + "\\demos").c_str()) != 0) + { + cout << "Can't make demos directory" << endl; + exit(1); + } + + printf("Train directory with folders created.\n"); + //std::pair start = {4, 4}; //std::pair goal = {4, 62}; @@ -3134,44 +3165,18 @@ int CServer::Run() vBotBestDistance[i] = {astar.distanceToGoal(spawn_point_pos), 0}; // auto tr = std::thread(RunNNForward, &model_manager, i, &vEvents, &vFinishEvents, &vInputs, &vOutputs); // tr.detach(); + //char aFilename[IO_MAX_PATH_LENGTH]; + //str_format(aFilename, sizeof(aFilename), "%s_%s_%d_%llu.demo", m_aCurrentMap, name.c_str(), m_NetServer.Address().port, time_get()); + //string path_demo = "train/" + dir_name + "/demos/" + aFilename; + //int ret = m_aDemoRecorder[i].Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], 
"server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]); } } printf("Bots added\n"); printf("Initializing neural model...\n"); - ModelManager model_manager(count_bots * update_tick, count_bots); + ModelManager model_manager(count_bots * update_tick / skip_tick, count_bots); printf("Model initialized.\n"); - printf("Creating train directory with folders...\n"); - - static auto dir_name = to_string(std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count()); - - if(fs_makedir("train") != 0) - { - cout << "Can't make train directory" << endl; - exit(1); - } - - if(fs_makedir((string("train\\") + dir_name).c_str()) != 0) - { - cout << "Can't make dir for this learning directory" << endl; - exit(1); - } - - if(fs_makedir(string("train\\" + dir_name + "\\models").c_str()) != 0) - { - cout << "Can't make models directory" << endl; - exit(1); - } - - if(fs_makedir(string("train\\" + dir_name + "\\demos").c_str()) != 0) - { - cout << "Can't make demos directory" << endl; - exit(1); - } - - printf("Train directory with folders created.\n"); - printf("Creating data.csv file for statistics...\n"); std::ofstream logger; { @@ -3183,7 +3188,7 @@ int CServer::Run() std::cout << update_tick << std::endl;*/ sprintf_s(aFilename, sizeof(aFilename), "lr%.1embs%lldppoe%lldbots%drpb%d.csv", model_manager.GetLearningRate(), model_manager.GetMiniBatchSize(), model_manager.GetCountPPOEpochs(), count_bots, update_tick); logger.open("train\\" + dir_name + "\\" + aFilename); - logger << "Step,Average reward,TPS,Dies,Average distance,Training loss,Learning rate,Time since start,Time to decide,Time to tick,Time rest,Time pre forward,Time forward,Time normal,Time to cpu,Time process last" << endl; + logger << "Step,Average reward,TPS,Dies,Average distance,Training loss,Actor loss,Critic loss,Learning rate,Time since start,Time to decide,Time to tick,Time rest,Time pre forward,Time forward,Time normal,Time to cpu,Time process last" << endl; } printf("data.csv file created and initialized.\n"); @@ -3192,7 +3197,7 @@ int CServer::Run() char aFilename[IO_MAX_PATH_LENGTH]; str_format(aFilename, sizeof(aFilename), "%s_%d_%llu.demo", m_aCurrentMap, m_NetServer.Address().port, time_get()); path_demo = "train/" + dir_name + "/demos/" + aFilename; - int ret = m_aDemoRecorder[MAX_CLIENTS].Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], "server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]); + int ret = m_aDemoRecorder[0].Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], "server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]); } // start game @@ -3351,6 +3356,7 @@ int CServer::Run() static double cumulative_time_process_last = 0; // Handle bots + if(m_CurrentGameTick % skip_tick == 0) { auto gamelayer = gamecontext->Layers()->GameLayer(); const CTile *pTiles = static_cast(Kernel()->RequestInterface()->GetData(gamelayer->m_Data)); @@ -3363,7 +3369,7 @@ int CServer::Run() // Rewards static float checkpoint_reward = 100.f / 32.f; - static float die_reward = -500.f / 32.f; // -100.f / 32.f + static float die_reward = -250.f / 32.f; // -500.f / 32.f static float finish_reward = 1000.f / 32.f; static float step_reward = -0.01f; @@ -3418,7 +3424,19 @@ int CServer::Run() } else if(finished) { + 
//m_aDemoRecorder[bot->GetCID()].Stop(); + + /*char aNewFilename[IO_MAX_PATH_LENGTH]; + str_format(aNewFilename, sizeof(aNewFilename), "average_dist_%.2f_rew_%.2f_%s_%llu.demo", avg_dist, avg_reward, m_aCurrentMap, time_get_impl()); + path_demo = "train/" + dir_name + "/demos/" + aNewFilename; + Storage()->RenameFile(m_aDemoRecorder[bot->GetCID()].GetCurrentFilename(), path_demo.c_str(), IStorage::TYPE_ABSOLUTE);*/ + bot->KillCharacter(); + + /*char aFilename[IO_MAX_PATH_LENGTH]; + str_format(aFilename, sizeof(aFilename), "%s_%s_%d_%llu.demo", m_aCurrentMap, name.c_str(), m_NetServer.Address().port, time_get()); + string path_demo = "train/" + dir_name + "/demos/" + aFilename; + int ret = m_aDemoRecorder[i].Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], "server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]);*/ } //decide_time = time_get_impl(); @@ -3500,7 +3518,9 @@ int CServer::Run() } - auto long_stay_penalty = -0.0003f * (m_CurrentGameTick - vBotBestDistance[i].second); + auto tick_diff = m_CurrentGameTick - vBotBestDistance[i].second; + + auto long_stay_penalty = -0.0003f * tick_diff; //cout << reward << endl; @@ -3569,7 +3589,7 @@ int CServer::Run() float avg_dist = ((float)moved_distance / (float)(dies + count_bots)); rewards.clear(); - if(m_CurrentGameTick % 20000 == 0) + if(m_CurrentGameTick % (20000 * skip_tick) == 0 && model_manager.IsTraining()) { printf("UPDATING\n"); std::vector vAverageDistancePerSpawn(vSpawnCumulativeReward.size()); @@ -3628,9 +3648,9 @@ int CServer::Run() }*/ } - auto demo_recorder = &m_aDemoRecorder[MAX_CLIENTS]; + auto demo_recorder = &m_aDemoRecorder[0]; - if(demo_recorder->IsRecording()) + if(demo_recorder->IsRecording() && model_manager.IsTraining()) { demo_recorder->Stop(); char aNewFilename[IO_MAX_PATH_LENGTH]; @@ -3639,12 +3659,12 @@ int CServer::Run() Storage()->RenameFile(demo_recorder->GetCurrentFilename(), path_demo.c_str(), IStorage::TYPE_ABSOLUTE); } //printf("111\n"); - if(m_CurrentGameTick % 20000 == 0) + if(m_CurrentGameTick % (20000 * skip_tick) == 0 && model_manager.IsTraining()) { model_manager.Save("train\\" + dir_name + "\\models\\last"); } //printf("222\n"); - if(avg_dist > best_average) + if(avg_dist > best_average && model_manager.IsTraining()) { best_average = avg_dist; model_manager.Save("train\\" + dir_name + "\\models\\best"); // best" + to_string(average) @@ -3670,15 +3690,19 @@ int CServer::Run() //printf("ret: %i\n", ret); //printf("start_u\n"); //int64_t update_time = time_get_impl(); - double avg_loss = 0; - model_manager.Update(avg_dist, avg_loss); + double avg_training_loss = 0; + double avg_actor_loss = 0; + double avg_critic_loss = 0; + model_manager.Update(avg_dist, avg_training_loss, avg_actor_loss, avg_critic_loss); //cout << "Time update: " << (float)(time_get_impl() - decide_time) / (float)time_freq() << endl; logger << m_CurrentGameTick / update_tick << "," << avg_reward << "," << ticks_per_second << "," << dies << "," << avg_dist - << "," << avg_loss + << "," << avg_training_loss + << "," << avg_actor_loss + << "," << avg_critic_loss << "," << model_manager.GetCurrentLearningRate() << "," << (float)time_get_impl() / (float)time_freq() << "," << (cumulative_time_to_decide / (float)update_tick) @@ -3690,7 +3714,7 @@ int CServer::Run() << "," << (cumulative_time_to_cpu / (float)update_tick) << "," << (cumulative_time_process_last / (float)update_tick) << endl; - cout << "Avg. 
reward: " << avg_reward << " TPS: " << ticks_per_second << " Avg. Training Loss: " << avg_loss + cout << "Avg. reward: " << avg_reward << " TPS: " << ticks_per_second << " Avg. Training Loss: " << avg_training_loss << " Dies: " << dies << " Avg. distance: " << avg_dist << endl; dies \ = moved_distance \ @@ -3707,12 +3731,12 @@ int CServer::Run() = 0; //cout << "Time to update: " << (float)(time_get_impl() - update_time) / (float)time_freq() << endl; //printf("end\n"); - if(m_CurrentGameTick % 20000 == 0) + if(m_CurrentGameTick % 20000 == 0 && model_manager.IsTraining()) { char aFilename[IO_MAX_PATH_LENGTH]; str_format(aFilename, sizeof(aFilename), "%s_%d_%llu.demo", m_aCurrentMap, m_NetServer.Address().port, time_get()); path_demo = "train\\" + dir_name + "\\demos\\" + aFilename; - //int ret = demo_recorder->Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], "server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]); + int ret = demo_recorder->Start(Storage(), m_pConsole, path_demo.c_str(), GameServer()->NetVersion(), m_aCurrentMap, &m_aCurrentMapSha256[MAP_TYPE_SIX], m_aCurrentMapCrc[MAP_TYPE_SIX], "server", m_aCurrentMapSize[MAP_TYPE_SIX], m_apCurrentMapData[MAP_TYPE_SIX]); } decide_time = std::chrono::high_resolution_clock::now(); } @@ -3731,7 +3755,7 @@ int CServer::Run() auto bot_character_core = bot_character->Core(); // auto bot_2_character = gamecontext->GetPlayerChar(bot_2->GetCID()); //printf("HAHHA3.3\n"); - if(m_CurrentGameTick != 0 && m_CurrentGameTick % update_tick == 0) + if(m_CurrentGameTick != 0 && m_CurrentGameTick % update_tick == 0 && model_manager.IsTraining()) { // Add to cumulative spawn distance vector //int iOldSpawnPoint = vBotsSpawnPos[i]; @@ -3990,28 +4014,29 @@ int CServer::Run() // //printf(buf); //} //printf("HAHHA4\n"); + started = true; + auto now = std::chrono::high_resolution_clock::now(); + cumulative_time_rest += std::chrono::duration_cast>(now - decide_time).count() * 1000.f; + // cout << "Time rest: " << std::chrono::duration_cast>(now - decide_time).count() << endl; + double time_pre_forward = 0; + double time_forward = 0; + double time_normal = 0; + double time_to_cpu = 0; + double time_process_last = 0; + decide_time = std::chrono::high_resolution_clock::now(); + vOutputs = model_manager.Decide(vInputInputs, vInputBlocks, time_pre_forward, time_forward, time_normal, time_to_cpu, time_process_last); + now = std::chrono::high_resolution_clock::now(); + cumulative_time_to_decide += std::chrono::duration_cast>(now - decide_time).count() * 1000.f; + cumulative_time_pre_forward += time_pre_forward; + cumulative_time_forward += time_forward; + cumulative_time_normal += time_normal; + cumulative_time_to_cpu += time_to_cpu; + cumulative_time_process_last += time_process_last; + // cout << "Time to decide: " << std::chrono::duration_cast>(now - decide_time).count() << endl; + decide_time = std::chrono::high_resolution_clock::now(); } //printf("13\n"); - started = true; - auto now = std::chrono::high_resolution_clock::now(); - cumulative_time_rest += std::chrono::duration_cast>(now - decide_time).count() * 1000.f; - //cout << "Time rest: " << std::chrono::duration_cast>(now - decide_time).count() << endl; - double time_pre_forward = 0; - double time_forward = 0; - double time_normal = 0; - double time_to_cpu = 0; - double time_process_last = 0; - decide_time = std::chrono::high_resolution_clock::now(); - vOutputs = 
model_manager.Decide(vInputInputs, vInputBlocks, time_pre_forward, time_forward, time_normal, time_to_cpu, time_process_last); - now = std::chrono::high_resolution_clock::now(); - cumulative_time_to_decide += std::chrono::duration_cast>(now - decide_time).count() * 1000.f; - cumulative_time_pre_forward += time_pre_forward; - cumulative_time_forward += time_forward; - cumulative_time_normal += time_normal; - cumulative_time_to_cpu += time_to_cpu; - cumulative_time_process_last += time_process_last; - //cout << "Time to decide: " << std::chrono::duration_cast>(now - decide_time).count() << endl; - decide_time = std::chrono::high_resolution_clock::now(); + //printf("KEK\n"); //printf("14\n"); for(int c = 0; c < MAX_CLIENTS; c++) @@ -4089,7 +4114,15 @@ int CServer::Run() model_angle = returned_model.angle * 299.f; model_direction = returned_model.direction; model_hook = returned_model.hook; - model_jump = returned_model.jump; + + if(m_CurrentGameTick % skip_tick != 1) + { + model_jump = 0; + } + else + { + model_jump = returned_model.jump; + } // printf("HAHHA33.3\n"); // printf("HAHHA34\n"); // static ModelManager model_manager; @@ -4265,7 +4298,7 @@ int CServer::Run() break; } //printf("21\n"); - now = std::chrono::high_resolution_clock::now(); + auto now = std::chrono::high_resolution_clock::now(); cumulative_time_to_tick += std::chrono::duration_cast>(now - decide_time).count() * 1000.f; //cout << "Time to tick: " << (float)(now - decide_time) / (float)time_freq() << endl; decide_time = std::chrono::high_resolution_clock::now(); @@ -4285,7 +4318,7 @@ int CServer::Run() // snap game if(NewTicks) { - if((Config()->m_SvHighBandwidth || (m_CurrentGameTick % 2) == 0) && m_aDemoRecorder[MAX_CLIENTS].IsRecording()) + if((Config()->m_SvHighBandwidth || (m_CurrentGameTick % 2) == 0) && m_aDemoRecorder[0].IsRecording()) DoSnapshot(); UpdateClientRconCommands(); diff --git a/src/game/server/player.cpp b/src/game/server/player.cpp index 7678d27..e73a7a0 100644 --- a/src/game/server/player.cpp +++ b/src/game/server/player.cpp @@ -247,8 +247,8 @@ void CPlayer::Tick() m_pCharacter = 0; } } - else if(m_Spawning && !m_WeakHookSpawn) - TryRespawn(); + /*else if(m_Spawning && !m_WeakHookSpawn) + TryRespawn();*/ } else { @@ -307,8 +307,8 @@ void CPlayer::PostPostTick() if(!Server()->ClientIngame(m_ClientID)) return; - if(!GameServer()->m_World.m_Paused && !m_pCharacter && m_Spawning && m_WeakHookSpawn) - TryRespawn(); + /*if(!GameServer()->m_World.m_Paused && !m_pCharacter && m_Spawning && m_WeakHookSpawn) + TryRespawn();*/ } void CPlayer::Snap(int SnappingClient)
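For reference, the PPO::update and calculate_returns hunks above implement the standard clipped-surrogate objective over GAE-style returns, now also reporting the averaged actor and critic losses separately. The following standalone sketch shows the same math on toy data; it omits the patch's 1/200 reward scaling and the beta-weighted entropy term, and the 0.5 critic weight is an assumption rather than the patch's exact combination:

#include <torch/torch.h>
#include <iostream>
#include <vector>

int main()
{
	// GAE-style returns, mirroring calculate_returns (the patch additionally
	// divides each reward by 200 before computing the deltas).
	std::vector<float> rewards = {1.f, 0.f, -1.f}, vals = {0.5f, 0.4f, 0.3f}, dones = {0.f, 0.f, 1.f};
	const float gamma = 0.99f, lambda = 0.95f;
	std::vector<float> rets(rewards.size());
	float gae = 0.f;
	for(int i = (int)rewards.size() - 1; i >= 0; --i)
	{
		float next_v = (i + 1 < (int)vals.size()) ? vals[i + 1] : vals[i];
		float delta = rewards[i] + gamma * next_v * (1.f - dones[i]) - vals[i];
		gae = delta + gamma * lambda * (1.f - dones[i]) * gae;
		rets[i] = gae + vals[i]; // one common formulation of the value target
	}

	// Clipped surrogate objective on a toy mini-batch (the surr1 / surr2 /
	// actor_loss / critic_loss lines in PPO::update).
	const int64_t batch = 6;
	const double clip_param = 0.2;

	torch::Tensor old_log_probs = torch::randn({batch, 1});
	torch::Tensor new_log_probs = old_log_probs + 0.1 * torch::randn({batch, 1});
	torch::Tensor advantages = torch::randn({batch, 1});
	torch::Tensor values = torch::randn({batch, 1});
	torch::Tensor returns = values + torch::randn({batch, 1});

	torch::Tensor ratio = (new_log_probs - old_log_probs).exp();
	torch::Tensor surr1 = ratio * advantages;
	torch::Tensor surr2 = torch::clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * advantages;
	torch::Tensor actor_loss = -torch::min(surr1, surr2).mean();
	torch::Tensor critic_loss = torch::nn::functional::mse_loss(values, returns);

	// Assumed weighting; the entropy bonus scaled by beta is left out here.
	torch::Tensor loss = actor_loss + 0.5 * critic_loss;

	std::cout << "returns[0] " << rets[0]
	          << "  actor " << actor_loss.item<double>()
	          << "  critic " << critic_loss.item<double>()
	          << "  total " << loss.item<double>() << std::endl;
	return 0;
}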