forked from BOINC/boinc

Commit

Merge pull request BOINC#4871 from BOINC/dpa_niu_prefs
Add not-in-use computing prefs
AenBleidd authored Sep 21, 2022
2 parents 7cb3fac + f627b09 commit 373bcff
Showing 21 changed files with 1,022 additions and 661 deletions.
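For orientation: the "not-in-use" preferences threaded through the diffs below are niu_max_ncpus_pct, niu_cpu_usage_limit, and niu_suspend_cpu_usage. Each mirrors an existing computing pref but applies only while the user is away from the machine, and a negative value means "not set, fall back to the in-use pref". A minimal illustrative sketch of that idea follows; it is not the actual prefs change (the real declarations presumably live in lib/prefs.h, which is not among the files shown in this excerpt):

// Illustrative only, not the actual lib/prefs.h diff: the three
// not-in-use prefs as plain fields, with -1 meaning "not set".
struct NIU_PREFS_SKETCH {
    // existing in-use prefs
    double max_ncpus_pct;          // % of CPUs usable while the user is active
    double cpu_usage_limit;        // CPU throttle: % of time apps may run
    double suspend_cpu_usage;      // suspend if non-BOINC CPU usage exceeds this %

    // new not-in-use variants; negative => use the in-use value
    double niu_max_ncpus_pct;
    double niu_cpu_usage_limit;
    double niu_suspend_cpu_usage;

    NIU_PREFS_SKETCH():
        max_ncpus_pct(0), cpu_usage_limit(100), suspend_cpu_usage(0),
        niu_max_ncpus_pct(-1), niu_cpu_usage_limit(-1), niu_suspend_cpu_usage(-1)
    {}
};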
client/app.cpp: 12 changes (5 additions & 7 deletions)
@@ -661,9 +661,9 @@ int ACTIVE_TASK::get_free_slot(RESULT* rp) {

// paranoia - don't allow unbounded slots
//
-if (j > gstate.ncpus*100) {
+if (j > gstate.n_usable_cpus*100) {
msg_printf(rp->project, MSG_INTERNAL_ERROR,
"exceeded limit of %d slot directories", gstate.ncpus*100
"exceeded limit of %d slot directories", gstate.n_usable_cpus*100
);
return ERR_NULL;
}
@@ -1210,16 +1210,14 @@ void* throttler(void*) {

while (1) {
client_mutex.lock();
-if (gstate.tasks_suspended
-|| gstate.global_prefs.cpu_usage_limit > 99
-|| gstate.global_prefs.cpu_usage_limit < 0.005
-) {
+double limit = gstate.current_cpu_usage_limit();
+if (gstate.tasks_suspended || limit == 0) {
client_mutex.unlock();
// ::Sleep((int)(1000*10)); // for Win debugging
boinc_sleep(10);
continue;
}
-double on, off, on_frac = gstate.global_prefs.cpu_usage_limit / 100;
+double on, off, on_frac = limit / 100;
#if 0
// sub-second CPU throttling
// DOESN'T WORK BECAUSE OF 1-SEC API POLL
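The duty-cycle mechanism itself is unchanged by this hunk; only the source of the limit moves from global_prefs.cpu_usage_limit to current_cpu_usage_limit(), which can differ while the user is away. A simplified sketch of one throttle cycle, with suspend_apps()/resume_apps() as hypothetical placeholders for the client's real task-control calls:

#include "util.h"   // boinc_sleep()

// Hypothetical placeholders for the client's real task-control calls,
// which run with client_mutex held.
void suspend_apps();
void resume_apps();

// Simplified sketch of one throttle cycle: run apps for on_frac of the
// period, keep them suspended for the rest.
void throttle_cycle_sketch(double limit_pct) {
    double on_frac = limit_pct / 100;       // e.g. 0.4 for a 40% limit
    if (on_frac >= 1) {                     // 100 means "no throttling"
        boinc_sleep(1.0);
        return;
    }
    const double period = 1.0;              // assumed 1-second duty cycle
    resume_apps();
    boinc_sleep(period * on_frac);          // "on" part of the cycle
    suspend_apps();
    boinc_sleep(period * (1 - on_frac));    // "off" part of the cycle
}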
client/app_control.cpp: 2 changes (1 addition & 1 deletion)
@@ -1546,7 +1546,7 @@ void ACTIVE_TASK_SET::get_msgs() {
last_time = gstate.now;

double et_diff = delta_t;
-double et_diff_throttle = delta_t * gstate.global_prefs.cpu_usage_limit/100;
+double et_diff_throttle = delta_t * gstate.current_cpu_usage_limit()/100;

for (i=0; i<active_tasks.size(); i++) {
atp = active_tasks[i];
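For the accounting above: throttle-aware apps have their elapsed time advanced by only the limit fraction of wall time. A tiny worked example with assumed numbers:

// Assumed numbers: 10 s of wall time since the last poll, 40% CPU limit.
double delta_t = 10.0;                              // wall-clock seconds
double limit = 40.0;                                // current_cpu_usage_limit()
double et_diff = delta_t;                           // unthrottled: 10 s
double et_diff_throttle = delta_t * limit / 100;    // throttled:    4 s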
client/client_state.cpp: 10 changes (6 additions & 4 deletions)
@@ -156,7 +156,7 @@ CLIENT_STATE::CLIENT_STATE()
redirect_io = false;
disable_graphics = false;
cant_write_state_file = false;
-ncpus = 1;
+n_usable_cpus = 1;
benchmarks_running = false;
client_disk_usage = 0.0;
total_disk_usage = 0.0;
@@ -202,8 +202,8 @@ void CLIENT_STATE::show_host_info() {
"Processor: %d %s %s",
host_info.p_ncpus, host_info.p_vendor, host_info.p_model
);
-if (ncpus != host_info.p_ncpus) {
-msg_printf(NULL, MSG_INFO, "Using %d CPUs", ncpus);
+if (n_usable_cpus != host_info.p_ncpus) {
+msg_printf(NULL, MSG_INFO, "Using %d CPUs", n_usable_cpus);
}
#if 0
if (host_info.m_cache > 0) {
@@ -626,7 +626,7 @@ int CLIENT_STATE::init() {
//
host_info.p_vm_extensions_disabled = false;

-set_ncpus();
+set_n_usable_cpus();
show_host_info();

// this follows parse_state_file() because that's where we read project names
@@ -992,6 +992,8 @@ bool CLIENT_STATE::poll_slow_events() {
#endif

if (user_active != old_user_active) {
+set_n_usable_cpus();
+// if niu_max_ncpus_pct pref is set, # usable CPUs may change
request_schedule_cpus(user_active?"Not idle":"Idle");
}

client/client_state.h: 35 changes (25 additions & 10 deletions)
@@ -18,7 +18,6 @@
#ifndef BOINC_CLIENT_STATE_H
#define BOINC_CLIENT_STATE_H

-#define NEW_CPU_THROTTLE
// do CPU throttling using a separate thread.
// This makes it possible to throttle faster than the client's 1-sec poll period
// NOTE: we can't actually do this because the runtime system's
@@ -35,9 +34,7 @@ using std::vector;

#include "coproc.h"
#include "util.h"
-#ifdef NEW_CPU_THROTTLE
#include "thread.h"
-#endif

#include "acct_mgr.h"
#include "acct_setup.h"
@@ -345,7 +342,7 @@ struct CLIENT_STATE {
// - an app fails to start (CS::schedule_cpus())
// - any project op is done via RPC (suspend/resume)
// - any result op is done via RPC (suspend/resume)
-void set_ncpus();
+void set_n_usable_cpus();

// --------------- cs_account.cpp:
int add_project(
@@ -363,12 +360,13 @@
double get_fraction_done(RESULT* result);
int input_files_available(RESULT*, bool, FILE_INFO** f=0);
ACTIVE_TASK* lookup_active_task_by_result(RESULT*);
-int ncpus;
-// Act like there are this many CPUs.
+int n_usable_cpus;
+// number of usable CPUs
// By default this is the # of physical CPUs,
// but it can be changed in two ways:
-// - type <ncpus>N</ncpus> in the config file
-// - type the max_ncpus_pct pref
+// - <ncpus>N</ncpus> in cc_config.xml
+// (for debugging; can be > # physical CPUs)
+// - the max_ncpus_pct and niu_max_ncpus_pct prefs

int latest_version(APP*, char*);
int app_finished(ACTIVE_TASK&);
@@ -426,6 +424,7 @@ struct CLIENT_STATE {
const char* fname = GLOBAL_PREFS_FILE_NAME,
const char* override_fname = GLOBAL_PREFS_OVERRIDE_FILE
);
+void print_global_prefs();
int save_global_prefs(const char* prefs, char* url, char* sched);
double available_ram();
double max_available_ram();
@@ -521,6 +520,24 @@ struct CLIENT_STATE {
#endif

KEYWORDS keywords;

+double current_cpu_usage_limit() {
+double x = global_prefs.cpu_usage_limit;
+if (!user_active && global_prefs.niu_cpu_usage_limit>=0) {
+x = global_prefs.niu_cpu_usage_limit;
+}
+if (x < 0.005 || x > 99.99) {
+x = 100;
+}
+return x;
+}
+double current_suspend_cpu_usage() {
+double x = global_prefs.suspend_cpu_usage;
+if (!user_active && global_prefs.niu_suspend_cpu_usage>=0) {
+x = global_prefs.niu_suspend_cpu_usage;
+}
+return x;
+}
};

extern CLIENT_STATE gstate;
@@ -535,10 +552,8 @@ extern double calculate_exponential_backoff(
int n, double MIN, double MAX
);

-#ifdef NEW_CPU_THROTTLE
extern THREAD_LOCK client_mutex;
extern THREAD throttle_thread;
-#endif

//////// TIME-RELATED CONSTANTS ////////////

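A self-contained illustration of how the two helpers added above behave: the not-in-use value wins only when the user is idle and the pref is set, and out-of-range limits are clamped to 100, i.e. no throttling. This re-implements the same logic outside the client purely as a sketch:

#include <cstdio>

// Re-implementation of current_cpu_usage_limit() for illustration only.
static double cpu_usage_limit_sketch(
    bool user_active, double cpu_usage_limit, double niu_cpu_usage_limit
) {
    double x = cpu_usage_limit;
    if (!user_active && niu_cpu_usage_limit >= 0) {
        x = niu_cpu_usage_limit;        // idle and not-in-use pref is set
    }
    if (x < 0.005 || x > 99.99) {
        x = 100;                        // out of range => no throttling
    }
    return x;
}

int main() {
    printf("%.0f\n", cpu_usage_limit_sketch(true,  60, 100));  // 60: user active
    printf("%.0f\n", cpu_usage_limit_sketch(false, 60, -1));   // 60: niu pref unset
    printf("%.0f\n", cpu_usage_limit_sketch(false, 60, 100));  // 100: idle, clamped
    return 0;
}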
client/cpu_sched.cpp: 56 changes (31 additions & 25 deletions)
@@ -95,7 +95,7 @@ struct PROC_RESOURCES {
COPROCS pr_coprocs;

void init() {
-ncpus = gstate.ncpus;
+ncpus = gstate.n_usable_cpus;
ncpus_used_st = 0;
ncpus_used_mt = 0;
pr_coprocs.clone(coprocs, false);
@@ -567,7 +567,7 @@ void CLIENT_STATE::reset_rec_accounting() {
//
static void update_rec() {
double f = gstate.host_info.p_fpops;
-double on_frac = gstate.global_prefs.cpu_usage_limit / 100;
+double on_frac = gstate.current_cpu_usage_limit() / 100;

for (unsigned int i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
@@ -628,7 +628,7 @@ double total_peak_flops() {
static double tpf;
if (first) {
first = false;
-tpf = gstate.host_info.p_fpops * gstate.ncpus;
+tpf = gstate.host_info.p_fpops * gstate.n_usable_cpus;
for (int i=1; i<coprocs.n_rsc; i++) {
COPROC& cp = coprocs.coprocs[i];
tpf += rsc_work_fetch[i].relative_speed * gstate.host_info.p_fpops * cp.count;
@@ -1013,7 +1013,7 @@ static void promote_multi_thread_jobs(vector<RESULT*>& runnable_jobs) {
vector<RESULT*>::iterator cur = runnable_jobs.begin();
while(1) {
if (cur == runnable_jobs.end()) break;
-if (cpus_used >= gstate.ncpus) break;
+if (cpus_used >= gstate.n_usable_cpus) break;
RESULT* rp = *cur;
if (rp->rr_sim_misses_deadline) break;
double nc = rp->avp->avg_ncpus;
@@ -1251,9 +1251,9 @@ bool CLIENT_STATE::enforce_run_list(vector<RESULT*>& run_list) {
// don't allow additional CPU jobs;
// allow coproc jobs if the resulting CPU load is at most ncpus+1
//
-if (ncpus_used >= ncpus) {
+if (ncpus_used >= n_usable_cpus) {
if (rp->uses_coprocs()) {
-if (ncpus_used + rp->avp->avg_ncpus > ncpus+1) {
+if (ncpus_used + rp->avp->avg_ncpus > n_usable_cpus+1) {
if (log_flags.cpu_sched_debug) {
msg_printf(rp->project, MSG_INFO,
"[cpu_sched_debug] skipping GPU job %s; CPU committed",
@@ -1266,7 +1266,7 @@ bool CLIENT_STATE::enforce_run_list(vector<RESULT*>& run_list) {
if (log_flags.cpu_sched_debug) {
msg_printf(rp->project, MSG_INFO,
"[cpu_sched_debug] all CPUs used (%.2f >= %d), skipping %s",
-ncpus_used, ncpus,
+ncpus_used, n_usable_cpus,
rp->name
);
}
@@ -1350,11 +1350,11 @@ bool CLIENT_STATE::enforce_run_list(vector<RESULT*>& run_list) {
}
}

-if (log_flags.cpu_sched_debug && ncpus_used < ncpus) {
+if (log_flags.cpu_sched_debug && ncpus_used < n_usable_cpus) {
msg_printf(0, MSG_INFO, "[cpu_sched_debug] using %.2f out of %d CPUs",
-ncpus_used, ncpus
+ncpus_used, n_usable_cpus
);
-if (ncpus_used < ncpus) {
+if (ncpus_used < n_usable_cpus) {
request_work_fetch("CPUs idle");
}
}
@@ -1622,12 +1622,14 @@ ACTIVE_TASK* CLIENT_STATE::get_task(RESULT* rp) {
return atp;
}

-// called at startup (after get_host_info())
-// and when general prefs have been parsed.
-// NOTE: GSTATE.NCPUS MUST BE 1 OR MORE; WE DIVIDE BY IT IN A COUPLE OF PLACES
+// called:
+// - at startup (after get_host_info())
+// - when general prefs have been parsed
+// - when user_active changes
+// NOTE: n_usable_cpus MUST BE 1 OR MORE; WE DIVIDE BY IT IN A COUPLE OF PLACES
//
-void CLIENT_STATE::set_ncpus() {
-int ncpus_old = ncpus;
+void CLIENT_STATE::set_n_usable_cpus() {
+int ncpus_old = n_usable_cpus;

// config file can say to act like host has N CPUs
//
@@ -1638,25 +1640,29 @@ void CLIENT_STATE::set_ncpus() {
first = false;
}
if (cc_config.ncpus>0) {
-ncpus = cc_config.ncpus;
-host_info.p_ncpus = ncpus; // use this in scheduler requests
+n_usable_cpus = cc_config.ncpus;
+host_info.p_ncpus = n_usable_cpus; // use this in scheduler requests
} else {
host_info.p_ncpus = original_p_ncpus;
-ncpus = host_info.p_ncpus;
+n_usable_cpus = host_info.p_ncpus;
}
-if (ncpus <= 0) {
-ncpus = 1; // shouldn't happen
+
+double p = global_prefs.max_ncpus_pct;
+if (!user_active && global_prefs.niu_max_ncpus_pct>=0) {
+p = global_prefs.niu_max_ncpus_pct;
+}
+if (p) {
+n_usable_cpus = (int)((n_usable_cpus * p)/100);
}

-if (global_prefs.max_ncpus_pct) {
-ncpus = (int)((ncpus * global_prefs.max_ncpus_pct)/100);
-if (ncpus == 0) ncpus = 1;
+if (n_usable_cpus <= 0) {
+n_usable_cpus = 1;
}

-if (initialized && ncpus != ncpus_old) {
+if (initialized && n_usable_cpus != ncpus_old) {
msg_printf(0, MSG_INFO,
"Number of usable CPUs has changed from %d to %d.",
-ncpus_old, ncpus
+ncpus_old, n_usable_cpus
);
request_schedule_cpus("Number of usable CPUs has changed");
request_work_fetch("Number of usable CPUs has changed");
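A worked example of the percentage step in the revised set_n_usable_cpus(), with assumed numbers (an 8-core host, 50% of CPUs allowed while in use, all CPUs allowed when idle). The cc_config <ncpus> override is ignored here, and this is an illustration rather than the client function itself:

#include <cstdio>

// Illustration of the revised CPU-count logic; not the client function itself.
static int usable_cpus_sketch(
    int p_ncpus, bool user_active, double max_ncpus_pct, double niu_max_ncpus_pct
) {
    int n = p_ncpus;
    double p = max_ncpus_pct;
    if (!user_active && niu_max_ncpus_pct >= 0) {
        p = niu_max_ncpus_pct;          // idle and not-in-use pref is set
    }
    if (p) {
        n = (int)((n * p) / 100);
    }
    if (n <= 0) n = 1;                  // must stay >= 1; it is a divisor elsewhere
    return n;
}

int main() {
    printf("%d\n", usable_cpus_sketch(8, true,  50, 100));   // 4 while in use
    printf("%d\n", usable_cpus_sketch(8, false, 50, 100));   // 8 when idle
    return 0;
}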
client/cs_benchmark.cpp: 4 changes (2 additions & 2 deletions)
@@ -262,9 +262,9 @@ void CLIENT_STATE::start_cpu_benchmarks(bool force) {
cpu_benchmarks_start = dtime();

benchmark_descs.clear();
-benchmark_descs.resize(ncpus);
+benchmark_descs.resize(n_usable_cpus);

-bm_ncpus = ncpus;
+bm_ncpus = n_usable_cpus;
benchmarks_running = true;

for (i=0; i<bm_ncpus; i++) {

