Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CP-33044 define attach/detach IDL calls for gpumon #4844

Merged
merged 1 commit into from
Nov 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ocaml/xapi-idl/gpumon/dune
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

(executable
(name gpumon_cli)
(public_name gpumon-cli)
(package xapi-idl)
(modules gpumon_cli)
(libraries
cmdliner
Expand Down
21 changes: 21 additions & 0 deletions ocaml/xapi-idl/gpumon/gpumon_interface.ml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ module RPC_API (R : RPC) = struct
module Nvidia = struct
(** common API call parameters *)

(** Parameter built from [Rpc.Types.unit]; used as the result parameter of
    [nvml_attach] and [nvml_detach], which return no value. *)
let unit_p = param Rpc.Types.unit

(** Parameter built from [Rpc.Types.bool]; used as the result parameter of
    [nvml_is_attached]. *)
let bool_p = param Rpc.Types.bool

(** Parameter wrapping [debug_info]; its description tells clients that the
    value is an uninterpreted string used for debugging. *)
let debug_info_p =
  let description = ["Uninterpreted string used for debugging."] in
  param ~description debug_info

Expand Down Expand Up @@ -217,5 +221,22 @@ module RPC_API (R : RPC) = struct
@-> nvidia_vgpu_metadata_list_p
@-> returning compatibility_p gpu_err
)

(** Declaration of the ["nvml_attach"] call: it takes [debug_info_p] and
    returns [unit_p], paired with [gpu_err] (presumably the error type of the
    call -- confirm against the definition of [gpu_err]). *)
let nvml_attach =
  let description =
    ["Attach nVidia cards to Gpumon for metrics and compatibility checking."]
  in
  let call_type = debug_info_p @-> returning unit_p gpu_err in
  declare "nvml_attach" description call_type

(** Declaration of the ["nvml_detach"] call: it takes [debug_info_p] and
    returns [unit_p], paired with [gpu_err] (presumably the error type of the
    call -- confirm against the definition of [gpu_err]). *)
let nvml_detach =
  let description = ["Detach nVidia cards from Gpumon"] in
  let call_type = debug_info_p @-> returning unit_p gpu_err in
  declare "nvml_detach" description call_type

(** Declaration of the ["nvml_is_attached"] call: it takes [debug_info_p] and
    returns [bool_p], paired with [gpu_err] (presumably the error type of the
    call -- confirm against the definition of [gpu_err]). *)
let nvml_is_attached =
  let description = ["Return true if nVidia cards are currently attached."] in
  let call_type = debug_info_p @-> returning bool_p gpu_err in
  declare "nvml_is_attached" description call_type
end
end
8 changes: 7 additions & 1 deletion ocaml/xcp-rrdd/lib/plugin/reporter.ml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ let wait_until_next_reading (module D : Debug.DEBUG) ~neg_shift ~uid ~protocol
)

let loop (module D : Debug.DEBUG) ~reporter ~report ~cleanup =
let log_backtrace e =
let trace = Printexc.(get_raw_backtrace () |> raw_backtrace_to_string) in
D.error "%s: %s" (Printexc.to_string e) trace
in

let running = ref true in
( match reporter with
| Some reporter ->
Expand Down Expand Up @@ -127,9 +132,10 @@ let loop (module D : Debug.DEBUG) ~reporter ~report ~cleanup =
cleanup () ;
running := false
| e ->
log_backtrace e ;
D.error "Unexpected error %s, sleeping for 10 seconds..."
(Printexc.to_string e) ;
D.log_backtrace () ;

Thread.delay 10.0
done ;
D.info "leaving main loop"
Expand Down