// Fan-control daemon: reads temperatures through IPMI and lm_sensors, maps
// them onto fan duty values with a clamped linear curve, and retries the
// control loop a few times from main().
//
// NOTE(review): this copy of the file is damaged. Everything that was written
// between angle brackets is gone: the names of the system headers below, all
// template arguments (std::vector, std::pair, std::numeric_limits), and two
// longer stretches of code that are marked where they occur. The code tokens
// are reproduced as found; restore the file from version control before
// building or changing behaviour.

// NOTE(review): the header names of the following includes are missing.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "ipmi.h"
#include "lm.h"

// Run flag: cleared by sig_handler(), polled by main_loop() and by the retry
// loop at the end of the file.
sig_atomic_t running = true;

// Signal handler: ignores the signal number and clears the run flag.
// NOTE(review): where this handler is installed is not visible in this copy.
void sig_handler(int sig) { (void)sig; running = false; }

// When true, main_loop() skips its per-sensor console output.
bool quiet;

// Collects the current sensor set.
//
// ipmi_sensors: IPMI sensors to refresh; passed by non-const reference to the
//               ipmi_* helpers and then copied into the result.
// ctx:          IPMI monitoring context forwarded to the ipmi_* helpers.
// lm_chips:     chips handed to lm_get_temperatures().
//
// Returns the IPMI sensors followed by the lm_sensors readings, or an empty
// vector when the sensor ids could not be resolved.
std::vector gather_sensors(std::vector& ipmi_sensors, ipmi_monitoring_ctx_t ctx, std::vector& lm_chips) {
    std::vector out;
    struct ipmi_monitoring_ipmi_config ipmi_config = {};
    ipmi_config.driver_type = IPMI_MONITORING_DRIVER_TYPE_OPENIPMI;
    // Any sensor with a non-positive id triggers an id lookup for the list.
    bool grabids = false;
    for(Sensor& sensor : ipmi_sensors) {
        if(sensor.id <= 0) {
            grabids = true;
            break;
        }
    }
    if(grabids) {
        // NOTE(review): on this path ipmi_update_sensors() is not called;
        // presumably ipmi_fill_sensor_ids() also fills the readings - confirm
        // against ipmi.h.
        if(!ipmi_fill_sensor_ids(ipmi_sensors, ctx, &ipmi_config)) {
            std::cout<<"could not get ids for all the required sensors\n";
            return out;
        }
    } else {
        ipmi_update_sensors(ipmi_sensors, ctx, &ipmi_config);
    }
    out.insert(out.end(), ipmi_sensors.begin(), ipmi_sensors.end());
    std::vector lm_sensors = lm_get_temperatures(lm_chips);
    out.insert(out.end(), lm_sensors.begin(), lm_sensors.end());
    return out;
}

// Linear fan curve clamped to [min_fan, max_fan]: the result rises from
// min_fan at low_temperature to max_fan at high_temperature.
// NOTE(review): high_temperature == low_temperature divides by zero when the
// slope is computed; no caller visible here passes equal values.
double fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature) {
    double slope = (max_fan-min_fan)/(high_temperature-low_temperature);
    return std::max(std::min(max_fan, min_fan+slope*(temperature-low_temperature)), min_fan);
}

// Fan value for the GPU zone, based on the sensors named "edge".
// Each entry of gpus pairs a chip name with a "seen" flag.
double gpu_fan_zone(const std::vector& sensors) {
    std::vector> gpus = {{"amdgpu-pci-0300", false}, {"amdgpu-pci-8300", false}};
    const char monitored_sensor_name[] = "edge";
    // NOTE(review): the template argument is missing here. If this is a
    // floating-point numeric_limits, min() is the smallest positive value,
    // not the most negative one; lowest() is probably what is wanted as the
    // seed of a running maximum - confirm.
    double max_temp = std::numeric_limits::min();
    for(const Sensor& sensor : sensors) {
        if(sensor.name == monitored_sensor_name) {
            for(std::pair& gpu : gpus) {
                if(sensor.chip == gpu.first) {
                    gpu.second = true;
                }
                // NOTE(review): this update is outside the chip comparison
                // above, so every sensor named "edge" raises max_temp whether
                // or not its chip is listed in gpus - confirm this is intended.
                if(max_temp < sensor.reading) max_temp = sensor.reading;
            }
        }
    }
    // Report every expected GPU that produced no reading.
    for(std::pair& gpu : gpus) {
        if(!gpu.second) {
            // NOTE(review): text is missing after the next "<". The rest of
            // this function and the start of the following function's
            // signature are absent. The body below is presumably
            // system_fan_zone(), which get_fan_zones() calls - confirm.
            std::cerr<<"Could not get temperature from "<& sensors) {
    // Match templates for the two IPMI sensors; each is overwritten with the
    // matching entry from sensors when one is found.
    Sensor cpu("IPMI", "CPU Temp");
    Sensor system("IPMI", "System Temp");
    bool hitCpu = false;
    bool hitSystem = false;
    // NOTE(review): out is never used in the visible part of this function.
    std::vector out;
    for(const Sensor& sensor : sensors) {
        if(cpu == sensor) {
            hitCpu = true;
            cpu = sensor;
        } else if(sensor == system) {
            hitSystem = true;
            system = sensor;
        }
    }
    if(hitCpu && hitSystem) {
        // Both curves span 0.33 to 1.0 starting at 40; the system curve tops
        // out at 65, the CPU curve at 70. The larger demand wins.
        double fanSystem = fan_curve(system.reading, 0.33, 1.0, 40, 65);
        double fanCpu = fan_curve(cpu.reading, 0.33, 1.0, 40, 70);
        return std::max(fanSystem, fanCpu);
    } else {
        // Missing either reading: report it and return the maximum value.
        std::cerr<<"Could not get temperature from System or Cpu! Ramping fans to maximum\n";
        return 1;
    }
}

// Returns the fan values in a fixed order: system zone first, GPU zone second.
std::vector get_fan_zones(const std::vector& sensors) {
    std::vector out;
    out.push_back(system_fan_zone(sensors));
    out.push_back(gpu_fan_zone(sensors));
    return out;
}

// Sets up IPMI and lm_sensors and runs the control loop while running is set.
// Every setup failure visible here returns 1.
int main_loop() {
    ipmi_ctx_t raw_ctx = ipmi_open_context();
    if(!raw_ctx) {
        // NOTE(review): sensors_cleanup() runs here although sensors_init()
        // has not been called yet in this function - confirm this is harmless.
        sensors_cleanup();
        return 1;
    }
    int ret = sensors_init(nullptr);
    if(ret < 0) {
        std::cerr<<"Could not init lm_sensors\n";
        // presumably sets fan groups 0 and 1 to full speed before bailing
        // out - confirm the argument meaning against ipmi.h.
        ipmi_set_fan_group(raw_ctx, 0, 1);
        ipmi_set_fan_group(raw_ctx, 1, 1);
        ipmi_ctx_close(raw_ctx);
        ipmi_ctx_destroy(raw_ctx);
        return 1;
    }
    std::vector lm_chips = lm_get_chips("amdgpu-*");
    std::vector ipmi_sensors;
    ipmi_sensors.push_back(Sensor("IPMI", "CPU Temp"));
    ipmi_sensors.push_back(Sensor("IPMI", "System Temp"));
    // Fewer than two matching chips is treated as a fatal setup error.
    if(lm_chips.size() < 2) {
        std::cerr<<"Could not get both monitored gpus!";
        ipmi_set_fan_group(raw_ctx, 0, 1);
        ipmi_set_fan_group(raw_ctx, 1, 1);
        ipmi_ctx_close(raw_ctx);
        ipmi_ctx_destroy(raw_ctx);
        sensors_cleanup();
        return 1;
    }
    ipmi_monitoring_ctx_t monitoring_ctx = init_ipmi_monitoring();
    if(!monitoring_ctx) {
        ipmi_set_fan_group(raw_ctx, 0, 1);
        ipmi_set_fan_group(raw_ctx, 1, 1);
        ipmi_ctx_close(raw_ctx);
        ipmi_ctx_destroy(raw_ctx);
        sensors_cleanup();
        return 1;
    }
    while(running) {
        std::vector sensors = gather_sensors(ipmi_sensors, monitoring_ctx, lm_chips);
        std::vector fanzones = get_fan_zones(sensors);
        if(!quiet) {
            // NOTE(review): text is missing after the next "<". The rest of
            // main_loop() and the start of the program entry point are
            // absent; what follows is the tail of a condition that sets
            // quiet.
            for(const Sensor& sensor : sensors) std::cout<<"Sensor "< 1) quiet = true;
    int ret = 0;
    // Up to three attempts at main_loop(); the loop stops early once running
    // has been cleared.
    // NOTE(review): the retry message and the 10 second sleep also happen
    // after the last attempt, just before giving up.
    for(size_t i = 0; i < 3; ++i) {
        ret = main_loop();
        if(!running) break;
        std::cerr<<"Mainloop unable to start, retrying in 10 sec\n";
        sleep(10);
    }
    if(ret != 0) std::cerr<<"Error not clearing, giveing up\n";
    return ret;
}