// Fan control program: reads IPMI and lm_sensors temperatures and steps a fixed
// set of fan zones every 10 seconds until a signal clears `running`.
//
// NOTE(review): this copy of the file appears to have lost every "<...>" span
// (system include targets, template argument lists, the body of fan_curve, the
// header of the curve function that follows it, and part of the sensor print
// statement in main_loop). Restore those from version control before building;
// nothing below tries to reconstruct them.
//
// Globals
//   running - loop flag; sig_handler() sets it to false, main_loop() and main()
//             read it.
//             NOTE(review): declared as plain sig_atomic_t, not volatile - confirm
//             this is intended for a flag written from a signal handler.
//   quiet   - set to true by main() when any command line argument is present;
//             main_loop() skips its per-iteration sensor printout when it is set.
//
// sig_handler(sig)
//   Ignores the signal number and sets running to false.
//
// gather_sensors(ipmi_sensors, ctx, lm_chips)
//   Builds an ipmi config with driver type IPMI_MONITORING_DRIVER_TYPE_OPENIPMI.
//   If any entry of ipmi_sensors has id <= 0, calls ipmi_fill_sensor_ids(); when
//   that fails it prints a message and returns the still-empty result.
//   Otherwise calls ipmi_update_sensors(). On the non-failing paths the result is
//   ipmi_sensors followed by whatever lm_get_temperatures(lm_chips) returns.
//
// fan_curve(temperature, min_fan, max_fan, low_temperature, high_temperature, stop)
//   Body is not readable in this copy. The lambdas in main_loop call it with
//   (temperature, min, max, low, high, stop) and use the result as a zone curve.
//
// mi100_fan_curve(...) - header not readable in this copy; called in main_loop
//   with eight arguments, the last being a pushDownStates element.
//   Visible tail: sets push_down_state to true on a comparison against
//   push_down_high_temperature; when temperature > high_temperature it returns
//   min((1-max_fan)*((temperature-high_temperature)/5.0)+max_fan, 1.0),
//   otherwise it returns `speed`.
//
// ipmi_cleanup(raw_ctx)
//   Calls ipmi_set_fan_group(raw_ctx, 0, 1) and ipmi_set_fan_group(raw_ctx, 1, 1),
//   then closes and destroys raw_ctx.
//   Presumably the trailing 1 means full speed as a fail-safe - TODO confirm
//   against ipmi.h.
//
// main_loop()
//   Setup, in order: ipmi_open_context(), sensors_init(nullptr),
//   lm_get_chips("amdgpu-*") (at least 2 chips required), init_ipmi_monitoring(),
//   fandevice_connect(&fdevice, 0). Any failure prints to std::cerr, undoes part
//   of the setup and returns 1.
//   Then creates six fans (two IpmiFan, four FanDeviceFan) and six FanZone
//   objects whose curve lambdas call fan_curve / mi100_fan_curve with hard-coded
//   parameters. The two MI100 lambdas capture pushDownStates by reference.
//   Loop while running: gather_sensors(), optional printout, zone->step(sensors)
//   for every zone, print a newline, sleep(10).
//   On exit deletes zones and fans, runs ipmi_cleanup(), destroys the monitoring
//   context, calls sensors_cleanup() and returns 0.
//   NOTE(review): the fandevice_connect() failure path does not call
//   ipmi_monitoring_ctx_destroy(monitoring_ctx), unlike the normal exit path.
//   NOTE(review): when gather_sensors() returns an empty vector the zones are
//   still stepped with it - confirm FanZone::step handles missing sensors.
//   NOTE(review): the two physical line breaks in this copy fall inside this
//   function, the first one inside the "CPU ... Temp" string literal - confirm
//   the intended sensor name.
//
// main(argc, argv)
//   Installs sig_handler for SIGABRT, SIGTERM, SIGHUP and SIGINT, sets quiet when
//   argc > 1, then calls main_loop() up to three times. It stops retrying once
//   running is false; otherwise it prints a retry message and sleeps 10 seconds
//   (this also happens after the third attempt). Prints a final error when the
//   last return value is non-zero and returns that value.
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ipmi.h" #include "lm.h" #include "fan.h" #include "ipmifan.h" #include "fandevicefan.h" #include "fanzone.h" sig_atomic_t running = true; void sig_handler(int sig) { (void)sig; running = false; } bool quiet; std::vector gather_sensors(std::vector& ipmi_sensors, ipmi_monitoring_ctx_t ctx, std::vector& lm_chips) { std::vector out; struct ipmi_monitoring_ipmi_config ipmi_config = {}; ipmi_config.driver_type = IPMI_MONITORING_DRIVER_TYPE_OPENIPMI; bool grabids = false; for(Sensor& sensor : ipmi_sensors) { if(sensor.id <= 0) { grabids = true; break; } } if(grabids) { if(!ipmi_fill_sensor_ids(ipmi_sensors, ctx, &ipmi_config)) { std::cout<<"could not get ids for all the required sensors\n"; return out; } } else { ipmi_update_sensors(ipmi_sensors, ctx, &ipmi_config); } out.insert(out.end(), ipmi_sensors.begin(), ipmi_sensors.end()); std::vector lm_sensors = lm_get_temperatures(lm_chips); out.insert(out.end(), lm_sensors.begin(), lm_sensors.end()); return out; } static double fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature, bool stop) { if(stop && temperature push_down_high_temperature) push_down_state = true; if(temperature > high_temperature) return std::min((1-max_fan)*((temperature-high_temperature)/5.0)+max_fan, 1.0); return speed; } void ipmi_cleanup(ipmi_ctx_t raw_ctx) { ipmi_set_fan_group(raw_ctx, 0, 1); ipmi_set_fan_group(raw_ctx, 1, 1); ipmi_ctx_close(raw_ctx); ipmi_ctx_destroy(raw_ctx); } int main_loop() { ipmi_ctx_t raw_ctx = ipmi_open_context(); if(!raw_ctx) { std::cerr<<"Unable to connect to impi\n"; return 1; } int ret = sensors_init(nullptr); if(ret < 0) { std::cerr<<"Could not init lm_sensors\n"; ipmi_cleanup(raw_ctx); return 1; } std::vector lm_chips = lm_get_chips("amdgpu-*"); std::vector ipmi_sensors; ipmi_sensors.push_back(Sensor("IPMI", "CPU 
Temp")); ipmi_sensors.push_back(Sensor("IPMI", "System Temp")); std::vector lmSensors; lmSensors.push_back(Sensor("amdgpu-pci-0300", "edge")); lmSensors.push_back(Sensor("amdgpu-pci-8300", "edge")); if(lm_chips.size() < 2) { std::cerr<<"Could not get enough monitored gpus!\n"; ipmi_cleanup(raw_ctx); sensors_cleanup(); return 1; } ipmi_monitoring_ctx_t monitoring_ctx = init_ipmi_monitoring(); if(!monitoring_ctx) { std::cerr<<"Unable to connect to impi for monitoring\n"; ipmi_cleanup(raw_ctx); sensors_cleanup(); return 1; } struct fandevice fdevice; ret = fandevice_connect(&fdevice, 0); if(ret < 0) { std::cerr<<"Unable to connect to FanDevice\n"; ipmi_cleanup(raw_ctx); sensors_cleanup(); return 1; } std::vector fans; fans.push_back(new IpmiFan(raw_ctx, 0, "IPMI CPU FAN")); fans.push_back(new IpmiFan(raw_ctx, 1, "IPMI SYSTEM FAN")); fans.push_back(new FanDeviceFan(&fdevice, FAN_A, "MI100_1 FAN")); fans.push_back(new FanDeviceFan(&fdevice, FAN_B, "MI100_2 FAN")); fans.push_back(new FanDeviceFan(&fdevice, FAN_D, "TOP SYSTEM FAN")); fans.push_back(new FanDeviceFan(&fdevice, FAN_C, "FRONT SYSTEM FAN")); std::array pushDownStates = {true, true}; std::vector fanZones; fanZones.push_back(new FanZone(ipmi_sensors[0], fans[0], [](double in){return fan_curve(in, 0.1, 1, 45, 65, false);}, "CPU FAN ZONE")); fanZones.push_back(new FanZone({ipmi_sensors[0], ipmi_sensors[1]}, fans[1], [](double in){return fan_curve(in, 0.2, 1, 40, 55, false);}, "SYSTEM FAN ZONE")); fanZones.push_back(new FanZone({ipmi_sensors[0], ipmi_sensors[1]}, fans[4], [](double in){return fan_curve(in, 0.5, 1, 60, 65, true);}, "TOP FAN ZONE")); fanZones.push_back(new FanZone({lmSensors[0], lmSensors[1]}, fans[5], [](double in){return fan_curve(in, 0, 1, 60, 80, true);}, "FRONT FAN ZONE")); fanZones.push_back(new FanZone(lmSensors[1], fans[2], [&pushDownStates](double in){return mi100_fan_curve(in, 0.14, 0.7, 65, 80, 50, 70, pushDownStates[0]);}, "MI100_1 FAN ZONE")); fanZones.push_back(new FanZone(lmSensors[0], 
fans[3], [&pushDownStates](double in){return mi100_fan_curve(in, 0.14, 0.7, 65, 80, 50, 70, pushDownStates[1]);}, "MI100_2 FAN ZONE")); while(running) { std::vector sensors = gather_sensors(ipmi_sensors, monitoring_ctx, lm_chips); if(!quiet) { for(const Sensor& sensor : sensors) std::cout<print(sensors); } for(FanZone* zone : fanZones) zone->step(sensors); std::cout<<'\n'; sleep(10); } for(FanZone* zone : fanZones) delete zone; for(Fan* fan : fans) delete fan; ipmi_cleanup(raw_ctx); ipmi_monitoring_ctx_destroy(monitoring_ctx); sensors_cleanup(); return 0; } int main (int argc, char **argv) { signal(SIGABRT, sig_handler); signal(SIGTERM, sig_handler); signal(SIGHUP, sig_handler); signal(SIGINT, sig_handler); if(argc > 1) quiet = true; int ret = 0; for(size_t i = 0; i < 3; ++i) { ret = main_loop(); if(!running) break; std::cerr<<"Mainloop unable to start, retrying in 10 sec\n"; sleep(10); } if(ret != 0) std::cerr<<"Error not clearing, giveing up\n"; return ret; }