From 8c93f843b5d7f3975750499a99ef02d9227cda38 Mon Sep 17 00:00:00 2001 From: uvos Date: Thu, 14 Nov 2024 12:36:38 +0100 Subject: [PATCH] update fur current gpu config --- CMakeLists.txt | 7 +- main.cpp | 170 ++++++++++++++++++++++--------------------------- sensor.h | 2 +- 3 files changed, 80 insertions(+), 99 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ba30c9..dffe7e7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,12 +11,13 @@ endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) find_package(PkgConfig REQUIRED) pkg_search_module(IPMI_MONITORING REQUIRED libipmimonitoring) pkg_search_module(IPMI REQUIRED libfreeipmi) +pkg_search_module(FANDEVICE REQUIRED fandevice) pkg_search_module(SYSTEMD systemd) -add_executable(${PROJECT_NAME} main.cpp ipmi.cpp lm.cpp) -target_link_libraries(${PROJECT_NAME} ${IPMI_LINK_LIBRARIES} ${IPMI_MONITORING_LINK_LIBRARIES} sensors) -target_include_directories(${PROJECT_NAME} PRIVATE ${IPMI_INCLUDE_DIRS} ${IPMI_MONITORING_INCLUDE_DIRS}) +add_executable(${PROJECT_NAME} main.cpp ipmi.cpp lm.cpp ipmifan.cpp fandevicefan.cpp fanzone.cpp) +target_link_libraries(${PROJECT_NAME} ${IPMI_LINK_LIBRARIES} ${FANDEVICE_LINK_LIBRARIES} ${IPMI_MONITORING_LINK_LIBRARIES} sensors) +target_include_directories(${PROJECT_NAME} PRIVATE ${IPMI_INCLUDE_DIRS} ${FANDEVICE_INCLUDE_DIRS} ${IPMI_MONITORING_INCLUDE_DIRS}) target_compile_options(${PROJECT_NAME} PRIVATE "-Wall" "-O2" "-g" "-fno-strict-aliasing" "-Wfatal-errors" "-Wno-reorder") install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin) diff --git a/main.cpp b/main.cpp index 9b2b01a..9faf0be 100644 --- a/main.cpp +++ b/main.cpp @@ -10,10 +10,16 @@ #include #include #include +#include #include +#include #include "ipmi.h" #include "lm.h" +#include "fan.h" +#include "ipmifan.h" +#include "fandevicefan.h" +#include "fanzone.h" sig_atomic_t running = true; @@ -61,87 +67,36 @@ std::vector gather_sensors(std::vector& ipmi_sensors, ipmi_monit return out; } -double fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature) +static double fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature, bool stop) { + if(stop && temperature & sensors) +static double mi100_fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature, + double push_down_low_temperature, double push_down_high_temperature, bool &push_down_state) { - std::vector> gpus = {{"amdgpu-pci-0300", false}, {"amdgpu-pci-8300", false}, {"amdgpu-pci-8900", false}}; - const char monitored_sensor_name[] = "edge"; + double speed = fan_curve(temperature, min_fan, max_fan, low_temperature, high_temperature, false); + if(push_down_state) + speed = std::max(speed, 0.6); + if(temperature < push_down_low_temperature) + push_down_state = false; + else if(temperature > push_down_high_temperature) + push_down_state = true; + if(temperature > high_temperature) + return std::min((1-max_fan)*((temperature-high_temperature)/5.0)+max_fan, 1.0); - double max_temp = std::numeric_limits::min(); - for(const Sensor& sensor : sensors) - { - if(sensor.name == monitored_sensor_name) - { - for(std::pair& gpu : gpus) - { - if(sensor.chip == gpu.first) - { - if(max_temp < sensor.reading) - max_temp = sensor.reading; - gpu.second = true; - } - } - } - } - for(std::pair& gpu : gpus) - { - if(!gpu.second) - { - std::cerr<<"Could not get temperature from "<& sensors) +void ipmi_cleanup(ipmi_ctx_t raw_ctx) { - Sensor cpu("IPMI", "CPU Temp"); - Sensor system("IPMI", "System Temp"); - bool hitCpu = false; - bool hitSystem = false; - std::vector out; - - for(const Sensor& sensor : sensors) - { - if(cpu == sensor) - { - hitCpu = true; - cpu = sensor; - } - else if(sensor == system) - { - hitSystem = true; - system = sensor; - } - } - - if(hitCpu && hitSystem) - { - double fanSystem = fan_curve(system.reading, 0.33, 1.0, 40, 65); - double fanCpu = fan_curve(cpu.reading, 0.33, 1.0, 40, 70); - - return std::max(fanSystem, fanCpu); - } - else - { - std::cerr<<"Could not get temperature from System or Cpu! Ramping fans to maximum\n"; - return 1; - } -} - -std::vector get_fan_zones(const std::vector& sensors) -{ - std::vector out; - out.push_back(system_fan_zone(sensors)); - out.push_back(gpu_fan_zone(sensors)); - return out; + ipmi_set_fan_group(raw_ctx, 0, 1); + ipmi_set_fan_group(raw_ctx, 1, 1); + ipmi_ctx_close(raw_ctx); + ipmi_ctx_destroy(raw_ctx); } int main_loop() @@ -149,7 +104,7 @@ int main_loop() ipmi_ctx_t raw_ctx = ipmi_open_context(); if(!raw_ctx) { - sensors_cleanup(); + std::cerr<<"Unable to connect to impi\n"; return 1; } @@ -157,10 +112,7 @@ int main_loop() if(ret < 0) { std::cerr<<"Could not init lm_sensors\n"; - ipmi_set_fan_group(raw_ctx, 0, 1); - ipmi_set_fan_group(raw_ctx, 1, 1); - ipmi_ctx_close(raw_ctx); - ipmi_ctx_destroy(raw_ctx); + ipmi_cleanup(raw_ctx); return 1; } @@ -169,13 +121,14 @@ int main_loop() ipmi_sensors.push_back(Sensor("IPMI", "CPU Temp")); ipmi_sensors.push_back(Sensor("IPMI", "System Temp")); + std::vector lmSensors; + lmSensors.push_back(Sensor("amdgpu-pci-0300", "edge")); + lmSensors.push_back(Sensor("amdgpu-pci-8300", "edge")); + if(lm_chips.size() < 2) { - std::cerr<<"Could not get both monitored gpus!"; - ipmi_set_fan_group(raw_ctx, 0, 1); - ipmi_set_fan_group(raw_ctx, 1, 1); - ipmi_ctx_close(raw_ctx); - ipmi_ctx_destroy(raw_ctx); + std::cerr<<"Could not get enough monitored gpus!\n"; + ipmi_cleanup(raw_ctx); sensors_cleanup(); return 1; } @@ -183,37 +136,64 @@ int main_loop() ipmi_monitoring_ctx_t monitoring_ctx = init_ipmi_monitoring(); if(!monitoring_ctx) { - ipmi_set_fan_group(raw_ctx, 0, 1); - ipmi_set_fan_group(raw_ctx, 1, 1); - ipmi_ctx_close(raw_ctx); - ipmi_ctx_destroy(raw_ctx); + std::cerr<<"Unable to connect to impi for monitoring\n"; + ipmi_cleanup(raw_ctx); sensors_cleanup(); return 1; } + struct fandevice fdevice; + ret = fandevice_connect(&fdevice, 0); + if(ret < 0) + { + std::cerr<<"Unable to connect to FanDevice\n"; + ipmi_cleanup(raw_ctx); + sensors_cleanup(); + return 1; + } + std::vector fans; + fans.push_back(new IpmiFan(raw_ctx, 0, "IPMI CPU FAN")); + fans.push_back(new IpmiFan(raw_ctx, 1, "IPMI SYSTEM FAN")); + fans.push_back(new FanDeviceFan(&fdevice, FAN_A, "MI100_1 FAN")); + fans.push_back(new FanDeviceFan(&fdevice, FAN_B, "MI100_2 FAN")); + fans.push_back(new FanDeviceFan(&fdevice, FAN_D, "TOP SYSTEM FAN")); + fans.push_back(new FanDeviceFan(&fdevice, FAN_C, "FRONT SYSTEM FAN")); + + std::array pushDownStates = {true, true}; + + std::vector fanZones; + fanZones.push_back(new FanZone(ipmi_sensors[0], fans[0], [](double in){return fan_curve(in, 0.1, 1, 45, 65, false);}, "CPU FAN ZONE")); + fanZones.push_back(new FanZone({ipmi_sensors[0], ipmi_sensors[1]}, fans[1], [](double in){return fan_curve(in, 0.2, 1, 40, 55, false);}, "SYSTEM FAN ZONE")); + fanZones.push_back(new FanZone({ipmi_sensors[0], ipmi_sensors[1]}, fans[4], [](double in){return fan_curve(in, 0.5, 1, 60, 65, true);}, "TOP FAN ZONE")); + fanZones.push_back(new FanZone({lmSensors[0], lmSensors[1]}, fans[5], [](double in){return fan_curve(in, 0, 1, 60, 80, true);}, "FRONT FAN ZONE")); + fanZones.push_back(new FanZone(lmSensors[1], fans[2], [&pushDownStates](double in){return mi100_fan_curve(in, 0.14, 0.7, 65, 80, 50, 70, pushDownStates[0]);}, "MI100_1 FAN ZONE")); + fanZones.push_back(new FanZone(lmSensors[0], fans[3], [&pushDownStates](double in){return mi100_fan_curve(in, 0.14, 0.7, 65, 80, 50, 70, pushDownStates[1]);}, "MI100_2 FAN ZONE")); while(running) { std::vector sensors = gather_sensors(ipmi_sensors, monitoring_ctx, lm_chips); - std::vector fanzones = get_fan_zones(sensors); if(!quiet) { for(const Sensor& sensor : sensors) - std::cout<<"Sensor "<print(sensors); } - ipmi_set_fan_group(raw_ctx, 0, fanzones[0]); - ipmi_set_fan_group(raw_ctx, 1, fanzones[1]); + for(FanZone* zone : fanZones) + zone->step(sensors); + std::cout<<'\n'; sleep(10); } - ipmi_set_fan_group(raw_ctx, 0, 1); - ipmi_set_fan_group(raw_ctx, 1, 1); - ipmi_ctx_close(raw_ctx); - ipmi_ctx_destroy(raw_ctx); + for(FanZone* zone : fanZones) + delete zone; + for(Fan* fan : fans) + delete fan; + + ipmi_cleanup(raw_ctx); ipmi_monitoring_ctx_destroy(monitoring_ctx); sensors_cleanup(); diff --git a/sensor.h b/sensor.h index 4884c97..849eeb6 100644 --- a/sensor.h +++ b/sensor.h @@ -13,5 +13,5 @@ public: public: Sensor() = default; Sensor(std::string chipI, std::string nameI, int idI = 0): name(nameI), chip(chipI), id(idI) {} - bool operator==(const Sensor& other) {return other.name == name && other.chip == chip;} + bool operator==(const Sensor& other) const {return other.name == name && other.chip == chip;} };