diff --git a/CMakeLists.txt b/CMakeLists.txt index 4eb83d6..dffe7e7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,14 +4,28 @@ project(ipmifan LANGUAGES CXX) set(CMAKE_CXX_STANDARD 20) -find_package(Doxygen) +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "..." FORCE) +endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) find_package(PkgConfig REQUIRED) -pkg_search_module(IPMI REQUIRED libipmimonitoring) +pkg_search_module(IPMI_MONITORING REQUIRED libipmimonitoring) +pkg_search_module(IPMI REQUIRED libfreeipmi) +pkg_search_module(FANDEVICE REQUIRED fandevice) +pkg_search_module(SYSTEMD systemd) -add_executable(${PROJECT_NAME} main.cpp ipmi.cpp lm.cpp) -target_link_libraries(${PROJECT_NAME} ${IPMI_LINK_LIBRARIES} ${IPMIPOSIX_LINK_LIBRARIES} sensors) -target_include_directories(${PROJECT_NAME} PRIVATE ${IPMI_INCLUDE_DIRS} ${IPMIPOSIX_INCLUDE_DIRS}) + +add_executable(${PROJECT_NAME} main.cpp ipmi.cpp lm.cpp ipmifan.cpp fandevicefan.cpp fanzone.cpp) +target_link_libraries(${PROJECT_NAME} ${IPMI_LINK_LIBRARIES} ${FANDEVICE_LINK_LIBRARIES} ${IPMI_MONITORING_LINK_LIBRARIES} sensors) +target_include_directories(${PROJECT_NAME} PRIVATE ${IPMI_INCLUDE_DIRS} ${FANDEVICE_INCLUDE_DIRS} ${IPMI_MONITORING_INCLUDE_DIRS}) target_compile_options(${PROJECT_NAME} PRIVATE "-Wall" "-O2" "-g" "-fno-strict-aliasing" "-Wfatal-errors" "-Wno-reorder") install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin) +if(SYSTEMD_FOUND) + pkg_get_variable(SYSTEMD_UNIT_DIR_PKG systemd systemd_system_unit_path) + message(STATUS ${SYSTEMD_UNIT_DIR_PKG}) + string(REPLACE ":" ";" SYSTEMD_UNIT_DIR_LIST ${SYSTEMD_UNIT_DIR_PKG}) + list(GET SYSTEMD_UNIT_DIR_LIST 0 SYSTEMD_UNIT_DIR) + + install(FILES ipmifan.service DESTINATION ${SYSTEMD_UNIT_DIR}) +endif(SYSTEMD_FOUND) diff --git a/ipmi.cpp b/ipmi.cpp index ed4a024..e5aea0c 100644 --- a/ipmi.cpp +++ b/ipmi.cpp @@ -2,6 +2,8 @@ #include #include +static constexpr size_t IPMI_RAW_MAX_ARGS = 65536*2; + static double ipmi_convert_sensor_reading(void *sensor_reading, int sensor_reading_type) { if(sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL) @@ -102,3 +104,37 @@ ipmi_monitoring_ctx_t init_ipmi_monitoring() return ctx; } + +ipmi_ctx_t ipmi_open_context() +{ + ipmi_ctx_t ctx = ipmi_ctx_create(); + if(!ctx) + { + std::cerr<<"Could not allocae raw context\n"; + return nullptr; + } + + ipmi_driver_type_t driver = IPMI_DEVICE_OPENIPMI; + int ret = ipmi_ctx_find_inband(ctx, &driver, false, 0, 0, nullptr, 0, 0); + if(ret < 0) + { + std::cerr<<"Could not create raw context "<(100), static_cast(speed*100)), static_cast(0)); + char command[] = {0x70, 0x66, 0x01, static_cast(group), converted_speed}; + char bytesrx[IPMI_RAW_MAX_ARGS] = {0}; + int rxlen = ipmi_cmd_raw(raw_ctx, 0, 0x30, command, sizeof(command), bytesrx, IPMI_RAW_MAX_ARGS); + if(rxlen < 0) + { + std::cerr<<"Raw write to ipmi failed with: "< #include #include +#include #include "sensor.h" @@ -11,3 +12,7 @@ bool ipmi_fill_sensor_ids(std::vector& sensors, ipmi_monitoring_ctx_t ct bool ipmi_update_sensors(std::vector& sensors, ipmi_monitoring_ctx_t ctx, struct ipmi_monitoring_ipmi_config* config); ipmi_monitoring_ctx_t init_ipmi_monitoring(); + +ipmi_ctx_t ipmi_open_context(); + +bool ipmi_set_fan_group(ipmi_ctx_t raw_ctx, uint8_t group, double speed); diff --git a/ipmifan.service b/ipmifan.service new file mode 100644 index 0000000..30ab7df --- /dev/null +++ b/ipmifan.service @@ -0,0 +1,10 @@ +[Unit] +Description=Start impi fan control +After=lm_sensors.service systemd-modules-load.service + +[Service] +Type=simple +ExecStart=/usr/bin/ipmifan -q + +[Install] +WantedBy=multi-user.target diff --git a/main.cpp b/main.cpp index d8e1fb3..9faf0be 100644 --- a/main.cpp +++ b/main.cpp @@ -1,3 +1,6 @@ +#include +#include +#include #include #include #include @@ -7,9 +10,16 @@ #include #include #include +#include +#include +#include #include "ipmi.h" #include "lm.h" +#include "fan.h" +#include "ipmifan.h" +#include "fandevicefan.h" +#include "fanzone.h" sig_atomic_t running = true; @@ -19,6 +29,8 @@ void sig_handler(int sig) running = false; } +bool quiet; + std::vector gather_sensors(std::vector& ipmi_sensors, ipmi_monitoring_ctx_t ctx, std::vector& lm_chips) { std::vector out; @@ -55,57 +67,137 @@ std::vector gather_sensors(std::vector& ipmi_sensors, ipmi_monit return out; } -double fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature) +static double fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature, bool stop) { + if(stop && temperature & sensors) +static double mi100_fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature, + double push_down_low_temperature, double push_down_high_temperature, bool &push_down_state) { - const char mi50Chip[] = "amdgpu-pci-2300"; - const char mi25Chip[] = "amdgpu-pci-4300"; - const char monitored_sensor_name[] = "edge"; + double speed = fan_curve(temperature, min_fan, max_fan, low_temperature, high_temperature, false); + if(push_down_state) + speed = std::max(speed, 0.6); + if(temperature < push_down_low_temperature) + push_down_state = false; + else if(temperature > push_down_high_temperature) + push_down_state = true; + if(temperature > high_temperature) + return std::min((1-max_fan)*((temperature-high_temperature)/5.0)+max_fan, 1.0); - double max_temp = std::numeric_limits::min(); - for(const Sensor& sensor : sensors) + return speed; +} + +void ipmi_cleanup(ipmi_ctx_t raw_ctx) +{ + ipmi_set_fan_group(raw_ctx, 0, 1); + ipmi_set_fan_group(raw_ctx, 1, 1); + ipmi_ctx_close(raw_ctx); + ipmi_ctx_destroy(raw_ctx); +} + +int main_loop() +{ + ipmi_ctx_t raw_ctx = ipmi_open_context(); + if(!raw_ctx) { - if((sensor.chip == mi50Chip || sensor.chip == mi25Chip) && sensor.name == monitored_sensor_name) + std::cerr<<"Unable to connect to impi\n"; + return 1; + } + + int ret = sensors_init(nullptr); + if(ret < 0) + { + std::cerr<<"Could not init lm_sensors\n"; + ipmi_cleanup(raw_ctx); + return 1; + } + + std::vector lm_chips = lm_get_chips("amdgpu-*"); + std::vector ipmi_sensors; + ipmi_sensors.push_back(Sensor("IPMI", "CPU Temp")); + ipmi_sensors.push_back(Sensor("IPMI", "System Temp")); + + std::vector lmSensors; + lmSensors.push_back(Sensor("amdgpu-pci-0300", "edge")); + lmSensors.push_back(Sensor("amdgpu-pci-8300", "edge")); + + if(lm_chips.size() < 2) + { + std::cerr<<"Could not get enough monitored gpus!\n"; + ipmi_cleanup(raw_ctx); + sensors_cleanup(); + return 1; + } + + ipmi_monitoring_ctx_t monitoring_ctx = init_ipmi_monitoring(); + if(!monitoring_ctx) + { + std::cerr<<"Unable to connect to impi for monitoring\n"; + ipmi_cleanup(raw_ctx); + sensors_cleanup(); + return 1; + } + + struct fandevice fdevice; + ret = fandevice_connect(&fdevice, 0); + if(ret < 0) + { + std::cerr<<"Unable to connect to FanDevice\n"; + ipmi_cleanup(raw_ctx); + sensors_cleanup(); + return 1; + } + + std::vector fans; + fans.push_back(new IpmiFan(raw_ctx, 0, "IPMI CPU FAN")); + fans.push_back(new IpmiFan(raw_ctx, 1, "IPMI SYSTEM FAN")); + fans.push_back(new FanDeviceFan(&fdevice, FAN_A, "MI100_1 FAN")); + fans.push_back(new FanDeviceFan(&fdevice, FAN_B, "MI100_2 FAN")); + fans.push_back(new FanDeviceFan(&fdevice, FAN_D, "TOP SYSTEM FAN")); + fans.push_back(new FanDeviceFan(&fdevice, FAN_C, "FRONT SYSTEM FAN")); + + std::array pushDownStates = {true, true}; + + std::vector fanZones; + fanZones.push_back(new FanZone(ipmi_sensors[0], fans[0], [](double in){return fan_curve(in, 0.1, 1, 45, 65, false);}, "CPU FAN ZONE")); + fanZones.push_back(new FanZone({ipmi_sensors[0], ipmi_sensors[1]}, fans[1], [](double in){return fan_curve(in, 0.2, 1, 40, 55, false);}, "SYSTEM FAN ZONE")); + fanZones.push_back(new FanZone({ipmi_sensors[0], ipmi_sensors[1]}, fans[4], [](double in){return fan_curve(in, 0.5, 1, 60, 65, true);}, "TOP FAN ZONE")); + fanZones.push_back(new FanZone({lmSensors[0], lmSensors[1]}, fans[5], [](double in){return fan_curve(in, 0, 1, 60, 80, true);}, "FRONT FAN ZONE")); + fanZones.push_back(new FanZone(lmSensors[1], fans[2], [&pushDownStates](double in){return mi100_fan_curve(in, 0.14, 0.7, 65, 80, 50, 70, pushDownStates[0]);}, "MI100_1 FAN ZONE")); + fanZones.push_back(new FanZone(lmSensors[0], fans[3], [&pushDownStates](double in){return mi100_fan_curve(in, 0.14, 0.7, 65, 80, 50, 70, pushDownStates[1]);}, "MI100_2 FAN ZONE")); + while(running) + { + std::vector sensors = gather_sensors(ipmi_sensors, monitoring_ctx, lm_chips); + + if(!quiet) { - if(max_temp < sensor.reading) - max_temp = sensor.reading; + for(const Sensor& sensor : sensors) + std::cout<print(sensors); } + + for(FanZone* zone : fanZones) + zone->step(sensors); + std::cout<<'\n'; + sleep(10); } - return fan_curve(max_temp, 0.2, 1.0, 40, 75); -} + for(FanZone* zone : fanZones) + delete zone; + for(Fan* fan : fans) + delete fan; -double system_fan_zone(const std::vector& sensors) -{ - Sensor cpu("IPMI", "CPU Temp"); - Sensor system("IPMI", "System Temp"); - std::vector out; + ipmi_cleanup(raw_ctx); + ipmi_monitoring_ctx_destroy(monitoring_ctx); + sensors_cleanup(); - for(const Sensor& sensor : sensors) - { - if(cpu == sensor) - cpu = sensor; - else if(sensor == system) - system = sensor; - } - - double fanSystem = fan_curve(system.reading, 0.2, 1.0, 35, 45); - double fanCpu = fan_curve(cpu.reading, 0.2, 1.0, 40, 70); - - return std::max(fanSystem, fanCpu); -} - -std::vector get_fan_zones(const std::vector& sensors) -{ - std::vector out; - out.push_back(system_fan_zone(sensors)); - out.push_back(gpu_fan_zone(sensors)); - return out; + return 0; } int main (int argc, char **argv) @@ -115,32 +207,22 @@ int main (int argc, char **argv) signal(SIGHUP, sig_handler); signal(SIGINT, sig_handler); - int ret = sensors_init(nullptr); - if(ret < 0) + if(argc > 1) + quiet = true; + + int ret = 0; + for(size_t i = 0; i < 3; ++i) { - std::cerr<<"Could not init lm_sensors\n"; + ret = main_loop(); + if(!running) + break; + std::cerr<<"Mainloop unable to start, retrying in 10 sec\n"; + sleep(10); } - std::vector lm_chips = lm_get_chips("amdgpu-*"); - std::vector ipmi_sensors; - ipmi_sensors.push_back(Sensor("IPMI", "CPU Temp")); - ipmi_sensors.push_back(Sensor("IPMI", "System Temp")); + if(ret != 0) + std::cerr<<"Error not clearing, giveing up\n"; - ipmi_monitoring_ctx_t ctx = init_ipmi_monitoring(); - if(!ctx) - return 1; - - while(running) - { - std::vector sensors = gather_sensors(ipmi_sensors, ctx, lm_chips); - std::vector fanzones = get_fan_zones(sensors); - for(const double fanzone : fanzones) - std::cout<