diff --git a/CMakeLists.txt b/CMakeLists.txt index 4eb83d6..4ba30c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,14 +4,27 @@ project(ipmifan LANGUAGES CXX) set(CMAKE_CXX_STANDARD 20) -find_package(Doxygen) +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "..." FORCE) +endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) find_package(PkgConfig REQUIRED) -pkg_search_module(IPMI REQUIRED libipmimonitoring) +pkg_search_module(IPMI_MONITORING REQUIRED libipmimonitoring) +pkg_search_module(IPMI REQUIRED libfreeipmi) +pkg_search_module(SYSTEMD systemd) + add_executable(${PROJECT_NAME} main.cpp ipmi.cpp lm.cpp) -target_link_libraries(${PROJECT_NAME} ${IPMI_LINK_LIBRARIES} ${IPMIPOSIX_LINK_LIBRARIES} sensors) -target_include_directories(${PROJECT_NAME} PRIVATE ${IPMI_INCLUDE_DIRS} ${IPMIPOSIX_INCLUDE_DIRS}) +target_link_libraries(${PROJECT_NAME} ${IPMI_LINK_LIBRARIES} ${IPMI_MONITORING_LINK_LIBRARIES} sensors) +target_include_directories(${PROJECT_NAME} PRIVATE ${IPMI_INCLUDE_DIRS} ${IPMI_MONITORING_INCLUDE_DIRS}) target_compile_options(${PROJECT_NAME} PRIVATE "-Wall" "-O2" "-g" "-fno-strict-aliasing" "-Wfatal-errors" "-Wno-reorder") install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin) +if(SYSTEMD_FOUND) + pkg_get_variable(SYSTEMD_UNIT_DIR_PKG systemd systemd_system_unit_path) + message(STATUS ${SYSTEMD_UNIT_DIR_PKG}) + string(REPLACE ":" ";" SYSTEMD_UNIT_DIR_LIST ${SYSTEMD_UNIT_DIR_PKG}) + list(GET SYSTEMD_UNIT_DIR_LIST 0 SYSTEMD_UNIT_DIR) + + install(FILES ipmifan.service DESTINATION ${SYSTEMD_UNIT_DIR}) +endif(SYSTEMD_FOUND) diff --git a/ipmi.cpp b/ipmi.cpp index ed4a024..e5aea0c 100644 --- a/ipmi.cpp +++ b/ipmi.cpp @@ -2,6 +2,8 @@ #include #include +static constexpr size_t IPMI_RAW_MAX_ARGS = 65536*2; + static double ipmi_convert_sensor_reading(void *sensor_reading, int sensor_reading_type) { if(sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL) @@ -102,3 +104,37 @@ ipmi_monitoring_ctx_t init_ipmi_monitoring() return ctx; } + +ipmi_ctx_t ipmi_open_context() +{ + ipmi_ctx_t ctx = ipmi_ctx_create(); + if(!ctx) + { + std::cerr<<"Could not allocae raw context\n"; + return nullptr; + } + + ipmi_driver_type_t driver = IPMI_DEVICE_OPENIPMI; + int ret = ipmi_ctx_find_inband(ctx, &driver, false, 0, 0, nullptr, 0, 0); + if(ret < 0) + { + std::cerr<<"Could not create raw context "<(100), static_cast(speed*100)), static_cast(0)); + char command[] = {0x70, 0x66, 0x01, static_cast(group), converted_speed}; + char bytesrx[IPMI_RAW_MAX_ARGS] = {0}; + int rxlen = ipmi_cmd_raw(raw_ctx, 0, 0x30, command, sizeof(command), bytesrx, IPMI_RAW_MAX_ARGS); + if(rxlen < 0) + { + std::cerr<<"Raw write to ipmi failed with: "< #include #include +#include #include "sensor.h" @@ -11,3 +12,7 @@ bool ipmi_fill_sensor_ids(std::vector& sensors, ipmi_monitoring_ctx_t ct bool ipmi_update_sensors(std::vector& sensors, ipmi_monitoring_ctx_t ctx, struct ipmi_monitoring_ipmi_config* config); ipmi_monitoring_ctx_t init_ipmi_monitoring(); + +ipmi_ctx_t ipmi_open_context(); + +bool ipmi_set_fan_group(ipmi_ctx_t raw_ctx, uint8_t group, double speed); diff --git a/ipmifan.service b/ipmifan.service new file mode 100644 index 0000000..30ab7df --- /dev/null +++ b/ipmifan.service @@ -0,0 +1,10 @@ +[Unit] +Description=Start impi fan control +After=lm_sensors.service systemd-modules-load.service + +[Service] +Type=simple +ExecStart=/usr/bin/ipmifan -q + +[Install] +WantedBy=multi-user.target diff --git a/main.cpp b/main.cpp index d8e1fb3..9b2b01a 100644 --- a/main.cpp +++ b/main.cpp @@ -1,3 +1,6 @@ +#include +#include +#include #include #include #include @@ -7,6 +10,7 @@ #include #include #include +#include #include "ipmi.h" #include "lm.h" @@ -19,6 +23,8 @@ void sig_handler(int sig) running = false; } +bool quiet; + std::vector gather_sensors(std::vector& ipmi_sensors, ipmi_monitoring_ctx_t ctx, std::vector& lm_chips) { std::vector out; @@ -63,41 +69,71 @@ double fan_curve(double temperature, double min_fan, double max_fan, double low_ double gpu_fan_zone(const std::vector& sensors) { - const char mi50Chip[] = "amdgpu-pci-2300"; - const char mi25Chip[] = "amdgpu-pci-4300"; + std::vector> gpus = {{"amdgpu-pci-0300", false}, {"amdgpu-pci-8300", false}, {"amdgpu-pci-8900", false}}; const char monitored_sensor_name[] = "edge"; double max_temp = std::numeric_limits::min(); for(const Sensor& sensor : sensors) { - if((sensor.chip == mi50Chip || sensor.chip == mi25Chip) && sensor.name == monitored_sensor_name) + if(sensor.name == monitored_sensor_name) { - if(max_temp < sensor.reading) - max_temp = sensor.reading; + for(std::pair& gpu : gpus) + { + if(sensor.chip == gpu.first) + { + if(max_temp < sensor.reading) + max_temp = sensor.reading; + gpu.second = true; + } + } + } + } + for(std::pair& gpu : gpus) + { + if(!gpu.second) + { + std::cerr<<"Could not get temperature from "<& sensors) { Sensor cpu("IPMI", "CPU Temp"); Sensor system("IPMI", "System Temp"); + bool hitCpu = false; + bool hitSystem = false; std::vector out; for(const Sensor& sensor : sensors) { if(cpu == sensor) + { + hitCpu = true; cpu = sensor; + } else if(sensor == system) + { + hitSystem = true; system = sensor; + } } - double fanSystem = fan_curve(system.reading, 0.2, 1.0, 35, 45); - double fanCpu = fan_curve(cpu.reading, 0.2, 1.0, 40, 70); + if(hitCpu && hitSystem) + { + double fanSystem = fan_curve(system.reading, 0.33, 1.0, 40, 65); + double fanCpu = fan_curve(cpu.reading, 0.33, 1.0, 40, 70); - return std::max(fanSystem, fanCpu); + return std::max(fanSystem, fanCpu); + } + else + { + std::cerr<<"Could not get temperature from System or Cpu! Ramping fans to maximum\n"; + return 1; + } } std::vector get_fan_zones(const std::vector& sensors) @@ -108,17 +144,24 @@ std::vector get_fan_zones(const std::vector& sensors) return out; } -int main (int argc, char **argv) +int main_loop() { - signal(SIGABRT, sig_handler); - signal(SIGTERM, sig_handler); - signal(SIGHUP, sig_handler); - signal(SIGINT, sig_handler); + ipmi_ctx_t raw_ctx = ipmi_open_context(); + if(!raw_ctx) + { + sensors_cleanup(); + return 1; + } int ret = sensors_init(nullptr); if(ret < 0) { std::cerr<<"Could not init lm_sensors\n"; + ipmi_set_fan_group(raw_ctx, 0, 1); + ipmi_set_fan_group(raw_ctx, 1, 1); + ipmi_ctx_close(raw_ctx); + ipmi_ctx_destroy(raw_ctx); + return 1; } std::vector lm_chips = lm_get_chips("amdgpu-*"); @@ -126,21 +169,80 @@ int main (int argc, char **argv) ipmi_sensors.push_back(Sensor("IPMI", "CPU Temp")); ipmi_sensors.push_back(Sensor("IPMI", "System Temp")); - ipmi_monitoring_ctx_t ctx = init_ipmi_monitoring(); - if(!ctx) + if(lm_chips.size() < 2) + { + std::cerr<<"Could not get both monitored gpus!"; + ipmi_set_fan_group(raw_ctx, 0, 1); + ipmi_set_fan_group(raw_ctx, 1, 1); + ipmi_ctx_close(raw_ctx); + ipmi_ctx_destroy(raw_ctx); + sensors_cleanup(); return 1; + } + + ipmi_monitoring_ctx_t monitoring_ctx = init_ipmi_monitoring(); + if(!monitoring_ctx) + { + ipmi_set_fan_group(raw_ctx, 0, 1); + ipmi_set_fan_group(raw_ctx, 1, 1); + ipmi_ctx_close(raw_ctx); + ipmi_ctx_destroy(raw_ctx); + sensors_cleanup(); + return 1; + } + while(running) { - std::vector sensors = gather_sensors(ipmi_sensors, ctx, lm_chips); + std::vector sensors = gather_sensors(ipmi_sensors, monitoring_ctx, lm_chips); std::vector fanzones = get_fan_zones(sensors); - for(const double fanzone : fanzones) - std::cout< 1) + quiet = true; + + int ret = 0; + for(size_t i = 0; i < 3; ++i) + { + ret = main_loop(); + if(!running) + break; + std::cerr<<"Mainloop unable to start, retrying in 10 sec\n"; + sleep(10); + } + + if(ret != 0) + std::cerr<<"Error not clearing, giveing up\n"; + + return ret; +} +