Compare commits
1 Commits
ef6b0c7d4b
...
master
Author | SHA1 | Date | |
---|---|---|---|
8c93f843b5 |
@ -11,12 +11,13 @@ endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
find_package(PkgConfig REQUIRED)
|
||||
pkg_search_module(IPMI_MONITORING REQUIRED libipmimonitoring)
|
||||
pkg_search_module(IPMI REQUIRED libfreeipmi)
|
||||
pkg_search_module(FANDEVICE REQUIRED fandevice)
|
||||
pkg_search_module(SYSTEMD systemd)
|
||||
|
||||
|
||||
add_executable(${PROJECT_NAME} main.cpp ipmi.cpp lm.cpp)
|
||||
target_link_libraries(${PROJECT_NAME} ${IPMI_LINK_LIBRARIES} ${IPMI_MONITORING_LINK_LIBRARIES} sensors)
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE ${IPMI_INCLUDE_DIRS} ${IPMI_MONITORING_INCLUDE_DIRS})
|
||||
add_executable(${PROJECT_NAME} main.cpp ipmi.cpp lm.cpp ipmifan.cpp fandevicefan.cpp fanzone.cpp)
|
||||
target_link_libraries(${PROJECT_NAME} ${IPMI_LINK_LIBRARIES} ${FANDEVICE_LINK_LIBRARIES} ${IPMI_MONITORING_LINK_LIBRARIES} sensors)
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE ${IPMI_INCLUDE_DIRS} ${FANDEVICE_INCLUDE_DIRS} ${IPMI_MONITORING_INCLUDE_DIRS})
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE "-Wall" "-O2" "-g" "-fno-strict-aliasing" "-Wfatal-errors" "-Wno-reorder")
|
||||
install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)
|
||||
|
||||
|
170
main.cpp
170
main.cpp
@ -10,10 +10,16 @@
|
||||
#include <sensors/error.h>
|
||||
#include <signal.h>
|
||||
#include <limits>
|
||||
#include <array>
|
||||
#include <freeipmi/freeipmi.h>
|
||||
#include <fandevice.h>
|
||||
|
||||
#include "ipmi.h"
|
||||
#include "lm.h"
|
||||
#include "fan.h"
|
||||
#include "ipmifan.h"
|
||||
#include "fandevicefan.h"
|
||||
#include "fanzone.h"
|
||||
|
||||
sig_atomic_t running = true;
|
||||
|
||||
@ -61,87 +67,36 @@ std::vector<Sensor> gather_sensors(std::vector<Sensor>& ipmi_sensors, ipmi_monit
|
||||
return out;
|
||||
}
|
||||
|
||||
double fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature)
|
||||
static double fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature, bool stop)
|
||||
{
|
||||
if(stop && temperature <low_temperature)
|
||||
return 0;
|
||||
double slope = (max_fan-min_fan)/(high_temperature-low_temperature);
|
||||
return std::max(std::min(max_fan, min_fan+slope*(temperature-low_temperature)), min_fan);
|
||||
}
|
||||
|
||||
double gpu_fan_zone(const std::vector<Sensor>& sensors)
|
||||
static double mi100_fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature,
|
||||
double push_down_low_temperature, double push_down_high_temperature, bool &push_down_state)
|
||||
{
|
||||
std::vector<std::pair<std::string, bool>> gpus = {{"amdgpu-pci-0300", false}, {"amdgpu-pci-8300", false}, {"amdgpu-pci-8900", false}};
|
||||
const char monitored_sensor_name[] = "edge";
|
||||
double speed = fan_curve(temperature, min_fan, max_fan, low_temperature, high_temperature, false);
|
||||
if(push_down_state)
|
||||
speed = std::max(speed, 0.6);
|
||||
if(temperature < push_down_low_temperature)
|
||||
push_down_state = false;
|
||||
else if(temperature > push_down_high_temperature)
|
||||
push_down_state = true;
|
||||
if(temperature > high_temperature)
|
||||
return std::min((1-max_fan)*((temperature-high_temperature)/5.0)+max_fan, 1.0);
|
||||
|
||||
double max_temp = std::numeric_limits<double>::min();
|
||||
for(const Sensor& sensor : sensors)
|
||||
{
|
||||
if(sensor.name == monitored_sensor_name)
|
||||
{
|
||||
for(std::pair<std::string, bool>& gpu : gpus)
|
||||
{
|
||||
if(sensor.chip == gpu.first)
|
||||
{
|
||||
if(max_temp < sensor.reading)
|
||||
max_temp = sensor.reading;
|
||||
gpu.second = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for(std::pair<std::string, bool>& gpu : gpus)
|
||||
{
|
||||
if(!gpu.second)
|
||||
{
|
||||
std::cerr<<"Could not get temperature from "<<gpu.first<<" ramping fans to maximum\n";
|
||||
return 1.0;
|
||||
}
|
||||
return speed;
|
||||
}
|
||||
|
||||
return fan_curve(max_temp, 0.05, 1.0, 45, 75);
|
||||
}
|
||||
|
||||
double system_fan_zone(const std::vector<Sensor>& sensors)
|
||||
void ipmi_cleanup(ipmi_ctx_t raw_ctx)
|
||||
{
|
||||
Sensor cpu("IPMI", "CPU Temp");
|
||||
Sensor system("IPMI", "System Temp");
|
||||
bool hitCpu = false;
|
||||
bool hitSystem = false;
|
||||
std::vector<double> out;
|
||||
|
||||
for(const Sensor& sensor : sensors)
|
||||
{
|
||||
if(cpu == sensor)
|
||||
{
|
||||
hitCpu = true;
|
||||
cpu = sensor;
|
||||
}
|
||||
else if(sensor == system)
|
||||
{
|
||||
hitSystem = true;
|
||||
system = sensor;
|
||||
}
|
||||
}
|
||||
|
||||
if(hitCpu && hitSystem)
|
||||
{
|
||||
double fanSystem = fan_curve(system.reading, 0.33, 1.0, 40, 65);
|
||||
double fanCpu = fan_curve(cpu.reading, 0.33, 1.0, 40, 70);
|
||||
|
||||
return std::max(fanSystem, fanCpu);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cerr<<"Could not get temperature from System or Cpu! Ramping fans to maximum\n";
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<double> get_fan_zones(const std::vector<Sensor>& sensors)
|
||||
{
|
||||
std::vector<double> out;
|
||||
out.push_back(system_fan_zone(sensors));
|
||||
out.push_back(gpu_fan_zone(sensors));
|
||||
return out;
|
||||
ipmi_set_fan_group(raw_ctx, 0, 1);
|
||||
ipmi_set_fan_group(raw_ctx, 1, 1);
|
||||
ipmi_ctx_close(raw_ctx);
|
||||
ipmi_ctx_destroy(raw_ctx);
|
||||
}
|
||||
|
||||
int main_loop()
|
||||
@ -149,7 +104,7 @@ int main_loop()
|
||||
ipmi_ctx_t raw_ctx = ipmi_open_context();
|
||||
if(!raw_ctx)
|
||||
{
|
||||
sensors_cleanup();
|
||||
std::cerr<<"Unable to connect to impi\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -157,10 +112,7 @@ int main_loop()
|
||||
if(ret < 0)
|
||||
{
|
||||
std::cerr<<"Could not init lm_sensors\n";
|
||||
ipmi_set_fan_group(raw_ctx, 0, 1);
|
||||
ipmi_set_fan_group(raw_ctx, 1, 1);
|
||||
ipmi_ctx_close(raw_ctx);
|
||||
ipmi_ctx_destroy(raw_ctx);
|
||||
ipmi_cleanup(raw_ctx);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -169,13 +121,14 @@ int main_loop()
|
||||
ipmi_sensors.push_back(Sensor("IPMI", "CPU Temp"));
|
||||
ipmi_sensors.push_back(Sensor("IPMI", "System Temp"));
|
||||
|
||||
std::vector<Sensor> lmSensors;
|
||||
lmSensors.push_back(Sensor("amdgpu-pci-0300", "edge"));
|
||||
lmSensors.push_back(Sensor("amdgpu-pci-8300", "edge"));
|
||||
|
||||
if(lm_chips.size() < 2)
|
||||
{
|
||||
std::cerr<<"Could not get both monitored gpus!";
|
||||
ipmi_set_fan_group(raw_ctx, 0, 1);
|
||||
ipmi_set_fan_group(raw_ctx, 1, 1);
|
||||
ipmi_ctx_close(raw_ctx);
|
||||
ipmi_ctx_destroy(raw_ctx);
|
||||
std::cerr<<"Could not get enough monitored gpus!\n";
|
||||
ipmi_cleanup(raw_ctx);
|
||||
sensors_cleanup();
|
||||
return 1;
|
||||
}
|
||||
@ -183,37 +136,64 @@ int main_loop()
|
||||
ipmi_monitoring_ctx_t monitoring_ctx = init_ipmi_monitoring();
|
||||
if(!monitoring_ctx)
|
||||
{
|
||||
ipmi_set_fan_group(raw_ctx, 0, 1);
|
||||
ipmi_set_fan_group(raw_ctx, 1, 1);
|
||||
ipmi_ctx_close(raw_ctx);
|
||||
ipmi_ctx_destroy(raw_ctx);
|
||||
std::cerr<<"Unable to connect to impi for monitoring\n";
|
||||
ipmi_cleanup(raw_ctx);
|
||||
sensors_cleanup();
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct fandevice fdevice;
|
||||
ret = fandevice_connect(&fdevice, 0);
|
||||
if(ret < 0)
|
||||
{
|
||||
std::cerr<<"Unable to connect to FanDevice\n";
|
||||
ipmi_cleanup(raw_ctx);
|
||||
sensors_cleanup();
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::vector<Fan*> fans;
|
||||
fans.push_back(new IpmiFan(raw_ctx, 0, "IPMI CPU FAN"));
|
||||
fans.push_back(new IpmiFan(raw_ctx, 1, "IPMI SYSTEM FAN"));
|
||||
fans.push_back(new FanDeviceFan(&fdevice, FAN_A, "MI100_1 FAN"));
|
||||
fans.push_back(new FanDeviceFan(&fdevice, FAN_B, "MI100_2 FAN"));
|
||||
fans.push_back(new FanDeviceFan(&fdevice, FAN_D, "TOP SYSTEM FAN"));
|
||||
fans.push_back(new FanDeviceFan(&fdevice, FAN_C, "FRONT SYSTEM FAN"));
|
||||
|
||||
std::array<bool, 2> pushDownStates = {true, true};
|
||||
|
||||
std::vector<FanZone*> fanZones;
|
||||
fanZones.push_back(new FanZone(ipmi_sensors[0], fans[0], [](double in){return fan_curve(in, 0.1, 1, 45, 65, false);}, "CPU FAN ZONE"));
|
||||
fanZones.push_back(new FanZone({ipmi_sensors[0], ipmi_sensors[1]}, fans[1], [](double in){return fan_curve(in, 0.2, 1, 40, 55, false);}, "SYSTEM FAN ZONE"));
|
||||
fanZones.push_back(new FanZone({ipmi_sensors[0], ipmi_sensors[1]}, fans[4], [](double in){return fan_curve(in, 0.5, 1, 60, 65, true);}, "TOP FAN ZONE"));
|
||||
fanZones.push_back(new FanZone({lmSensors[0], lmSensors[1]}, fans[5], [](double in){return fan_curve(in, 0, 1, 60, 80, true);}, "FRONT FAN ZONE"));
|
||||
fanZones.push_back(new FanZone(lmSensors[1], fans[2], [&pushDownStates](double in){return mi100_fan_curve(in, 0.14, 0.7, 65, 80, 50, 70, pushDownStates[0]);}, "MI100_1 FAN ZONE"));
|
||||
fanZones.push_back(new FanZone(lmSensors[0], fans[3], [&pushDownStates](double in){return mi100_fan_curve(in, 0.14, 0.7, 65, 80, 50, 70, pushDownStates[1]);}, "MI100_2 FAN ZONE"));
|
||||
while(running)
|
||||
{
|
||||
std::vector<Sensor> sensors = gather_sensors(ipmi_sensors, monitoring_ctx, lm_chips);
|
||||
std::vector<double> fanzones = get_fan_zones(sensors);
|
||||
|
||||
if(!quiet)
|
||||
{
|
||||
for(const Sensor& sensor : sensors)
|
||||
std::cout<<"Sensor "<<sensor.chip<<':'<<sensor.name<<"\t= "<<sensor.reading<<'\n';
|
||||
for(size_t i = 0; i < fanzones.size(); ++i)
|
||||
std::cout<<"setting fan group "<<i<<" to "<<fanzones[i]*100<<"%\n";
|
||||
std::cout<<sensor.chip<<' '<<sensor.name<<": "<<sensor.reading<<'\n';
|
||||
|
||||
for(FanZone* zone : fanZones)
|
||||
zone->print(sensors);
|
||||
}
|
||||
|
||||
ipmi_set_fan_group(raw_ctx, 0, fanzones[0]);
|
||||
ipmi_set_fan_group(raw_ctx, 1, fanzones[1]);
|
||||
for(FanZone* zone : fanZones)
|
||||
zone->step(sensors);
|
||||
std::cout<<'\n';
|
||||
sleep(10);
|
||||
}
|
||||
|
||||
ipmi_set_fan_group(raw_ctx, 0, 1);
|
||||
ipmi_set_fan_group(raw_ctx, 1, 1);
|
||||
ipmi_ctx_close(raw_ctx);
|
||||
ipmi_ctx_destroy(raw_ctx);
|
||||
for(FanZone* zone : fanZones)
|
||||
delete zone;
|
||||
for(Fan* fan : fans)
|
||||
delete fan;
|
||||
|
||||
ipmi_cleanup(raw_ctx);
|
||||
ipmi_monitoring_ctx_destroy(monitoring_ctx);
|
||||
sensors_cleanup();
|
||||
|
||||
|
2
sensor.h
2
sensor.h
@ -13,5 +13,5 @@ public:
|
||||
public:
|
||||
Sensor() = default;
|
||||
Sensor(std::string chipI, std::string nameI, int idI = 0): name(nameI), chip(chipI), id(idI) {}
|
||||
bool operator==(const Sensor& other) {return other.name == name && other.chip == chip;}
|
||||
bool operator==(const Sensor& other) const {return other.name == name && other.chip == chip;}
|
||||
};
|
||||
|
Reference in New Issue
Block a user