ipmifan/main.cpp

229 lines
6.2 KiB
C++

#include <cstddef>
#include <cstdint>
#include <freeipmi/api/ipmi-api.h>
#include <vector>
#include <string>
#include <iostream>
#include <algorithm>
#include <unistd.h>
#include <sensors/sensors.h>
#include <sensors/error.h>
#include <signal.h>
#include <limits>
#include <array>
#include <freeipmi/freeipmi.h>
#include <fandevice.h>
#include "ipmi.h"
#include "lm.h"
#include "fan.h"
#include "ipmifan.h"
#include "fandevicefan.h"
#include "fanzone.h"
// Loop-control flag: stays non-zero while the daemon should keep running.
// It is written from sig_handler(), so it has to be a volatile sig_atomic_t;
// that is the only object type a signal handler may portably assign to, and
// volatile keeps the compiler from caching the value across loop iterations.
volatile sig_atomic_t running = true;

// Signal handler: requests an orderly shutdown by clearing `running`.
// The signal number itself is not needed.
void sig_handler(int sig)
{
	(void)sig;
	running = false;
}
// When true, main_loop() skips printing the sensor readings and fan zone state.
// main() sets it as soon as any command line argument is given.
bool quiet;
std::vector<Sensor> gather_sensors(std::vector<Sensor>& ipmi_sensors, ipmi_monitoring_ctx_t ctx, std::vector<const sensors_chip_name*>& lm_chips)
{
std::vector<Sensor> out;
struct ipmi_monitoring_ipmi_config ipmi_config = {};
ipmi_config.driver_type = IPMI_MONITORING_DRIVER_TYPE_OPENIPMI;
bool grabids = false;
for(Sensor& sensor : ipmi_sensors)
{
if(sensor.id <= 0)
{
grabids = true;
break;
}
}
if(grabids)
{
if(!ipmi_fill_sensor_ids(ipmi_sensors, ctx, &ipmi_config))
{
std::cout<<"could not get ids for all the required sensors\n";
return out;
}
}
else
{
ipmi_update_sensors(ipmi_sensors, ctx, &ipmi_config);
}
out.insert(out.end(), ipmi_sensors.begin(), ipmi_sensors.end());
std::vector<Sensor> lm_sensors = lm_get_temperatures(lm_chips);
out.insert(out.end(), lm_sensors.begin(), lm_sensors.end());
return out;
}
// Linear fan curve.
//
// The result rises linearly from min_fan at low_temperature to max_fan at
// high_temperature and is limited to [min_fan, max_fan] outside that range.
// When `stop` is set, any temperature below low_temperature yields 0 instead
// of min_fan.
static double fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature, bool stop)
{
	const bool below_ramp = temperature < low_temperature;
	if(stop && below_ramp)
		return 0;

	const double gain = (max_fan-min_fan)/(high_temperature-low_temperature);
	const double ramp = min_fan+gain*(temperature-low_temperature);

	// Upper limit first, then lower limit; a NaN ramp therefore ends up at max_fan.
	const double capped = (ramp < max_fan) ? ramp : max_fan;
	return (capped < min_fan) ? min_fan : capped;
}
// Fan curve for the MI100 zones: fan_curve() plus a latched minimum speed and
// an extra ramp above high_temperature.
//
// push_down_state is a hysteresis latch owned by the caller: it is set once the
// temperature exceeds push_down_high_temperature and cleared once it drops
// below push_down_low_temperature. The value it had on entry decides whether
// the speed is raised to at least 0.6 for this call; the updated value only
// takes effect on the next call.
//
// Above high_temperature the result instead climbs from max_fan towards 1.0
// over 5 degrees and is capped at 1.0.
static double mi100_fan_curve(double temperature, double min_fan, double max_fan, double low_temperature, double high_temperature,
	double push_down_low_temperature, double push_down_high_temperature, bool &push_down_state)
{
	const double base = fan_curve(temperature, min_fan, max_fan, low_temperature, high_temperature, false);
	const bool was_pushed_down = push_down_state;

	if(temperature < push_down_low_temperature)
		push_down_state = false;
	else if(temperature > push_down_high_temperature)
		push_down_state = true;

	if(temperature > high_temperature)
	{
		const double overshoot = (temperature-high_temperature)/5.0;
		return std::min((1-max_fan)*overshoot+max_fan, 1.0);
	}

	return was_pushed_down ? std::max(base, 0.6) : base;
}
// Releases the raw IPMI context.
// Before closing, fan groups 0 and 1 are both set to 1 through
// ipmi_set_fan_group() (presumably full speed as a safe default; confirm
// against ipmi.h). The context is then closed and destroyed, so raw_ctx must
// not be used afterwards.
void ipmi_cleanup(ipmi_ctx_t raw_ctx)
{
	for(int group = 0; group <= 1; ++group)
		ipmi_set_fan_group(raw_ctx, group, 1);

	ipmi_ctx_close(raw_ctx);
	ipmi_ctx_destroy(raw_ctx);
}
int main_loop()
{
ipmi_ctx_t raw_ctx = ipmi_open_context();
if(!raw_ctx)
{
std::cerr<<"Unable to connect to impi\n";
return 1;
}
int ret = sensors_init(nullptr);
if(ret < 0)
{
std::cerr<<"Could not init lm_sensors\n";
ipmi_cleanup(raw_ctx);
return 1;
}
std::vector<const sensors_chip_name*> lm_chips = lm_get_chips("amdgpu-*");
std::vector<Sensor> ipmi_sensors;
ipmi_sensors.push_back(Sensor("IPMI", "CPU Temp"));
ipmi_sensors.push_back(Sensor("IPMI", "System Temp"));
std::vector<Sensor> lmSensors;
lmSensors.push_back(Sensor("amdgpu-pci-0300", "edge"));
lmSensors.push_back(Sensor("amdgpu-pci-8300", "edge"));
if(lm_chips.size() < 2)
{
std::cerr<<"Could not get enough monitored gpus!\n";
ipmi_cleanup(raw_ctx);
sensors_cleanup();
return 1;
}
ipmi_monitoring_ctx_t monitoring_ctx = init_ipmi_monitoring();
if(!monitoring_ctx)
{
std::cerr<<"Unable to connect to impi for monitoring\n";
ipmi_cleanup(raw_ctx);
sensors_cleanup();
return 1;
}
struct fandevice fdevice;
ret = fandevice_connect(&fdevice, 0);
if(ret < 0)
{
std::cerr<<"Unable to connect to FanDevice\n";
ipmi_cleanup(raw_ctx);
sensors_cleanup();
return 1;
}
std::vector<Fan*> fans;
fans.push_back(new IpmiFan(raw_ctx, 0, "IPMI CPU FAN"));
fans.push_back(new IpmiFan(raw_ctx, 1, "IPMI SYSTEM FAN"));
fans.push_back(new FanDeviceFan(&fdevice, FAN_A, "MI100_1 FAN"));
fans.push_back(new FanDeviceFan(&fdevice, FAN_B, "MI100_2 FAN"));
fans.push_back(new FanDeviceFan(&fdevice, FAN_D, "TOP SYSTEM FAN"));
fans.push_back(new FanDeviceFan(&fdevice, FAN_C, "FRONT SYSTEM FAN"));
std::array<bool, 2> pushDownStates = {true, true};
std::vector<FanZone*> fanZones;
fanZones.push_back(new FanZone(ipmi_sensors[0], fans[0], [](double in){return fan_curve(in, 0.1, 1, 45, 65, false);}, "CPU FAN ZONE"));
fanZones.push_back(new FanZone({ipmi_sensors[0], ipmi_sensors[1]}, fans[1], [](double in){return fan_curve(in, 0.2, 1, 40, 55, false);}, "SYSTEM FAN ZONE"));
fanZones.push_back(new FanZone({ipmi_sensors[0], ipmi_sensors[1]}, fans[4], [](double in){return fan_curve(in, 0.5, 1, 60, 65, true);}, "TOP FAN ZONE"));
fanZones.push_back(new FanZone({lmSensors[0], lmSensors[1]}, fans[5], [](double in){return fan_curve(in, 0, 1, 60, 80, true);}, "FRONT FAN ZONE"));
fanZones.push_back(new FanZone(lmSensors[1], fans[2], [&pushDownStates](double in){return mi100_fan_curve(in, 0.14, 0.7, 65, 80, 50, 70, pushDownStates[0]);}, "MI100_1 FAN ZONE"));
fanZones.push_back(new FanZone(lmSensors[0], fans[3], [&pushDownStates](double in){return mi100_fan_curve(in, 0.14, 0.7, 65, 80, 50, 70, pushDownStates[1]);}, "MI100_2 FAN ZONE"));
while(running)
{
std::vector<Sensor> sensors = gather_sensors(ipmi_sensors, monitoring_ctx, lm_chips);
if(!quiet)
{
for(const Sensor& sensor : sensors)
std::cout<<sensor.chip<<' '<<sensor.name<<": "<<sensor.reading<<'\n';
for(FanZone* zone : fanZones)
zone->print(sensors);
}
for(FanZone* zone : fanZones)
zone->step(sensors);
std::cout<<'\n';
sleep(10);
}
for(FanZone* zone : fanZones)
delete zone;
for(Fan* fan : fans)
delete fan;
ipmi_cleanup(raw_ctx);
ipmi_monitoring_ctx_destroy(monitoring_ctx);
sensors_cleanup();
return 0;
}
// Program entry point.
//
// Installs sig_handler for SIGABRT, SIGTERM, SIGHUP and SIGINT, enables quiet
// mode when any command line argument is present and then runs main_loop().
// If main_loop() returns while no shutdown was requested it is treated as a
// failed start and retried, for at most three attempts in total with a 10
// second pause between attempts.
//
// Returns the result of the last main_loop() call.
int main (int argc, char **argv)
{
	signal(SIGABRT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGHUP, sig_handler);
	signal(SIGINT, sig_handler);

	if(argc > 1)
		quiet = true;

	constexpr size_t max_attempts = 3;
	int ret = 0;
	for(size_t attempt = 1; attempt <= max_attempts; ++attempt)
	{
		ret = main_loop();

		// A cleared flag means the user asked to stop; nothing to retry.
		if(!running)
			break;

		// Only announce a retry (and wait for it) when one actually follows,
		// instead of sleeping another 10 seconds right before giving up.
		if(attempt == max_attempts)
			break;

		std::cerr<<"Mainloop unable to start, retrying in 10 sec\n";
		sleep(10);
	}

	if(ret != 0)
		std::cerr<<"Error not clearing, giveing up\n";
	return ret;
}