Samhain's thermal handling removal

This commit is contained in:
kat witch 2021-07-27 01:13:30 +01:00
parent 2f45c545f2
commit 72bcd1fc12
No known key found for this signature in database
GPG key ID: 1B477797DCA5EC72
4 changed files with 0 additions and 569 deletions

View file

@ -1,39 +0,0 @@
# NixOS module for CPU/GPU power and thermal handling.
#
# What it configures:
#   * adds amdgpu.ppfeaturemask=0xffffffff to the kernel command line;
#   * enables power management with the "conservative" cpufreq governor;
#   * kaede-thermals: long-running root service driving kaede-thermals.sh;
#   * kaede-power:    oneshot root service driving kaede-power.sh;
#   * thermald with the bundled kaede-thermald.xml.
{ config, pkgs, ... }:
{
  boot.kernelParams = [ "amdgpu.ppfeaturemask=0xffffffff" ];

  powerManagement = {
    enable = true;
    cpuFreqGovernor = "conservative";
  };

  systemd = {
    services = {
      # `start` never returns (the script loops forever), hence Type=simple.
      kaede-thermals = {
        wantedBy = [ "multi-user.target" ];
        path = [ pkgs.bash pkgs.coreutils-full pkgs.gawk ];
        serviceConfig = {
          RemainAfterExit = "no";
          Type = "simple";
          ExecStart = "${pkgs.runtimeShell} ${./kaede-thermals.sh} start";
          ExecStop = "${pkgs.runtimeShell} ${./kaede-thermals.sh} stop";
          User = "root";
        };
      };

      # `start` applies the governor settings and exits; the unit stays
      # "active" afterwards so that `stop` can revert them later.
      kaede-power = {
        wantedBy = [ "multi-user.target" ];
        # kaede-power.sh invokes modprobe as well as cpupower, so kmod is
        # listed explicitly instead of relying on it already being on the
        # unit's PATH.
        path = [ pkgs.bash pkgs.kmod pkgs.linuxPackages.cpupower ];
        serviceConfig = {
          RemainAfterExit = "yes";
          Type = "oneshot";
          ExecStart = "${pkgs.runtimeShell} ${./kaede-power.sh} start";
          ExecStop = "${pkgs.runtimeShell} ${./kaede-power.sh} stop";
          User = "root";
        };
      };
    };
  };

  services.thermald = {
    enable = true;
    configFile = "${./kaede-thermald.xml}";
  };
}

View file

@ -1,65 +0,0 @@
#!/usr/bin/env bash
################################################################################
# Written by Kaede Fox <kaede@boxedfox.org>
#
# Selects the CPU frequency governor and writes its tunables.
#
# Usage: <script> (start | stop)
#   start  load the cpufreq governor modules, select $METHOD, write the
#          tunables configured below into sysfs, then select $METHOD again.
#   stop   select the "performance" governor and unload the conservative and
#          ondemand governor modules.
##########
# Default governor, currently required to be "conservative".
METHOD=conservative
# Configuration parameters.
SCALE_UP=40
SCALE_DOWN=30
SCALE_STEP=1
SAMPLING_RATE=10000
SAMPLING_FACTOR=10
NO_NICE=1
##########
# Parameter expansion instead of `basename $0`: no fork, and safe when the
# script path contains whitespace.
script_name=${0##*/}

#######################################
# Write one governor tunable.
# Globals:   CONFIG (read), script_name (read)
# Arguments: $1 - file name of the tunable inside $CONFIG
#            $2 - value to write
# Outputs:   a diagnostic on stderr when the tunable does not exist
# Returns:   0 on success, non-zero if the tunable is missing or the write
#            fails
#######################################
set_tunable() {
  local file="${CONFIG}/$1"
  local value=$2

  if [[ ! -e "$file" ]]; then
    printf '%s: no such tunable: %s\n' "$script_name" "$file" >&2
    return 1
  fi
  # chmod takes the mode first. The previous call passed the path first, so
  # it was always rejected. The mode is applied per file rather than to the
  # directory, where 644 would clear the search bit.
  chmod 644 -- "$file"
  printf '%s\n' "$value" > "$file"
}

case "${1:-}" in
  "start")
    modprobe cpufreq_conservative
    modprobe cpufreq_ondemand
    # This also needs to be loaded, in case the service is stopped later
    # we can fall back to it.
    modprobe cpufreq_performance
    # The governor must be selected for its relevant configuration entries
    # to appear in sysfs.
    cpupower frequency-set -g "$METHOD"
    sleep .5s
    CONFIG="/sys/devices/system/cpu/cpufreq/${METHOD}"
    # A failed write is reported by set_tunable but does not abort the run,
    # so the remaining tunables are still applied.
    set_tunable up_threshold "$SCALE_UP"
    set_tunable down_threshold "$SCALE_DOWN"
    set_tunable freq_step "$SCALE_STEP"
    set_tunable sampling_rate "$SAMPLING_RATE"
    set_tunable sampling_down_factor "$SAMPLING_FACTOR"
    set_tunable ignore_nice_load "$NO_NICE"
    sleep .5s
    # Force reload all configuration.
    cpupower frequency-set -g "$METHOD"
    echo "enabled cpupower"
    echo "loaded ${script_name}"
    ;;
  "stop")
    # Runs in the foreground: the old `cmd && echo & wait` form backgrounded
    # the list only to wait for it immediately.
    if cpupower frequency-set -g performance; then
      echo "disabled cpupower"
    fi
    sleep .5s
    modprobe -r cpufreq_conservative
    modprobe -r cpufreq_ondemand
    echo "unloaded ${script_name}"
    ;;
  *)
    echo "Usage: ${script_name} (start | stop)" >&2
    exit 1
    ;;
esac

View file

@ -1,308 +0,0 @@
<?xml version="1.0"?>
<!--
use "man thermal-conf.xml" for details
-->
<!-- BEGIN -->
<ThermalConfiguration>
<Platform>
<Name>AMD Ryzen 5 3600 6-Core Processor</Name>
<ProductName>*</ProductName>
<UUID>*</UUID>
<Preference>quiet</Preference>
<ThermalSensors>
<!-- Both sensor paths below are symlinks inside /var/cache/kaede-thermals.
kaede-thermals.sh creates and periodically refreshes them so that they
point at the current hwmon*/temp1_input file of each device. -->
<ThermalSensor>
<!-- CPU sensor on die. -->
<Type>CPU_TEMP_CORE</Type>
<Path>/var/cache/kaede-thermals/cpu_core_temp</Path>
<AsyncCapable>1</AsyncCapable>
</ThermalSensor>
<ThermalSensor>
<!-- GPU sensor. -->
<Type>GPU_TEMP</Type>
<Path>/var/cache/kaede-thermals/gpu_temp</Path>
<AsyncCapable>1</AsyncCapable>
</ThermalSensor>
</ThermalSensors>
<ThermalZones>
<ThermalZone>
<Type>CPU_CTRL</Type>
<TripPoints>
<!-- CPU PASSIVE CONTROL -->
<!-- Ideal temp: 70-75*C, Max temp: 95*C -->
<TripPoint>
<SensorType>CPU_TEMP_CORE</SensorType>
<Temperature>65000</Temperature>
<Type>passive</Type>
<ControlType>parallel</ControlType>
<CoolingDevice>
<index>1</index>
<type>CPU_FREQ0</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>2</index>
<type>CPU_FREQ1</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>3</index>
<type>CPU_FREQ2</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>4</index>
<type>CPU_FREQ3</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>5</index>
<type>CPU_FREQ4</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>6</index>
<type>CPU_FREQ5</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>7</index>
<type>CPU_FREQ6</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>8</index>
<type>CPU_FREQ7</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>9</index>
<type>CPU_FREQ8</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>10</index>
<type>CPU_FREQ9</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>11</index>
<type>CPU_FREQ10</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>12</index>
<type>CPU_FREQ11</type>
<influence>100</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
</TripPoint>
<!-- EMERGENCY THROTTLING. -->
<!-- HOT means suspend the system. -->
<!-- CRITICAL means turn off the system. -->
<TripPoint>
<SensorType>CPU_TEMP_CORE</SensorType>
<Temperature>85000</Temperature>
<Type>hot</Type>
</TripPoint>
<TripPoint>
<SensorType>CPU_TEMP_CORE</SensorType>
<Temperature>90000</Temperature>
<Type>critical</Type>
</TripPoint>
</TripPoints>
</ThermalZone>
<ThermalZone>
<Type>GPU_CTRL</Type>
<TripPoints>
<!-- GPU PASSIVE CONTROL -->
<!-- Ideal temp: 70-75*C, Max temp: 90*C -->
<TripPoint>
<SensorType>GPU_TEMP</SensorType>
<Temperature>70000</Temperature>
<Type>passive</Type>
<ControlType>parallel</ControlType>
<CoolingDevice>
<index>1</index>
<type>GPU_FREQ_CORE</type>
<influence>50</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
<CoolingDevice>
<index>2</index>
<type>GPU_FREQ_MEM</type>
<influence>50</influence>
<SamplingPeriod>1</SamplingPeriod>
</CoolingDevice>
</TripPoint>
</TripPoints>
</ThermalZone>
</ThermalZones>
<CoolingDevices>
<!-- CPU frequency scaling. -->
<!-- This allows finer control of the CPU scaling in comparison to -->
<!-- thermald's built in 'cpufreq' driver. -->
<CoolingDevice>
<Type>CPU_FREQ0</Type>
<Path>/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<CoolingDevice>
<Type>CPU_FREQ1</Type>
<Path>/sys/devices/system/cpu/cpu1/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<CoolingDevice>
<Type>CPU_FREQ2</Type>
<Path>/sys/devices/system/cpu/cpu2/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<CoolingDevice>
<Type>CPU_FREQ3</Type>
<Path>/sys/devices/system/cpu/cpu3/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<CoolingDevice>
<Type>CPU_FREQ4</Type>
<Path>/sys/devices/system/cpu/cpu4/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<CoolingDevice>
<Type>CPU_FREQ5</Type>
<Path>/sys/devices/system/cpu/cpu5/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<CoolingDevice>
<Type>CPU_FREQ6</Type>
<Path>/sys/devices/system/cpu/cpu6/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<CoolingDevice>
<Type>CPU_FREQ7</Type>
<Path>/sys/devices/system/cpu/cpu7/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<CoolingDevice>
<Type>CPU_FREQ8</Type>
<Path>/sys/devices/system/cpu/cpu8/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<CoolingDevice>
<Type>CPU_FREQ9</Type>
<Path>/sys/devices/system/cpu/cpu9/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<CoolingDevice>
<Type>CPU_FREQ10</Type>
<Path>/sys/devices/system/cpu/cpu10/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<CoolingDevice>
<!-- Frequency cap for logical CPU 11. The path must name cpu11; pointing
it at cpu7 left CPU 11 unthrottled and made this device write the same
file as CPU_FREQ7. -->
<Type>CPU_FREQ11</Type>
<Path>/sys/devices/system/cpu/cpu11/cpufreq/scaling_max_freq</Path>
<MinState>3600000</MinState>
<MaxState>2200000</MaxState>
<IncDecStep>-100000</IncDecStep>
<ReadBack>1</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
</CoolingDevice>
<!-- GPU frequency scaling. -->
<!-- This is read by a custom service and translated into the format
expected by the video driver. -->
<CoolingDevice>
<!-- thermald writes the requested core power level (255 down to 0, in
steps of 5) to this file; kaede-thermals.sh polls it and converts the
value into a list of performance states for the video driver. -->
<Type>GPU_FREQ_CORE</Type>
<Path>/var/cache/kaede-thermals/gpu_power_core</Path>
<MinState>255</MinState>
<MaxState>0</MaxState>
<IncDecStep>-5</IncDecStep>
<!-- Make sure ReadBack is disabled, as our service needs to
reset the value periodically. -->
<ReadBack>0</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
<!-- As negative values are possible and have special meaning,
leave room for the sign. -->
<WritePrefix> </WritePrefix>
</CoolingDevice>
<CoolingDevice>
<Type>GPU_FREQ_MEM</Type>
<Path>/var/cache/kaede-thermals/gpu_power_mem</Path>
<MinState>255</MinState>
<MaxState>0</MaxState>
<IncDecStep>-5</IncDecStep>
<ReadBack>0</ReadBack>
<AutoOffMode>1</AutoOffMode>
<DebouncePeriod>10</DebouncePeriod>
<WritePrefix> </WritePrefix>
</CoolingDevice>
</CoolingDevices>
</Platform>
</ThermalConfiguration>

View file

@ -1,157 +0,0 @@
#!/usr/bin/env bash
################################################################################
# Written by Kaede Fox <kaede@boxedfox.org>
#
# Configuration for the thermal helper service: where the static sensor
# symlinks live, which sysfs files are read and written, and how often the
# service loop does its work.
##########
# thermald can't handle hwmon* moving around, so we use symlinks to resolve
# dynamic paths to static paths.
THERMAL_PATH="/var/cache/kaede-thermals"
# CPU sensor paths. The hwmon* component is a glob that is resolved at run
# time.
CPU_SENSOR_CORE="/sys/devices/pci0000:00/0000:00:18.3/hwmon/hwmon*/temp1_input"
#CPU_SENSOR_SOCKET="/sys/devices/platform/nct6775.656/hwmon/hwmon*/temp2_input"
# GPU control and sensor paths (using amdgpu).
GPU_CONTROL="/sys/class/drm/card0/device/"
GPU_SENSOR="/sys/class/drm/card0/device/hwmon/hwmon*/temp1_input"
# WARNING: MAKE ABSOLUTELY SURE THESE ARE THE PERFORMANCE STATES AND NOT THE
# ACTUAL CORE/MEM CLOCKS OR WE COULD DAMAGE THE HARDWARE.
GPUCTRL_CORECLK="${GPU_CONTROL}/pp_dpm_sclk"
GPUCTRL_MEMCLK="${GPU_CONTROL}/pp_dpm_mclk"
# How often to resolve paths (in seconds). Changes are extremely rare, but they
# can occur even while the system is booted.
INTERVAL='300'
# How often to update GPU power settings from thermald (in seconds).
TIMESLICE='1'
# Formatter used to round floats to integers via printf.
float_to_int="%'.0f"
##########
# Parameter expansion instead of `basename $0`: no fork, and safe when the
# script path contains whitespace.
script_name=${0##*/}
# None of the settings above is meant to change while the service runs.
readonly THERMAL_PATH CPU_SENSOR_CORE GPU_CONTROL GPU_SENSOR
readonly GPUCTRL_CORECLK GPUCTRL_MEMCLK INTERVAL TIMESLICE
readonly float_to_int script_name
#######################################
# Point a symlink at the first existing path matched by a glob pattern.
# Arguments: $1 - glob pattern (e.g. ".../hwmon/hwmon*/temp1_input")
#            $2 - symlink to create or replace
# Outputs:   a diagnostic on stderr when nothing matches
# Returns:   0 when the link was written, non-zero otherwise; an existing
#            link is left untouched when nothing matches
#######################################
_link_first_match() {
  local pattern=$1
  local link_name=$2
  # Empty IFS: the expansion below is globbed but never word-split.
  local IFS=
  local -a matches
  # shellcheck disable=SC2206 — unquoted on purpose so the glob expands
  matches=( $pattern )
  # An unmatched glob stays literal (or expands to nothing under nullglob);
  # either way the first element is then not an existing file.
  if [[ ! -e "${matches[0]}" ]]; then
    printf 'sensor not found: %s\n' "$pattern" >&2
    return 1
  fi
  ln -s -f -n -- "${matches[0]}" "$link_name"
}

# Our simple path resolver subroutine, which replaces hwmon* with whichever
# hwmon is currently active and (re)creates the static symlinks in the
# current directory. The shell expands the glob itself instead of parsing
# `ls` output, so a missing sensor no longer yields an `ln` call with an
# empty target and several matches no longer yield a multi-line target.
# Globals: CPU_SENSOR_CORE (read), GPU_SENSOR (read)
# Returns: 0 if every link was written, 1 if any sensor could not be linked.
sub_resolve_paths() {
  local status=0
  _link_first_match "$CPU_SENSOR_CORE" "./cpu_core_temp" || status=1
  # _link_first_match "$CPU_SENSOR_SOCKET" "./cpu_socket_temp" || status=1
  _link_first_match "$GPU_SENSOR" "./gpu_temp" || status=1
  return "$status"
}
# Number of performance levels offered for the GPU core and memory clocks.
# The core and memory clock lists are highly specific to the hardware, and
# need updating if the video card is ever changed.
GPU_CORE_LEVELS=7
GPU_MEM_LEVELS=3

#######################################
# Print the performance-level list "1 2 ... N".
# Arguments: $1 - highest level to include (N >= 1)
# Outputs:   the space-separated list followed by a newline
#######################################
gpu_level_list() {
  local count=$1
  local list=1
  local i
  for ((i = 2; i <= count; i++)); do
    list+=" $i"
  done
  printf '%s\n' "$list"
}

#######################################
# Consume one pending request from thermald and apply it to the driver.
#
# The request file holds -1 when idle. thermald overwrites it with a value
# in 0..255, which is scaled linearly onto 1..max levels; levels 1 up to the
# result are then written to the sysfs target.
# Arguments: $1 - request file written by thermald
#            $2 - number of performance levels available
#            $3 - sysfs file that receives the level list
# Returns:   0 when there was nothing to do, else the status of the final
#            sysfs write
#######################################
apply_gpu_request() {
  local request_file=$1
  local max_level=$2
  local sysfs_target=$3
  local -r pending_re='^[[:space:]]*([0-9]+)[[:space:]]*$'
  local request value level_count

  request=$(cat "$request_file")
  # Empty, negative (-1 means "nothing pending") or malformed: nothing to do.
  [[ "$request" =~ $pending_re ]] || return 0
  # Force base 10 so that a zero-padded value is not read as octal.
  value=$((10#${BASH_REMATCH[1]}))

  # Mark the request as consumed, and keep a copy for inspection. The copy
  # is written with a fixed format so the file content is never interpreted
  # as a printf format string.
  printf '%d\n' '-1' > "$request_file"
  printf '%s\n' "$request" > "${request_file}_cached"

  # round(value / 255 * (max - 1)) + 1 in integer arithmetic. For 7 and 3
  # levels no value lands exactly on .5, so this matches the former awk plus
  # printf '%.0f' rounding.
  level_count=$(( (2 * value * (max_level - 1) + 255) / 510 + 1 ))
  if (( level_count < 1 )); then level_count=1; fi
  if (( level_count > max_level )); then level_count=$max_level; fi

  gpu_level_list "$level_count" > "$sysfs_target"
}

case "${1:-}" in
  "start")
    echo "loaded ${script_name}"
    mkdir -p "$THERMAL_PATH" 2>/dev/null
    # Every path below is relative to $THERMAL_PATH; refuse to continue
    # rather than remove and create files in whatever directory we are in.
    if ! cd "$THERMAL_PATH"; then
      echo "${script_name}: cannot enter ${THERMAL_PATH}" >&2
      exit 1
    fi
    # Switch the video driver into manual control mode for
    # performance levels.
    echo "manual" > "${GPU_CONTROL}/power_dpm_force_performance_level"
    echo "enabled gpupower"
    # Immediately resolve paths at startup.
    sub_resolve_paths
    # Followed by starting thermald in case the paths didn't
    # exist, which would've made it fail to start.
    sleep .5s
    systemctl start thermald
    # thermald will create these files with the wrong permissions, so
    # recreate them ourselves in the idle state.
    rm -f -- "./gpu_power_core"
    printf '%d\n' '-1' > ./gpu_power_core
    rm -f -- "./gpu_power_mem"
    printf '%d\n' '-1' > ./gpu_power_mem
    # Enter service mode.
    slice_counter=0
    while true; do
      # Execute a timeslice. Paths are updated every time the
      # counter wraps around.
      slice_counter=$((slice_counter + TIMESLICE))
      if [ "$slice_counter" -ge "$INTERVAL" ]; then
        slice_counter=$((slice_counter - INTERVAL))
        # Resolve paths.
        sub_resolve_paths
      fi
      # Read GPU thermal settings from thermald and apply
      # them to the driver via sysfs.
      apply_gpu_request ./gpu_power_core "$GPU_CORE_LEVELS" "$GPUCTRL_CORECLK"
      apply_gpu_request ./gpu_power_mem "$GPU_MEM_LEVELS" "$GPUCTRL_MEMCLK"
      # Sleep until the next cycle.
      sleep "${TIMESLICE}s"
    done
    ;;
  "stop")
    # Reset all performance level tunings.
    gpu_level_list "$GPU_CORE_LEVELS" > "${GPUCTRL_CORECLK}"
    gpu_level_list "$GPU_MEM_LEVELS" > "${GPUCTRL_MEMCLK}"
    # Switch the video driver into automatic control mode for
    # performance levels.
    echo "auto" > "${GPU_CONTROL}/power_dpm_force_performance_level"
    echo "disabled gpupower"
    sleep .5s
    echo "unloaded ${script_name}"
    ;;
  *)
    echo "Usage: ${script_name} (start | stop)"
    exit 1
    ;;
esac