Improvements to heap-memory and PSRAM handling (#4791)

* Improved heap and PSRAM handling

- Segment `allocateData()` uses more elaborate DRAM checking to reduce fragmentation and allow for larger setups to run on low heap
- Segment data allocation fails if minimum contiguous block size runs low to keep the UI working
- Increased `MAX_SEGMENT_DATA` to account for better segment data handling
- Memory allocation functions try to keep enough DRAM for segment data
- Added constant `PSRAM_THRESHOLD` to improve PSARM usage
- Increase MIN_HEAP_SIZE to reduce risk of breaking UI due to low memory for JSON response
- ESP32 makes use of IRAM (no 8bit access) for pixeluffers, freeing up to 50kB of RAM
- Fix to properly get available heap on all platforms: added function `getFreeHeapSize()`
- Bugfix for effects that divide by SEGLEN: don't run FX in service() if segment is not active
-Syntax fix in AR: calloc() uses (numelements, size) as arguments

* Added new functions for allocation and heap checking

- added `allocate_buffer()` function that can be used to allocate large buffers: takes parameters to set preferred ram location, including 32bit accessible RAM on ESP32. Returns null if heap runs low or switches to PSRAM
- getFreeHeapSize() and getContiguousFreeHeap() helper functions for all platforms to correctly report free useable heap
- updated some constants
- updated segment data allocation to free the data if it is large

- replaced "psramsafe" variable with it's #ifdef: BOARD_HAS_PSRAM and made accomodating changes
- added some compile-time checks to handle invalid env. definitions
- updated all allocation functions and some of the logic behind them
- added use of fast RTC-Memory where available
- increased MIN_HEAP_SIZE for all systems (improved stability in tests)
- updated memory calculation in web-UI to account for required segment buffer
- added UI alerts if buffer allocation fails
- made getUsedSegmentData() non-private (used in buffer alloc function)
- changed MAX_SEGMENT_DATA
- added more detailed memory log to DEBUG output
- added debug output to buffer alloc function
This commit is contained in:
Damian Schneider
2025-09-16 19:46:16 +02:00
committed by GitHub
parent 9d706010f5
commit 76cb2e9988
15 changed files with 364 additions and 219 deletions

89
wled00/FX_fcn.cpp Normal file → Executable file
View File

@@ -68,10 +68,10 @@ Segment::Segment(const Segment &orig) {
if (!stop) return; // nothing to do if segment is inactive/invalid
if (orig.pixels) {
// allocate pixel buffer: prefer IRAM/PSRAM
pixels = static_cast<uint32_t*>(d_malloc(sizeof(uint32_t) * orig.length()));
pixels = static_cast<uint32_t*>(allocate_buffer(orig.length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS));
if (pixels) {
memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length());
if (orig.name) { name = static_cast<char*>(d_malloc(strlen(orig.name)+1)); if (name) strcpy(name, orig.name); }
if (orig.name) { name = static_cast<char*>(allocate_buffer(strlen(orig.name)+1, BFRALLOC_PREFER_PSRAM)); if (name) strcpy(name, orig.name); }
if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); }
} else {
DEBUGFX_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!"));
@@ -97,10 +97,10 @@ Segment& Segment::operator= (const Segment &orig) {
//DEBUG_PRINTF_P(PSTR("-- Copying segment: %p -> %p\n"), &orig, this);
if (this != &orig) {
// clean destination
if (name) { d_free(name); name = nullptr; }
if (name) { p_free(name); name = nullptr; }
if (_t) stopTransition(); // also erases _t
deallocateData();
d_free(pixels);
p_free(pixels);
// copy source
memcpy((void*)this, (void*)&orig, sizeof(Segment));
// erase pointers to allocated data
@@ -111,10 +111,10 @@ Segment& Segment::operator= (const Segment &orig) {
// copy source data
if (orig.pixels) {
// allocate pixel buffer: prefer IRAM/PSRAM
pixels = static_cast<uint32_t*>(d_malloc(sizeof(uint32_t) * orig.length()));
pixels = static_cast<uint32_t*>(allocate_buffer(orig.length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS));
if (pixels) {
memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length());
if (orig.name) { name = static_cast<char*>(d_malloc(strlen(orig.name)+1)); if (name) strcpy(name, orig.name); }
if (orig.name) { name = static_cast<char*>(allocate_buffer(strlen(orig.name)+1, BFRALLOC_PREFER_PSRAM)); if (name) strcpy(name, orig.name); }
if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); }
} else {
DEBUG_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!"));
@@ -130,10 +130,10 @@ Segment& Segment::operator= (const Segment &orig) {
Segment& Segment::operator= (Segment &&orig) noexcept {
//DEBUG_PRINTF_P(PSTR("-- Moving segment: %p -> %p\n"), &orig, this);
if (this != &orig) {
if (name) { d_free(name); name = nullptr; } // free old name
if (name) { p_free(name); name = nullptr; } // free old name
if (_t) stopTransition(); // also erases _t
deallocateData(); // free old runtime data
d_free(pixels); // free old pixel buffer
p_free(pixels); // free old pixel buffer
// move source data
memcpy((void*)this, (void*)&orig, sizeof(Segment));
orig.name = nullptr;
@@ -147,35 +147,38 @@ Segment& Segment::operator= (Segment &&orig) noexcept {
// allocates effect data buffer on heap and initialises (erases) it
bool Segment::allocateData(size_t len) {
if (len == 0) return false; // nothing to do
if (data && _dataLen >= len) { // already allocated enough (reduce fragmentation)
if (len == 0) return false; // nothing to do
if (data && _dataLen >= len) { // already allocated enough (reduce fragmentation)
if (call == 0) {
//DEBUG_PRINTF_P(PSTR("-- Clearing data (%d): %p\n"), len, this);
memset(data, 0, len); // erase buffer if called during effect initialisation
if (_dataLen < FAIR_DATA_PER_SEG) { // segment data is small
//DEBUG_PRINTF_P(PSTR("-- Clearing data (%d): %p\n"), len, this);
memset(data, 0, len); // erase buffer if called during effect initialisation
return true; // no need to reallocate
}
}
return true;
else
return true;
}
//DEBUG_PRINTF_P(PSTR("-- Allocating data (%d): %p\n"), len, this);
// limit to MAX_SEGMENT_DATA if there is no PSRAM, otherwise prefer functionality over speed
#ifndef BOARD_HAS_PSRAM
if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) {
// not enough memory
DEBUG_PRINTF_P(PSTR("!!! Not enough RAM: %d/%d !!!\n"), len, Segment::getUsedSegmentData());
DEBUG_PRINTF_P(PSTR("SegmentData limit reached: %d/%d\n"), len, Segment::getUsedSegmentData());
errorFlag = ERR_NORAM;
return false;
}
// prefer DRAM over SPI RAM on ESP32 since it is slow
if (data) {
data = (byte*)d_realloc_malloc(data, len); // realloc with malloc fallback
if (!data) {
data = nullptr;
Segment::addUsedSegmentData(-_dataLen); // subtract original buffer size
_dataLen = 0; // reset data length
}
}
else data = (byte*)d_malloc(len);
#endif
if (data) {
memset(data, 0, len); // erase buffer
Segment::addUsedSegmentData(len - _dataLen);
d_free(data); // free data and try to allocate again (segment buffer may be blocking contiguous heap)
Segment::addUsedSegmentData(-_dataLen); // subtract buffer size
}
data = static_cast<byte*>(allocate_buffer(len, BFRALLOC_PREFER_DRAM | BFRALLOC_CLEAR)); // prefer DRAM over PSRAM for speed
if (data) {
Segment::addUsedSegmentData(len);
_dataLen = len;
//DEBUG_PRINTF_P(PSTR("--- Allocated data (%p): %d/%d -> %p\n"), this, len, Segment::getUsedSegmentData(), data);
return true;
@@ -209,7 +212,11 @@ void Segment::deallocateData() {
void Segment::resetIfRequired() {
if (!reset || !isActive()) return;
//DEBUG_PRINTF_P(PSTR("-- Segment reset: %p\n"), this);
if (data && _dataLen > 0) memset(data, 0, _dataLen); // prevent heap fragmentation (just erase buffer instead of deallocateData())
if (data && _dataLen > 0) {
if (_dataLen > FAIR_DATA_PER_SEG) deallocateData(); // do not keep large allocations
else memset(data, 0, _dataLen); // can prevent heap fragmentation
DEBUG_PRINTF_P(PSTR("-- Segment %p reset, data cleared\n"), this);
}
if (pixels) for (size_t i = 0; i < length(); i++) pixels[i] = BLACK; // clear pixel buffer
next_time = 0; step = 0; call = 0; aux0 = 0; aux1 = 0;
reset = false;
@@ -466,7 +473,7 @@ void Segment::setGeometry(uint16_t i1, uint16_t i2, uint8_t grp, uint8_t spc, ui
if (length() != oldLength) {
// allocate render buffer (always entire segment), prefer IRAM/PSRAM. Note: impact on FPS with PSRAM buffer is low (<2% with QSPI PSRAM) on S2/S3
p_free(pixels);
pixels = static_cast<uint32_t*>(d_malloc(sizeof(uint32_t) * length()));
pixels = static_cast<uint32_t*>(allocate_buffer(length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS));
if (!pixels) {
DEBUGFX_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!"));
deallocateData();
@@ -581,8 +588,8 @@ Segment &Segment::setName(const char *newName) {
if (newName) {
const int newLen = min(strlen(newName), (size_t)WLED_MAX_SEGNAME_LEN);
if (newLen) {
if (name) d_free(name); // free old name
name = static_cast<char*>(d_malloc(newLen+1));
if (name) p_free(name); // free old name
name = static_cast<char*>(allocate_buffer(newLen+1, BFRALLOC_PREFER_PSRAM));
if (mode == FX_MODE_2DSCROLLTEXT) startTransition(strip.getTransition(), true); // if the name changes in scrolling text mode, we need to copy the segment for blending
if (name) strlcpy(name, newName, newLen+1);
return *this;
@@ -1177,7 +1184,10 @@ void WS2812FX::finalizeInit() {
mem += bus.memUsage(Bus::isDigital(bus.type) && !Bus::is2Pin(bus.type) ? digitalCount++ : 0); // includes global buffer
if (mem <= MAX_LED_MEMORY) {
if (BusManager::add(bus) == -1) break;
} else DEBUG_PRINTF_P(PSTR("Out of LED memory! Bus %d (%d) #%u not created."), (int)bus.type, (int)bus.count, digitalCount);
} else {
errorFlag = ERR_NORAM_PX; // alert UI
DEBUG_PRINTF_P(PSTR("Out of LED memory! Bus %d (%d) #%u not created."), (int)bus.type, (int)bus.count, digitalCount);
}
}
busConfigs.clear();
busConfigs.shrink_to_fit();
@@ -1209,10 +1219,11 @@ void WS2812FX::finalizeInit() {
deserializeMap(); // (re)load default ledmap (will also setUpMatrix() if ledmap does not exist)
// allocate frame buffer after matrix has been set up (gaps!)
d_free(_pixels); // using realloc on large buffers can cause additional fragmentation instead of reducing it
_pixels = static_cast<uint32_t*>(d_malloc(getLengthTotal() * sizeof(uint32_t)));
p_free(_pixels); // using realloc on large buffers can cause additional fragmentation instead of reducing it
// use PSRAM if available: there is no measurable perfomance impact between PSRAM and DRAM on S2/S3 with QSPI PSRAM for this buffer
_pixels = static_cast<uint32_t*>(allocate_buffer(getLengthTotal() * sizeof(uint32_t), BFRALLOC_ENFORCE_PSRAM | BFRALLOC_NOBYTEACCESS | BFRALLOC_CLEAR));
DEBUG_PRINTF_P(PSTR("strip buffer size: %uB\n"), getLengthTotal() * sizeof(uint32_t));
DEBUG_PRINTF_P(PSTR("Heap after strip init: %uB\n"), ESP.getFreeHeap());
DEBUG_PRINTF_P(PSTR("Heap after strip init: %uB\n"), getFreeHeapSize());
}
void WS2812FX::service() {
@@ -1552,7 +1563,11 @@ void WS2812FX::blendSegment(const Segment &topSegment) const {
}
void WS2812FX::show() {
if (!_pixels) return; // no pixels allocated, nothing to show
if (!_pixels) {
DEBUGFX_PRINTLN(F("Error: no _pixels!"));
errorFlag = ERR_NORAM;
return; // no pixels allocated, nothing to show
}
unsigned long showNow = millis();
size_t diff = showNow - _lastShow;
@@ -1562,7 +1577,7 @@ void WS2812FX::show() {
// we need to keep track of each pixel's CCT when blending segments (if CCT is present)
// and then set appropriate CCT from that pixel during paint (see below).
if ((hasCCTBus() || correctWB) && !cctFromRgb)
_pixelCCT = static_cast<uint8_t*>(d_malloc(totalLen * sizeof(uint8_t))); // allocate CCT buffer if necessary
_pixelCCT = static_cast<uint8_t*>(allocate_buffer(totalLen * sizeof(uint8_t), BFRALLOC_PREFER_PSRAM)); // allocate CCT buffer if necessary, prefer PSRAM
if (_pixelCCT) memset(_pixelCCT, 127, totalLen); // set neutral (50:50) CCT
if (realtimeMode == REALTIME_MODE_INACTIVE || useMainSegmentOnly || realtimeOverride > REALTIME_OVERRIDE_NONE) {
@@ -1596,7 +1611,7 @@ void WS2812FX::show() {
}
Bus::setCCT(oldCCT); // restore old CCT for ABL adjustments
d_free(_pixelCCT);
p_free(_pixelCCT);
_pixelCCT = nullptr;
// some buses send asynchronously and this method will return before