lots of tweaks, updated 1D rendering and collisions

- bugfix in mass based 2D collisions
- added improved and faster large size rendering to 1D system
- added per-particle size rendering to 1D system
- improved and simplified collision handling in 1D system
- removed local blurring functions in PS as they are not needed anymore for particle rendering
- adapted FX to work with the new rendering
- fixed outdated AR handling in PS FX
- fixed infinite loop if not enough memory
- updated PS Hourglass drop interval to simpler math: speed / 10 = time in seconds and improved particle handling
- reduced speed in PS Pinball to fix collision slip-through
- PS Box now auto-adjusts number of particles based on matrix size and particle size
- added safety check to 2D particle rendering to not crash if something goes wrong with out-of-bounds particle rendering
- improved binning for particle collisions: don't use binning for small number of particles (faster)
- Some code cleanup
This commit is contained in:
Damian Schneider
2025-12-13 19:05:21 +01:00
parent a421cfeabe
commit 19bc3c513a
3 changed files with 422 additions and 444 deletions

View File

@@ -88,7 +88,7 @@ void ParticleSystem2D::updateFire(const uint8_t intensity,const bool renderonly)
// set percentage of used particles as uint8_t i.e 127 means 50% for example
void ParticleSystem2D::setUsedParticles(uint8_t percentage) {
usedParticles = (numParticles * ((int)percentage+1)) >> 8; // number of particles to use (percentage is 0-255, 255 = 100%)
usedParticles = max((uint32_t)1, (numParticles * ((int)percentage+1)) >> 8); // number of particles to use (percentage is 0-255, 255 = 100%)
PSPRINT(" SetUsedpaticles: allocated particles: ");
PSPRINT(numParticles);
PSPRINT(" ,used particles: ");
@@ -214,7 +214,7 @@ void ParticleSystem2D::flameEmit(const PSsource &emitter) {
// angle = 0 means in positive x-direction (i.e. to the right)
int32_t ParticleSystem2D::angleEmit(PSsource &emitter, const uint16_t angle, const int32_t speed) {
emitter.vx = ((int32_t)cos16_t(angle) * speed) / (int32_t)32600; // cos16_t() and sin16_t() return signed 16bit, division should be 32767 but 32600 gives slightly better rounding
emitter.vy = ((int32_t)sin16_t(angle) * speed) / (int32_t)32600; // note: cannot use bit shifts as bit shifting is asymmetrical for positive and negative numbers and this needs to be accurate!
emitter.vy = ((int32_t)sin16_t(angle) * speed) / (int32_t)32600; // note: cannot use bit shifts as bit shifting is asymmetrical (1>>1=0 / -1>>1=-1) and this needs to be accurate!
return sprayEmit(emitter);
}
@@ -236,8 +236,11 @@ void ParticleSystem2D::particleMoveUpdate(PSparticle &part, PSparticleFlags &par
partFlags.outofbounds = false; // reset out of bounds (in case particle was created outside the matrix and is now moving into view) note: moving this to checks below adds code and is not faster
if (perParticleSize && advancedproperties != nullptr) { // using individual particle size
renderradius = PS_P_HALFRADIUS - 1 + advancedproperties->size;
particleHardRadius = PS_P_MINHARDRADIUS + ((advancedproperties->size * 52) >> 6); // use 1 pixel + 80% of size for hard radius (slight overlap with boarders so they do not "float")
renderradius = PS_P_HALFRADIUS - 1 + advancedproperties->size; // note: single pixel particles should be zero but OOB checks in rendering function handle this
if (advancedproperties->size > 0)
particleHardRadius = PS_P_MINHARDRADIUS + ((advancedproperties->size * 52) >> 6); // use 1 pixel + 80% of size for hard radius (slight overlap with boarders so they do not "float")
else // single pixel particles use half the collision distance for walls
particleHardRadius = PS_P_MINHARDRADIUS >> 1;
}
// note: if wall collisions are enabled, bounce them before they reach the edge, it looks much nicer if the particle does not go half out of view
if (options->bounceY) {
@@ -446,7 +449,7 @@ void ParticleSystem2D::applyForce(const int8_t xforce, const int8_t yforce) {
// force is in 3.4 fixed point notation so force=16 means apply v+1 each frame (useful force range is +/- 127)
void ParticleSystem2D::applyAngleForce(PSparticle &part, const int8_t force, const uint16_t angle, uint8_t &counter) {
int8_t xforce = ((int32_t)force * cos16_t(angle)) / 32767; // force is +/- 127
int8_t yforce = ((int32_t)force * sin16_t(angle)) / 32767; // note: cannot use bit shifts as bit shifting is asymmetrical for positive and negative numbers and this needs to be accurate!
int8_t yforce = ((int32_t)force * sin16_t(angle)) / 32767; // note: cannot use bit shifts as bit shifting is asymmetrical (1>>1=0 / -1>>1=-1) and this needs to be accurate!
applyForce(part, xforce, yforce, counter);
}
@@ -460,7 +463,7 @@ void ParticleSystem2D::applyAngleForce(const uint32_t particleindex, const int8_
// angle is from 0-65535 (=0-360deg) angle = 0 means in positive x-direction (i.e. to the right)
void ParticleSystem2D::applyAngleForce(const int8_t force, const uint16_t angle) {
int8_t xforce = ((int32_t)force * cos16_t(angle)) / 32767; // force is +/- 127
int8_t yforce = ((int32_t)force * sin16_t(angle)) / 32767; // note: cannot use bit shifts as bit shifting is asymmetrical for positive and negative numbers and this needs to be accurate!
int8_t yforce = ((int32_t)force * sin16_t(angle)) / 32767; // note: cannot use bit shifts as bit shifting is asymmetrical (1>>1=0 / -1>>1=-1) and this needs to be accurate!
applyForce(xforce, yforce);
}
@@ -543,7 +546,7 @@ void ParticleSystem2D::pointAttractor(const uint32_t particleindex, PSparticle &
int32_t force = ((int32_t)strength << 16) / distanceSquared;
int8_t xforce = (force * dx) / 1024; // scale to a lower value, found by experimenting
int8_t yforce = (force * dy) / 1024; // note: cannot use bit shifts as bit shifting is asymmetrical for positive and negative numbers and this needs to be accurate!
int8_t yforce = (force * dy) / 1024; // note: cannot use bit shifts as bit shifting is asymmetrical (1>>1=0 / -1>>1=-1) and this needs to be accurate!
applyForce(particleindex, xforce, yforce);
}
@@ -602,109 +605,16 @@ void ParticleSystem2D::render() {
// apply 2D blur to rendered frame
if (smearBlur) {
blur2D(framebuffer, maxXpixel + 1, maxYpixel + 1, smearBlur, smearBlur);
SEGMENT.blur2D(smearBlur, smearBlur, true);
}
}
// Render a particle as an ellipse/circle with linear brightness falloff and sub-pixel precision.
//   particleindex - index into particles[] (and into advPartProps[] / advPartSize[] when those are in use)
//   brightness    - brightness handed to calculateEllipseBrightness() for the per-pixel falloff
//   color         - color that is added to the framebuffer, scaled by the per-pixel brightness
//   wrapX, wrapY  - if set, pixels beyond the frame edge are wrapped to the opposite side, otherwise they are skipped
void WLED_O2_ATTR ParticleSystem2D::renderParticleEllipse(const uint32_t particleindex, const uint8_t brightness, const CRGBW& color, const bool wrapX, const bool wrapY) {
  uint32_t size = particlesize;
  if (perParticleSize && advPartProps != nullptr) // individual particle size
    size = advPartProps[particleindex].size;

  // particle position with sub-pixel precision
  int32_t x_subcenter = particles[particleindex].x;
  int32_t y_subcenter = particles[particleindex].y;

  // example: for x = 128, a particle is exactly between pixel 1 and 2, with a radius of 2 pixels, we draw pixels 0-3
  // integer center jumps when x = 127 -> pixel 1 goes to x = 128 -> pixel 2
  // when calculating the dx, we need to take this into account: at x = 128 the x offset is 1, the pixel center is at pixel 2:
  // for pixel 1, dx = 1 * PS_P_RADIUS - 128 = -64 but the center of the pixel is actually only -32 from the particle center so need to add half a radius:
  // dx = pixel_x * PS_P_RADIUS - x_subcenter + PS_P_HALFRADIUS

  // integer pixel position, this is rounded down
  int32_t x_center = (x_subcenter) >> PS_P_RADIUS_SHIFT;
  int32_t y_center = (y_subcenter) >> PS_P_RADIUS_SHIFT;

  // ellipse radii, start out symmetric and get replaced if asymmetric size control is active for this particle
  uint32_t xsize = size;
  uint32_t ysize = size;
  if (advPartSize != nullptr && advPartSize[particleindex].asymmetry > 0) {
    getParticleXYsize(&advPartProps[particleindex], &advPartSize[particleindex], xsize, ysize);
  }

  int32_t rx_subpixel = xsize+65; // size = 1 means radius of just over 1 pixel
  int32_t ry_subpixel = ysize+65; // size = 255 is radius of 5, so add 65 -> 65+255=320, 320>>6=5 pixels

  // rendering bounding box in pixels
  int32_t rx_pixels = (rx_subpixel >> PS_P_RADIUS_SHIFT);
  int32_t ry_pixels = (ry_subpixel >> PS_P_RADIUS_SHIFT);

  int32_t x_min = x_center - rx_pixels;
  int32_t x_max = x_center + rx_pixels;
  int32_t y_min = y_center - ry_pixels;
  int32_t y_max = y_center + ry_pixels;

  // cache for speed
  uint32_t matrixX = maxXpixel + 1;
  uint32_t matrixY = maxYpixel + 1;
  uint32_t rx_sq = rx_subpixel * rx_subpixel;
  uint32_t ry_sq = ry_subpixel * ry_subpixel;

  // iterate over bounding box and render each pixel
  for (int32_t py = y_min; py <= y_max; py++) {
    for (int32_t px = x_min; px <= x_max; px++) {
      // distance from particle center, explanation see above
      int32_t dx_subpixel = (px << PS_P_RADIUS_SHIFT) - x_subcenter + PS_P_HALFRADIUS;
      int32_t dy_subpixel = (py << PS_P_RADIUS_SHIFT) - y_subcenter + PS_P_HALFRADIUS;

      // calculate brightness based on squared distance to ellipse center
      uint8_t pixel_brightness = calculateEllipseBrightness(dx_subpixel, dy_subpixel, rx_sq, ry_sq, brightness);

      if (pixel_brightness == 0) continue; // skip fully transparent pixels

      // apply inverse gamma correction if needed, if this is skipped, particles flicker due to changing total brightness
      if (gammaCorrectCol) {
        pixel_brightness = gamma8inv(pixel_brightness); // invert brightness so brightness distribution is linear after gamma correction
      }

      // handle wrapping and bounds
      int32_t render_x = px;
      int32_t render_y = py;

      if (render_x < 0) {
        if (!wrapX) continue;
        render_x += matrixX;
      } else if (render_x > maxXpixel) {
        if (!wrapX) continue;
        render_x -= matrixX;
      }

      if (render_y < 0) {
        if (!wrapY) continue;
        render_y += matrixY;
      } else if (render_y > maxYpixel) {
        if (!wrapY) continue;
        render_y -= matrixY;
      }

      // safety check: one wrap step is not enough if the bounding box reaches further past the edge than the matrix is wide/tall
      // (tiny matrix or particle far out of bounds), skip such pixels instead of writing outside the framebuffer
      if (render_x < 0 || render_x > maxXpixel || render_y < 0 || render_y > maxYpixel) continue;

      // render pixel
      uint32_t idx = render_x + (maxYpixel - render_y) * matrixX; // flip y coordinate (0,0 is bottom left in PS but top left in framebuffer)
      framebuffer[idx] = fast_color_scaleAdd(framebuffer[idx], color, pixel_brightness);
    }
  }
}
// calculate pixel positions and brightness distribution and render the particle to local buffer or global buffer
void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex, const uint8_t brightness, const CRGBW& color, const bool wrapX, const bool wrapY) {
uint32_t size = particlesize;
if (perParticleSize && advPartProps != nullptr) // use advanced size properties
size = advPartProps[particleindex].size;
size = 1 + advPartProps[particleindex].size; // add 1 to avoid single pixel size particles (collisions do not support it)
if (size == 0) { // single pixel rendering
uint32_t x = particles[particleindex].x >> PS_P_RADIUS_SHIFT;
@@ -717,7 +627,7 @@ void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex,
}
if (size > 1) { // size > 1: render as ellipse
renderParticleEllipse(particleindex, brightness, color, wrapX, wrapY); // larger size rendering
renderLargeParticle(size, particleindex, brightness, color, wrapX, wrapY); // larger size rendering
return;
}
@@ -760,19 +670,19 @@ void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex,
// - apply inverse gamma correction to brightness values
// - gamma is applied again in show() -> the resulting brightness distribution is linear but gamma corrected in total
if (gammaCorrectCol) {
pxlbrightness[0] = gamma8inv(pxlbrightness[0]); // use look-up-table for invers gamma
pxlbrightness[1] = gamma8inv(pxlbrightness[1]);
pxlbrightness[2] = gamma8inv(pxlbrightness[2]);
pxlbrightness[3] = gamma8inv(pxlbrightness[3]);
for (uint32_t i = 0; i < 4; i++) {
pxlbrightness[i] = gamma8inv(pxlbrightness[i]); // use look-up-table for invers gamma
}
}
// standard rendering (2x2 pixels)
// check for out of frame pixels and wrap them if required: x,y is bottom left pixel coordinate of the particle
if (x < 0) { // left pixels out of frame
if (pixco[0].x < 0) { // left pixels out of frame
if (wrapX) { // wrap x to the other side if required
pixco[0].x = pixco[3].x = maxXpixel;
} else {
pixelvalid[0] = pixelvalid[3] = false; // out of bounds
if (pixco[0].x < -1) return; // both left pixels out of bounds, no need to continue (safety check)
}
}
else if (pixco[1].x > (int32_t)maxXpixel) { // right pixels, only has to be checked if left pixel is in frame
@@ -780,14 +690,16 @@ void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex,
pixco[1].x = pixco[2].x = 0;
} else {
pixelvalid[1] = pixelvalid[2] = false; // out of bounds
if (pixco[0].x > (int32_t)maxXpixel) return; // both pixels out of bounds, no need to continue (safety check)
}
}
if (y < 0) { // bottom pixels out of frame
if (pixco[0].y < 0) { // bottom pixels out of frame
if (wrapY) { // wrap y to the other side if required
pixco[0].y = pixco[1].y = maxYpixel;
} else {
pixelvalid[0] = pixelvalid[1] = false; // out of bounds
if (pixco[0].y < -1) return; // both bottom pixels out of bounds, no need to continue (safety check)
}
}
else if (pixco[2].y > maxYpixel) { // top pixels
@@ -795,6 +707,7 @@ void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex,
pixco[2].y = pixco[3].y = 0;
} else {
pixelvalid[2] = pixelvalid[3] = false; // out of bounds
if (pixco[2].y > (int32_t)maxYpixel + 1) return; // both top pixels out of bounds, no need to continue (safety check)
}
}
for (uint32_t i = 0; i < 4; i++) {
@@ -805,32 +718,123 @@ void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex,
}
}
// Render a particle as an ellipse/circle with linear brightness falloff and sub-pixel precision.
//   size          - particle size, used for both radii unless asymmetric size control is active for this particle
//   particleindex - index into particles[] (and into advPartProps[] / advPartSize[] when asymmetric sizes are used)
//   brightness    - brightness handed to calculateEllipseBrightness() for the per-pixel falloff
//   color         - color that is added to the framebuffer, scaled by the per-pixel brightness
//   wrapX, wrapY  - if set, pixels beyond the frame edge are wrapped to the opposite side, otherwise they are skipped
void WLED_O2_ATTR ParticleSystem2D::renderLargeParticle(const uint32_t size, const uint32_t particleindex, const uint8_t brightness, const CRGBW& color, const bool wrapX, const bool wrapY) {
  // particle position with sub-pixel precision
  int32_t x_subcenter = particles[particleindex].x;
  int32_t y_subcenter = particles[particleindex].y;

  // example: for x = 128, a particle is exactly between pixel 1 and 2, with a radius of 2 pixels, we draw pixels 0-3
  // integer center jumps when x = 127 -> pixel 1 goes to x = 128 -> pixel 2
  // when calculating the dx, we need to take this into account: at x = 128 the x offset is 1, the pixel center is at pixel 2:
  // for pixel 1, dx = 1 * PS_P_RADIUS - 128 = -64 but the center of the pixel is actually only -32 from the particle center so need to add half a radius:
  // dx = pixel_x * PS_P_RADIUS - x_subcenter + PS_P_HALFRADIUS

  // integer pixel position, this is rounded down
  int32_t x_center = (x_subcenter) >> PS_P_RADIUS_SHIFT;
  int32_t y_center = (y_subcenter) >> PS_P_RADIUS_SHIFT;

  // ellipse radii, start out symmetric and get replaced if asymmetric size control is active for this particle
  uint32_t xsize = size;
  uint32_t ysize = size;
  if (advPartSize != nullptr && advPartSize[particleindex].asymmetry > 0) {
    getParticleXYsize(&advPartProps[particleindex], &advPartSize[particleindex], xsize, ysize);
  }

  int32_t rx_subpixel = xsize + PS_P_RADIUS + 1; // size = 1 means radius of just over 1 pixel, + PS_P_RADIUS (+1 to account for bit-shift loss)
  int32_t ry_subpixel = ysize + PS_P_RADIUS + 1; // size = 255 is radius of 5, so add 65 -> 65+255=320, 320>>6=5 pixels

  // rendering bounding box in pixels
  int32_t rx_pixels = (rx_subpixel >> PS_P_RADIUS_SHIFT);
  int32_t ry_pixels = (ry_subpixel >> PS_P_RADIUS_SHIFT);

  int32_t x_min = x_center - rx_pixels; // note: the "+1" extension needed for 1D is not required for 2D, it is smooth as-is
  int32_t x_max = x_center + rx_pixels;
  int32_t y_min = y_center - ry_pixels;
  int32_t y_max = y_center + ry_pixels;

  // cache for speed
  uint32_t matrixX = maxXpixel + 1;
  uint32_t matrixY = maxYpixel + 1;
  uint32_t rx_sq = rx_subpixel * rx_subpixel;
  uint32_t ry_sq = ry_subpixel * ry_subpixel;

  // iterate over bounding box and render each pixel
  for (int32_t py = y_min; py <= y_max; py++) {
    // everything that depends on y only is done once per row instead of once per pixel
    int32_t render_y = py;
    if (render_y < 0) {
      if (!wrapY) continue;
      render_y += matrixY;
    } else if (render_y > maxYpixel) {
      if (!wrapY) continue;
      render_y -= matrixY;
    }
    // safety check: one wrap step is not enough if the bounding box reaches further past the edge than the matrix is tall
    // (tiny matrix or particle far out of bounds), skip such rows instead of writing outside the framebuffer
    if (render_y < 0 || render_y > maxYpixel) continue;

    // distance from particle center, explanation see above
    int32_t dy_subpixel = (py << PS_P_RADIUS_SHIFT) - y_subcenter + PS_P_HALFRADIUS;
    uint32_t rowstart = (maxYpixel - render_y) * matrixX; // flip y coordinate (0,0 is bottom left in PS but top left in framebuffer)

    for (int32_t px = x_min; px <= x_max; px++) {
      // check bounds and apply wrapping
      int32_t render_x = px;
      if (render_x < 0) {
        if (!wrapX) continue;
        render_x += matrixX;
      } else if (render_x > maxXpixel) {
        if (!wrapX) continue;
        render_x -= matrixX;
      }
      if (render_x < 0 || render_x > maxXpixel) continue; // still out of frame after wrapping once, see safety check above

      int32_t dx_subpixel = (px << PS_P_RADIUS_SHIFT) - x_subcenter + PS_P_HALFRADIUS;

      // calculate brightness based on squared distance to ellipse center
      uint8_t pixel_brightness = calculateEllipseBrightness(dx_subpixel, dy_subpixel, rx_sq, ry_sq, brightness);
      if (pixel_brightness == 0) continue; // skip black pixels

      // apply inverse gamma correction if needed, if this is skipped, particles flicker due to changing total brightness
      if (gammaCorrectCol) {
        pixel_brightness = gamma8inv(pixel_brightness); // invert brightness so brightness distribution is linear after gamma correction
      }

      // render pixel
      uint32_t idx = render_x + rowstart;
      framebuffer[idx] = fast_color_scaleAdd(framebuffer[idx], color, pixel_brightness);
    }
  }
}
// detect collisions in an array of particles and handle them
// uses binning by dividing the frame into slices in x direction which is efficient if using gravity in y direction (but less efficient for FX that use forces in x direction)
// for code simplicity, no y slicing is done, making very tall matrix configurations less efficient
// note: also tested adding y slicing, it gives diminishing returns, some FX even get slower. FX not using gravity would benefit with a 10% FPS improvement
void ParticleSystem2D::handleCollisions() {
if (perParticleSize && advPartProps != nullptr)
particleHardRadius = 255; // max radius for collision detection if using per-particle size TODO: could optimize by fetching max size from advPartProps
uint32_t collDistSq = particleHardRadius << 1; // distance is double the radius note: particleHardRadius is updated when setting global particle size
collDistSq = collDistSq * collDistSq; // square it for faster comparison (square is one operation)
// note: partices are binned in x-axis, assumption is that no more than half of the particles are in the same bin
// if they are, collisionStartIdx is increased so each particle collides at least every second frame (which still gives decent collisions)
constexpr int BIN_WIDTH = 6 * PS_P_RADIUS; // width of a bin in sub-pixels
int binWidth = 6 * PS_P_RADIUS; // width of a bin in sub-pixels
int32_t overlap = particleHardRadius << 1; // overlap bins to include edge particles to neighbouring bins
if (perParticleSize && advPartProps != nullptr)
overlap = 512; // max overlap for collision detection if using per-particle size, enough to catch all particles even at max speed
uint32_t maxBinParticles = max((uint32_t)50, (usedParticles + 1) / 2); // assume no more than half of the particles are in the same bin, do not bin small amounts of particles
uint32_t numBins = (maxX + (BIN_WIDTH - 1)) / BIN_WIDTH; // number of bins in x direction
uint32_t numBins = (maxX + (binWidth - 1)) / binWidth; // number of bins in x direction
if (usedParticles < maxBinParticles) {
numBins = 1; // use single bin for small number of particles
binWidth = maxX + 1;
}
uint16_t binIndices[maxBinParticles]; // creat array on stack for indices, 2kB max for 1024 particles (ESP32_MAXPARTICLES/2)
uint32_t binParticleCount; // number of particles in the current bin
uint16_t nextFrameStartIdx = hw_random16(usedParticles); // index of the first particle in the next frame (set to fixed value if bin overflow)
uint32_t nextFrameStartIdx = hw_random16(usedParticles); // index of the first particle in the next frame (set to fixed value if bin overflow)
uint32_t pidx = collisionStartIdx; //start index in case a bin is full, process remaining particles next frame
// fill the binIndices array for this bin
for (uint32_t bin = 0; bin < numBins; bin++) {
binParticleCount = 0; // reset for this bin
int32_t binStart = bin * BIN_WIDTH - overlap; // note: first bin will extend to negative, but that is ok as out of bounds particles are ignored
int32_t binEnd = binStart + BIN_WIDTH + overlap; // note: last bin can be out of bounds, see above;
int32_t binStart = bin * binWidth - overlap; // note: first bin will extend to negative, but that is ok as out of bounds particles are ignored
int32_t binEnd = binStart + binWidth + overlap; // note: last bin can be out of bounds, see above;
// fill the binIndices array for this bin
for (uint32_t i = 0; i < usedParticles; i++) {
@@ -849,8 +853,8 @@ void ParticleSystem2D::handleCollisions() {
if (pidx >= usedParticles) pidx = 0; // wrap around
}
uint32_t massratio1 = 0; // 0 means dont use mass ratio (equal mass)
uint32_t massratio2 = 0;
int32_t massratio1 = 0; // 0 means dont use mass ratio (equal mass)
int32_t massratio2 = 0; // TODO: if implementing "fixed" particles, set to 1 (fixed) and 255 (movable)
for (uint32_t i = 0; i < binParticleCount; i++) { // go though all 'higher number' particles in this bin and see if any of those are in close proximity and if they are, make them collide
uint32_t idx_i = binIndices[i];
for (uint32_t j = i + 1; j < binParticleCount; j++) { // check against higher number particles
@@ -859,12 +863,15 @@ void ParticleSystem2D::handleCollisions() {
collDistSq = (PS_P_MINHARDRADIUS << 1) + ((((uint32_t)advPartProps[idx_i].size + (uint32_t)advPartProps[idx_j].size) * 52) >> 6); // collision distance, use 80% of size for tighter stacking (slight overlap)
collDistSq = collDistSq * collDistSq; // square it for faster comparison
// calculate mass ratio for collision response
uint32_t mass1 = 1 + ((uint32_t)advPartProps[idx_i].size * advPartProps[idx_i].size); // +1 to avoid division by zero
uint32_t mass2 = ((uint32_t)advPartProps[idx_j].size * advPartProps[idx_j].size);
uint32_t mass1 = PS_P_RADIUS + advPartProps[idx_i].size;
uint32_t mass2 = PS_P_RADIUS + advPartProps[idx_j].size;
mass1 = mass1 * mass1; // mass proportional to area
mass2 = mass2 * mass2;
uint32_t totalmass = mass1 + mass2;
massratio1 = (mass2 << 8) / totalmass; // massratio 1 depends on mass of particle 2, i.e. if 2 is heavier -> higher velocity impact on 1
massratio2 = (mass1 << 8) / totalmass;
}
// note: using the same logic as in 1D is much slower though it would be more accurate but it is not really needed in 2D
int32_t dx = (particles[idx_j].x + particles[idx_j].vx) - (particles[idx_i].x + particles[idx_i].vx); // distance with lookahead
if (dx * dx < collDistSq) { // check x direction, if close, check y direction (squaring is faster than abs() or dual compare)
int32_t dy = (particles[idx_j].y + particles[idx_j].vy) - (particles[idx_i].y + particles[idx_i].vy); // distance with lookahead
@@ -879,7 +886,7 @@ void ParticleSystem2D::handleCollisions() {
// handle a collision if close proximity is detected, i.e. dx and/or dy smaller than 2*PS_P_RADIUS
// takes two pointers to the particles to collide and the particle hardness (softer means more energy lost in collision, 255 means full hard)
void WLED_O2_ATTR ParticleSystem2D::collideParticles(PSparticle &particle1, PSparticle &particle2, int32_t dx, int32_t dy, const uint32_t collDistSq, uint32_t massratio1, uint32_t massratio2) {
void WLED_O2_ATTR ParticleSystem2D::collideParticles(PSparticle &particle1, PSparticle &particle2, int32_t dx, int32_t dy, const uint32_t collDistSq, int32_t massratio1, int32_t massratio2) {
int32_t distanceSquared = dx * dx + dy * dy;
// Calculate relative velocity note: could zero check but that does not improve overall speed but deminish it as that is rarely the case and pushing is still required
int32_t relativeVx = (int32_t)particle2.vx - (int32_t)particle1.vx;
@@ -909,24 +916,29 @@ void WLED_O2_ATTR ParticleSystem2D::collideParticles(PSparticle &particle1, PSpa
if (dotProduct < 0) {// particles are moving towards each other
// integer math is much faster than using floats (float divisions are slow on all ESPs)
// overflow check: dx/dy are 7bit, relativV are 8bit -> dotproduct is 15bit, dotproduct/distsquared ist 8b, multiplied by collisionhardness of 8bit. so a 16bit shift is ok, make it 15 to be sure no overflows happen
// note: cannot use right shifts as bit shifting in right direction is asymmetrical for positive and negative numbers and this needs to be accurate! the trick is: only shift positive numers
// note: cannot use right shifts as bit shifting in right direction is asymmetrical (1>>1=0 / -1>>1=-1) and this needs to be accurate! the trick is: only shift positive numers
// Calculate new velocities after collision
int32_t surfacehardness = max(collisionHardness, (int32_t)PS_P_MINSURFACEHARDNESS >> 1); // if particles are soft, the impulse must stay above a limit or collisions slip through at higher speeds, 170 seems to be a good value
int32_t impulse = (((((-dotProduct) << 15) / distanceSquared) * surfacehardness) >> 8); // note: inverting before bitshift corrects for asymmetry in right-shifts (is slightly faster)
#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(ESP8266) // use bitshifts with rounding instead of division (2x faster)
int32_t ximpulse = (impulse * dx + ((dx >> 31) & 32767)) >> 15; // note: extracting sign bit and adding rounding value to correct for asymmetry in right shifts
int32_t yimpulse = (impulse * dy + ((dy >> 31) & 32767)) >> 15;
int32_t ximpulse = (impulse * dx + ((dx >> 31) & 0x7FFF)) >> 15; // note: extracting sign bit and adding rounding value to correct for asymmetry in right shifts
int32_t yimpulse = (impulse * dy + ((dy >> 31) & 0x7FFF)) >> 15;
#else
int32_t ximpulse = (impulse * dx) / 32767;
int32_t yimpulse = (impulse * dy) / 32767;
#endif
// if particles are not the same size, use a mass ratio. mass ratio is set to 0 if particles are the same size
if (massratio1) {
particle1.vx -= (ximpulse * massratio1) >> 7; // mass ratio is in fixed point 8bit, multiply by two to account for the fact that we distribute the impulse to both particles
particle1.vy -= (yimpulse * massratio1) >> 7;
particle2.vx += (ximpulse * massratio2) >> 7;
particle2.vy += (yimpulse * massratio2) >> 7;
int32_t vx1 = (int32_t)particle1.vx - ((ximpulse * massratio1) >> 7); // mass ratio is in fixed point 8bit, multiply by two to account for the fact that we distribute the impulse to both particles
int32_t vy1 = (int32_t)particle1.vy - ((yimpulse * massratio1) >> 7);
int32_t vx2 = (int32_t)particle2.vx + ((ximpulse * massratio2) >> 7);
int32_t vy2 = (int32_t)particle2.vy + ((yimpulse * massratio2) >> 7);
// limit speeds to max speed (required if a lot of impulse is transferred from a large to a small particle)
particle1.vx = limitSpeed(vx1);
particle1.vy = limitSpeed(vy1);
particle2.vx = limitSpeed(vx2);
particle2.vy = limitSpeed(vy2);
}
else {
particle1.vx -= ximpulse; // note: impulse is inverted, so subtracting it
@@ -951,11 +963,11 @@ void WLED_O2_ATTR ParticleSystem2D::collideParticles(PSparticle &particle1, PSpa
}
// particles have volume, push particles apart if they are too close
// tried lots of configurations, it works best if not moved but given a little velocity, it tends to oscillate less this way
// tried lots of configurations, it works best if given a little velocity, it tends to oscillate less this way
// when hard pushing by offsetting position, they sink into each other under gravity
// a problem with giving velocity is, that on harder collisions, this adds up as it is not dampened enough, so add friction in the FX if required
if (distanceSquared < collDistSq && dotProduct > -250) { // too close and also slow, push them apart
int32_t notsorandom = dotProduct & 0x01; //dotprouct LSB should be somewhat random, so no need to calculate a random number
bool fairlyrandom = dotProduct & 0x01; //dotprouct LSB should be somewhat random, so no need to calculate a random number
int32_t pushamount = 1 + ((250 + dotProduct) >> 6); // the closer dotproduct is to zero, the closer the particles are
int32_t push = 0;
if (dx < 0) // particle 1 is on the right
@@ -963,7 +975,7 @@ void WLED_O2_ATTR ParticleSystem2D::collideParticles(PSparticle &particle1, PSpa
else if (dx > 0)
push = -pushamount;
else { // on the same x coordinate, shift it a little so they do not stack
if (notsorandom)
if (fairlyrandom)
particle1.x++; // move it so pile collapses
else
particle1.x--;
@@ -975,7 +987,7 @@ void WLED_O2_ATTR ParticleSystem2D::collideParticles(PSparticle &particle1, PSpa
else if (dy > 0)
push = -pushamount;
else { // dy==0
if (notsorandom)
if (fairlyrandom)
particle1.y++; // move it so pile collapses
else
particle1.y--;
@@ -1037,56 +1049,6 @@ void ParticleSystem2D::updatePSpointers(bool isadvanced, bool sizecontrol) {
}
// Blur a color buffer in place: one pass along x using xblur, then one pass along y using yblur (the two amounts may differ)
// the buffer is a 1D array addressed as x + y * width; 32bit variables are used for speed, keep blur values within 8bit (0-255) or the result is undefined
// xstart/ystart and xsize/ysize select the region to blur
// if isparticle is set, the buffer width is fixed to 10 (10x10 single particle buffer) and the x pass skips the first row of the region
void blur2D(uint32_t *colorbuffer, uint32_t xsize, uint32_t ysize, uint32_t xblur, uint32_t yblur, uint32_t xstart, uint32_t ystart, bool isparticle) {
  CRGBW seeppart, carryover;
  const uint32_t stride = isparticle ? 10 : xsize; // distance between two rows in the 1D buffer
  const uint32_t xEnd = xstart + xsize;            // one past the last column of the region
  const uint32_t yEnd = ystart + ysize;            // one past the last row of the region
  // original note: first and last row are always black in first pass of particle rendering, the x pass starts one row later in that case
  const uint32_t xPassFirstRow = isparticle ? ystart + 1 : ystart;

  // pass 1: seep along x
  uint32_t amount = xblur >> 1;
  for (uint32_t row = xPassFirstRow; row < yEnd; row++) {
    carryover = BLACK;
    uint32_t pos = xstart + row * stride;
    for (uint32_t col = xstart; col < xEnd; col++, pos++) {
      seeppart = fast_color_scale(colorbuffer[pos], amount); // scaled share of this pixel that goes to the neighbours
      if (col > 0) {
        colorbuffer[pos - 1] = fast_color_scaleAdd(colorbuffer[pos - 1], seeppart); // to the previous pixel
        colorbuffer[pos] = fast_color_scaleAdd(colorbuffer[pos], carryover);        // from the previous pixel
      }
      carryover = seeppart;
    }
  }

  // pass 2: seep along y, this pass covers all rows of the region
  amount = yblur >> 1;
  for (uint32_t col = xstart; col < xEnd; col++) {
    carryover = BLACK;
    uint32_t pos = col + ystart * stride;
    for (uint32_t row = ystart; row < yEnd; row++, pos += stride) {
      seeppart = fast_color_scale(colorbuffer[pos], amount); // scaled share of this pixel that goes to the neighbours
      if (row > 0) {
        colorbuffer[pos - stride] = fast_color_scaleAdd(colorbuffer[pos - stride], seeppart); // to the pixel one row before
        colorbuffer[pos] = fast_color_scaleAdd(colorbuffer[pos], carryover);                  // from the pixel one row before
      }
      carryover = seeppart;
    }
  }
}
//non class functions to use for initialization
uint32_t calculateNumberOfParticles2D(uint32_t const pixels, const bool isadvanced, const bool sizecontrol) {
uint32_t numberofParticles = pixels; // 1 particle per pixel (for example 512 particles on 32x16)
@@ -1142,7 +1104,7 @@ bool initParticleSystem2D(ParticleSystem2D *&PartSys, uint32_t requestedsources,
PSPRINTLN(" request numparticles:" + String(numparticles));
uint32_t numsources = calculateNumberOfSources2D(pixels, requestedsources);
bool allocsuccess = false;
while(numparticles >= 4) { // make sure we have at least 4 particles or quit
while(numparticles >= 5) { // make sure we have at least 5 particles or quit
if (allocateParticleSystemMemory2D(numparticles, numsources, advanced, sizecontrol, additionalbytes)) {
PSPRINTLN(F("PS 2D alloc succeeded"));
allocsuccess = true;
@@ -1205,8 +1167,11 @@ void ParticleSystem1D::update(void) {
applyGravity();
// handle collisions (can push particles, must be done before updating particles or they can render out of bounds, causing a crash if using local buffer for speed)
if (particlesettings.useCollisions)
if (particlesettings.useCollisions) {
handleCollisions();
if (perParticleSize)
handleCollisions(); // second pass for per particle size (as impulse transfer can recoil at high speed, this improves "slip through" issues for small particles but is expensive)
}
//move all particles
for (uint32_t i = 0; i < usedParticles; i++) {
@@ -1214,7 +1179,7 @@ void ParticleSystem1D::update(void) {
}
if (particlesettings.colorByPosition) {
uint32_t scale = (255 << 16) / maxX; // speed improvement: multiplication is faster than division
uint32_t scale = (255 << 16) / maxX;
for (uint32_t i = 0; i < usedParticles; i++) {
particles[i].hue = (scale * particles[i].x) >> 16; // note: x is > 0 if not out of bounds
}
@@ -1225,7 +1190,7 @@ void ParticleSystem1D::update(void) {
// set percentage of used particles as uint8_t i.e 127 means 50% for example
void ParticleSystem1D::setUsedParticles(const uint8_t percentage) {
usedParticles = (numParticles * ((int)percentage+1)) >> 8; // number of particles to use (percentage is 0-255, 255 = 100%)
usedParticles = max((uint32_t)1, (numParticles * ((int)percentage+1)) >> 8); // number of particles to use (percentage is 0-255, 255 = 100%)
PSPRINT(" SetUsedpaticles: allocated particles: ");
PSPRINT(numParticles);
PSPRINT(" ,used particles: ");
@@ -1269,10 +1234,16 @@ void ParticleSystem1D::setSmearBlur(const uint8_t bluramount) {
smearBlur = bluramount;
}
// set the global render size of all particles and the matching hard (collision) radius
// size: 0 = 1 pixel, 1 = 2 pixel (interpolated), larger values render bigger particles, 255 = 18 pixel diameter
// note: setting a global size disables per particle size control
void ParticleSystem1D::setParticleSize(const uint8_t size) {
  particlesize = size;
  particleHardRadius = PS_P_MINHARDRADIUS_1D; // ~1 pixel, used as is for size == 1
  perParticleSize = false; // disable per particle size control if global size is set
  if (particlesize > 1) {
    // use 1 pixel + ~80% of size (52/64) for hard radius (slight overlap with borders so they do not "float" and nicer stacking)
    particleHardRadius = PS_P_MINHARDRADIUS_1D + ((particlesize * 52) >> 6);
  }
  else if (particlesize == 0) {
    particleHardRadius = particleHardRadius >> 1; // single pixel particles have half the radius (i.e. 1/2 pixel)
  }
}
// enable/disable gravity, optionally, set the force (force=8 is default) can be -127 to +127, 0 is disable
@@ -1328,16 +1299,16 @@ void ParticleSystem1D::particleMoveUpdate(PSparticle1D &part, PSparticleFlags1D
if (options->colorByAge)
part.hue = min(part.ttl, (uint16_t)255); // set color to ttl
int32_t renderradius = PS_P_HALFRADIUS_1D; // used to check out of bounds, default for 2 pixel rendering
int32_t renderradius = PS_P_HALFRADIUS_1D - 1 + particlesize; // used to check out of bounds, default for 2 pixel rendering
int32_t newX = part.x + (int32_t)part.vx;
partFlags.outofbounds = false; // reset out of bounds (in case particle was created outside the matrix and is now moving into view)
if (advancedproperties) { // using individual particle size?
if (perParticleSize && advancedproperties != nullptr) { // using individual particle size?
renderradius = PS_P_HALFRADIUS - 1 + advancedproperties->size; // note: for single pixel particles, it should be zero, but it does not matter as out of bounds checking is done in rendering function
if (advancedproperties->size > 1)
particleHardRadius = PS_P_MINHARDRADIUS_1D + (advancedproperties->size >> 1);
particleHardRadius = PS_P_MINHARDRADIUS_1D + ((advancedproperties->size * 52) >> 6); // use 1 pixel + 80% of size for hard radius (slight overlap with boarders so they do not "float" and nicer stacking)
else // single pixel particles use half the collision distance for walls
particleHardRadius = PS_P_MINHARDRADIUS_1D >> 1;
renderradius = particleHardRadius; // note: for single pixel particles, it should be zero, but it does not matter as out of bounds checking is done in rendering function
}
// if wall collisions are enabled, bounce them before they reach the edge, it looks much nicer if the particle is not half out of view
@@ -1493,7 +1464,7 @@ void ParticleSystem1D::render() {
}
// apply smear-blur to rendered frame
if (smearBlur) {
blur1D(framebuffer, maxXpixel + 1, smearBlur, 0);
SEGMENT.blur(smearBlur, true);
}
// add background color
@@ -1517,8 +1488,8 @@ void ParticleSystem1D::render() {
// calculate pixel positions and brightness distribution and render the particle to local buffer or global buffer
void WLED_O2_ATTR ParticleSystem1D::renderParticle(const uint32_t particleindex, const uint8_t brightness, const CRGBW &color, const bool wrap) {
uint32_t size = particlesize;
if (advPartProps != nullptr) // use advanced size properties (1D system has no large size global rendering TODO: add large global rendering?)
size = advPartProps[particleindex].size;
if (perParticleSize && advPartProps != nullptr) // use advanced size properties
size = 1 + advPartProps[particleindex].size; // add 1 to avoid single pixel size particles (collisions do not support it)
if (size == 0) { //single pixel particle, can be out of bounds as oob checking is made for 2-pixel particles (and updating it uses more code)
uint32_t x = particles[particleindex].x >> PS_P_RADIUS_SHIFT_1D;
@@ -1528,6 +1499,12 @@ void WLED_O2_ATTR ParticleSystem1D::renderParticle(const uint32_t particleindex,
return;
}
//render larger particles
if (size > 1) { // size > 1: render as gradient line
renderLargeParticle(size, particleindex, brightness, color, wrap); // larger size rendering
return;
}
// standard rendering (2 pixels per particle)
bool pxlisinframe[2] = {true, true};
int32_t pxlbrightness[2];
int32_t pixco[2]; // physical pixel coordinates of the two pixels representing a particle
@@ -1548,99 +1525,110 @@ void WLED_O2_ATTR ParticleSystem1D::renderParticle(const uint32_t particleindex,
// adjust brightness such that distribution is linear after gamma correction:
// - scale brigthness with gamma correction (done in render())
// - apply inverse gamma correction to brightness values
// - gamma is applied again in show() -> the resulting brightness distribution is linear but gamma corrected in total
// - gamma is applied again in show() -> the resulting brightness distribution is linear but gamma corrected in total -> fixes brightness fluctuations
if (gammaCorrectCol) {
pxlbrightness[0] = gamma8inv(pxlbrightness[0]); // use look-up-table for invers gamma
pxlbrightness[1] = gamma8inv(pxlbrightness[1]);
}
// check if particle has advanced size properties and buffer is available
if (advPartProps != nullptr && advPartProps[particleindex].size > 1) {
uint32_t renderbuffer[10]; // 10 pixel buffer
memset(renderbuffer, 0, sizeof(renderbuffer)); // clear buffer
//render particle to a bigger size
//particle size to pixels: 2 - 63 is 4 pixels, < 128 is 6pixels, < 192 is 8 pixels, bigger is 10 pixels
//first, render the pixel to the center of the renderbuffer, then apply 1D blurring
renderbuffer[4] = fast_color_scaleAdd(renderbuffer[4], color, pxlbrightness[0]);
renderbuffer[5] = fast_color_scaleAdd(renderbuffer[5], color, pxlbrightness[1]);
uint32_t rendersize = 2; // initialize render size, minimum is 4 pixels, it is incremented int he loop below to start with 4
uint32_t offset = 4; // offset to zero coordinate to write/read data in renderbuffer (actually needs to be 3, is decremented in the loop below)
uint32_t blurpasses = size/64 + 1; // number of blur passes depends on size, four passes max
uint32_t bitshift = 0;
for (uint32_t i = 0; i < blurpasses; i++) {
if (i == 2) //for the last two passes, use higher amount of blur (results in a nicer brightness gradient with soft edges)
bitshift = 1;
rendersize += 2;
offset--;
blur1D(renderbuffer, rendersize, size << bitshift, offset);
size = size > 64 ? size - 64 : 0;
}
// calculate origin coordinates to render the particle to in the framebuffer
uint32_t xfb_orig = x - (rendersize>>1) + 1 - offset; //note: using uint is fine
uint32_t xfb; // coordinates in frame buffer to write to note: by making this uint, only overflow has to be checked
// transfer particle renderbuffer to framebuffer
for (uint32_t xrb = offset; xrb < rendersize+offset; xrb++) {
xfb = xfb_orig + xrb;
if (xfb > (uint32_t)maxXpixel) {
if (wrap) { // wrap x to the other side if required
if (xfb > (uint32_t)maxXpixel << 1) // xfb is "negative"
xfb = (maxXpixel + 1) + (int32_t)xfb; // this always overflows to within bounds
else
xfb = xfb % (maxXpixel + 1); // note: without the above "negative" check, this works only for powers of 2
}
else
continue;
}
#ifdef ESP8266 // no local buffer on ESP8266
SEGMENT.addPixelColor(xfb, renderbuffer[xrb], true);
#else
framebuffer[xfb] = fast_color_scaleAdd(framebuffer[xfb], renderbuffer[xrb]);
#endif
// check if any pixels are out of frame
if (pixco[0] < 0) { // left pixels out of frame
if (wrap) // wrap x to the other side if required
pixco[0] = maxXpixel;
else {
pxlisinframe[0] = false; // pixel is out of matrix boundaries, do not render
if (pixco[0] < -1)
return; // both pixels out of frame (safety check)
}
}
else { // standard rendering (2 pixels per particle)
// check if any pixels are out of frame
if (x < 0) { // left pixels out of frame
if (wrap) // wrap x to the other side if required
pixco[0] = maxXpixel;
else
pxlisinframe[0] = false; // pixel is out of matrix boundaries, do not render
}
else if (pixco[1] > (int32_t)maxXpixel) { // right pixel, only has to be checkt if left pixel did not overflow
if (wrap) // wrap y to the other side if required
pixco[1] = 0;
else
pxlisinframe[1] = false;
}
for (uint32_t i = 0; i < 2; i++) {
if (pxlisinframe[i]) {
framebuffer[pixco[i]] = fast_color_scaleAdd(framebuffer[pixco[i]], color, pxlbrightness[i]);
}
else if (pixco[1] > (int32_t)maxXpixel) { // right pixel, only has to be checkt if left pixel did not overflow
if (wrap) // wrap y to the other side if required
pixco[1] = 0;
else {
pxlisinframe[1] = false;
if (pixco[0] > (int32_t)maxXpixel)
return; // both pixels out of frame (safety check)
}
}
for (uint32_t i = 0; i < 2; i++) {
if (pxlisinframe[i]) {
framebuffer[pixco[i]] = fast_color_scaleAdd(framebuffer[pixco[i]], color, pxlbrightness[i]);
}
}
}
// render a large particle (size > 1) as a line of pixels with sub-pixel precision
// brightness falls off from the particle center as 1 - (d/r)^2 (d = distance to center, r = radius) and is zero at and beyond the radius
// size: particle size in 0-255, the radius in sub-pixels is size + PS_P_RADIUS_1D + 1 (about 1-9 pixels)
// particleindex: index of the particle to render (only its x position is read here)
// brightness: peak brightness at the particle center, color: particle color
// wrap: if true, pixels beyond either end of the frame are rendered at the opposite end, otherwise they are skipped
void WLED_O2_ATTR ParticleSystem1D::renderLargeParticle(const uint32_t size, const uint32_t particleindex, const uint8_t brightness, const CRGBW& color, const bool wrap) {
  const int32_t x_subcenter = particles[particleindex].x; // particle position in sub-pixel space
  const int32_t x_center = x_subcenter >> PS_P_RADIUS_SHIFT_1D; // integer pixel position, this is rounded down

  // particle radius in sub-pixels, size = 1 means radius of just over 1 pixel
  // size = 255 is radius of 9, so add 33 -> 33+255=288, 288>>5=9 pixels (i.e. the +1 is needed to correct for bitshift losses)
  const int32_t r_subpixel = size + PS_P_RADIUS_1D + 1;
  const int32_t rx_sq = r_subpixel * r_subpixel; // squared radius, constant for all pixels of this particle

  // rendering bounding box in pixels
  const int32_t r_pixels = r_subpixel >> PS_P_RADIUS_SHIFT_1D;
  const int32_t x_min = x_center - r_pixels - 1; // extend by one for much smoother movement
  const int32_t x_max = x_center + r_pixels + 1;

  // cache for speed
  const uint32_t matrixX = maxXpixel + 1;

  // iterate over bounding box and render each pixel
  for (int32_t px = x_min; px <= x_max; px++) {
    // check bounds and apply wrapping
    int32_t render_x = px;
    if (render_x < 0) {
      if (!wrap) continue; // skip out of frame pixels
      render_x += matrixX;
    } else if (render_x > maxXpixel) {
      if (!wrap) continue;
      render_x -= matrixX;
    }
    // safety check: a single wrap is not enough if the frame is shorter than the bounding box or the particle is far
    // out of frame, skip such pixels instead of writing outside of the framebuffer (negative values become large when cast)
    if ((uint32_t)render_x >= matrixX) continue;

    // distance from particle center in sub-pixels, explanation see 2D version
    // note: multiplication instead of left shift as px can be negative (same result, no shift of a negative value)
    const int32_t dx = (px * PS_P_RADIUS_1D) - x_subcenter + PS_P_HALFRADIUS_1D;
    const int32_t dx_sq = dx * dx;
    const uint32_t dist_sq = (dx_sq << 8) / rx_sq; // normalized squared distance in fixed point (256 = at radius)

    // calculate brightness based on normalized squared distance from particle center
    const uint8_t pixel_brightness = dist_sq >= 256 ? 0 : ((256 - dist_sq) * brightness) >> 8;
    //if (pixel_brightness == 0) continue; // skip black pixels note: very few pixels will be black, skipping this is usually faster

    // render pixel
    framebuffer[render_x] = fast_color_scaleAdd(framebuffer[render_x], color, pixel_brightness);
  }
}
// detect collisions in an array of particles and handle them
void ParticleSystem1D::handleCollisions() {
uint32_t collisiondistance = particleHardRadius << 1;
uint32_t collisiondistance = particleHardRadius << 1; // twice the radius is min distance between colliding particles
uint32_t checkDistSq = max(2 * PS_P_MAXSPEED, (int)collisiondistance);
if (perParticleSize && advPartProps != nullptr) // using individual particle size
checkDistSq = max(2 * PS_P_MAXSPEED, (512 * 52) >> 6); // max possible collision distance that catches all collisons
checkDistSq = checkDistSq * checkDistSq; // square it for distance comparison (faster than abs() )
// note: partices are binned by position, assumption is that no more than half of the particles are in the same bin
// if they are, collisionStartIdx is increased so each particle collides at least every second frame (which still gives decent collisions)
constexpr int BIN_WIDTH = 32 * PS_P_RADIUS_1D; // width of each bin, a compromise between speed and accuracy (larger bins are faster but collapse more)
int32_t overlap = particleHardRadius << 1; // overlap bins to include edge particles to neighbouring bins
if (advPartProps != nullptr) //may be using individual particle size
overlap += 256; // add 2 * max radius (approximately)
int binWidth = 64 * PS_P_RADIUS_1D; // width of each bin, a compromise between speed and accuracy
int32_t overlap = collisiondistance + (2 * PS_P_MAXSPEED); // overlap bins to include edge particles to neighbouring bins (+ look-ahead of speed)
if (perParticleSize && advPartProps != nullptr) //may be using individual particle size
overlap = 512; // 2 * max radius, enough to catch all collisions even at full speed
uint32_t maxBinParticles = max((uint32_t)50, (usedParticles + 1) / 4); // do not bin small amounts, limit max to 1/4 of particles
uint32_t numBins = (maxX + (BIN_WIDTH - 1)) / BIN_WIDTH; // calculate number of bins
uint32_t numBins = (maxX + (binWidth - 1)) / binWidth; // calculate number of bins
if (usedParticles < maxBinParticles) {
numBins = 1; // use single bin for small number of particles
binWidth = maxX + 1;
}
uint16_t binIndices[maxBinParticles]; // array to store indices of particles in a bin
uint32_t binParticleCount; // number of particles in the current bin
uint16_t nextFrameStartIdx = hw_random16(usedParticles); // index of the first particle in the next frame (set to fixed value if bin overflow)
uint32_t nextFrameStartIdx = hw_random16(usedParticles); // index of the first particle in the next frame (set to fixed value if bin overflow)
uint32_t pidx = collisionStartIdx; //start index in case a bin is full, process remaining particles next frame
for (uint32_t bin = 0; bin < numBins; bin++) {
binParticleCount = 0; // reset for this bin
int32_t binStart = bin * BIN_WIDTH - overlap; // note: first bin will extend to negative, but that is ok as out of bounds particles are ignored
int32_t binEnd = binStart + BIN_WIDTH + overlap; // note: last bin can be out of bounds, see above
int32_t binStart = bin * binWidth - overlap; // note: first bin will extend to negative, but that is ok as out of bounds particles are ignored
int32_t binEnd = binStart + binWidth + overlap; // note: last bin can be out of bounds, see above
// fill the binIndices array for this bin
for (uint32_t i = 0; i < usedParticles; i++) {
@@ -1663,87 +1651,104 @@ void ParticleSystem1D::handleCollisions() {
uint32_t idx_i = binIndices[i];
for (uint32_t j = i + 1; j < binParticleCount; j++) { // check against higher number particles
uint32_t idx_j = binIndices[j];
if (advPartProps != nullptr) { // use advanced size properties
collisiondistance = (PS_P_MINHARDRADIUS_1D << particlesize) + ((advPartProps[idx_i].size + advPartProps[idx_j].size) >> 1);
}
int32_t dx = (particles[idx_j].x + particles[idx_j].vx) - (particles[idx_i].x + particles[idx_i].vx); // distance between particles with lookahead
uint32_t dx_abs = abs(dx);
if (dx_abs <= collisiondistance) { // collide if close
collideParticles(particles[idx_i], particleFlags[idx_i], particles[idx_j], particleFlags[idx_j], dx, dx_abs, collisiondistance);
int32_t dx = particles[idx_j].x - particles[idx_i].x; // distance between particles
uint32_t dx_sq = dx * dx; // square distance (faster than abs() and works the same)
if (dx_sq <= checkDistSq) { // possible collision imminent, check properly note: this is slower than using direct speed look-ahead (like in 2D) but more accurate and fast enough for 1D
collideParticles(idx_i, idx_j, dx, collisiondistance); // handle the collision
}
}
}
}
collisionStartIdx = nextFrameStartIdx; // set the start index for the next frame
}
// handle a collision if close proximity is detected, i.e. dx and/or dy smaller than 2*PS_P_RADIUS
// takes two pointers to the particles to collide and the particle hardness (softer means more energy lost in collision, 255 means full hard)
void WLED_O2_ATTR ParticleSystem1D::collideParticles(PSparticle1D &particle1, const PSparticleFlags1D &particle1flags, PSparticle1D &particle2, const PSparticleFlags1D &particle2flags, const int32_t dx, const uint32_t dx_abs, const uint32_t collisiondistance) {
int32_t dv = particle2.vx - particle1.vx;
// handle a collision if close proximity is detected, i.e. dx smaller than 2*radius + speed look-ahead
void WLED_O2_ATTR ParticleSystem1D::collideParticles(uint32_t partIdx1, uint32_t partIdx2, int32_t dx, uint32_t collisiondistance) {
int32_t massratio1 = 0; // 0 means dont use mass ratio (equal mass)
int32_t massratio2 = 0;
if (perParticleSize && advPartProps != nullptr) { // use advanced size properties, calculate collision distance and mass ratio
collisiondistance = (PS_P_MINHARDRADIUS_1D * 2) + ((((uint32_t)advPartProps[partIdx1].size + (uint32_t)advPartProps[partIdx2].size) * 52) >> 6); // collision distance, use 80% of size for tighter stacking (slight overlap)
// calculate mass ratio for collision response
uint32_t mass1 = PS_P_RADIUS_1D + advPartProps[partIdx1].size;
uint32_t mass2 = PS_P_RADIUS_1D + advPartProps[partIdx2].size;
uint32_t totalmass = mass1 + mass2 - 2; // -2 to account for rounding
massratio1 = (mass2 << 8) / totalmass; // massratio 1 depends on mass of particle 2, i.e. if 2 is heavier -> higher velocity impact on 1
massratio2 = (mass1 << 8) / totalmass;
}
int32_t dv = (int)particles[partIdx2].vx - (int)particles[partIdx1].vx;
int32_t absdv = abs(dv);
int32_t dotProduct = (dx * dv); // is always negative if moving towards each other
uint32_t dx_abs = abs(dx);
if (dotProduct < 0) { // particles are moving towards each other
uint32_t surfacehardness = max(collisionHardness, (int32_t)PS_P_MINSURFACEHARDNESS_1D); // if particles are soft, the impulse must stay above a limit or collisions slip through
// Calculate new velocities after collision note: not using dot product like in 2D as impulse is purely speed depnedent
#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(ESP8266) // use bitshifts with rounding instead of division (2x faster)
int32_t impulse = ((dv * surfacehardness) + ((dv >> 31) & 0xFF)) >> 8; // note: (v>>31) & 0xFF)) extracts the sign and adds 255 if negative for correct rounding using shifts
#else // division is faster on ESP32, S2 and S3
int32_t impulse = (dv * surfacehardness) / 255;
#endif
particle1.vx += impulse;
particle2.vx -= impulse;
// if one of the particles is fixed, transfer the impulse back so it bounces
if (particle1flags.fixed)
particle2.vx = -particle1.vx;
else if (particle2flags.fixed)
particle1.vx = -particle2.vx;
if (collisionHardness < PS_P_MINSURFACEHARDNESS_1D && (SEGMENT.call & 0x07) == 0) { // if particles are soft, they become 'sticky' i.e. apply some friction
const uint32_t coeff = collisionHardness + (250 - PS_P_MINSURFACEHARDNESS_1D);
uint32_t lookaheadDistance = collisiondistance + absdv; // add look-ahead: if reaching collisiondistance in this frame, collide
if (dx_abs <= lookaheadDistance) {
// if one of the particles is fixed, invert the other particle's velocity and multiply by hardness, also set its position to the edge of the fixed particle
if (particleFlags[partIdx1].fixed) {
particles[partIdx2].vx = -(particles[partIdx2].vx * collisionHardness) / 255;
particles[partIdx2].x = particles[partIdx1].x + (dx < 0 ? -collisiondistance : collisiondistance); // dv < 0 means particle2.x < particle1.x
return;
}
else if (particleFlags[partIdx2].fixed) {
particles[partIdx1].vx = -(particles[partIdx1].vx * collisionHardness) / 255;
particles[partIdx1].x = particles[partIdx2].x + (dx < 0 ? collisiondistance : -collisiondistance);
return;
}
int32_t surfacehardness = max(collisionHardness, (int32_t)PS_P_MINSURFACEHARDNESS_1D); // if particles are soft, the impulse must stay above a limit or collisions slip through
// Calculate new velocities after collision note: not using dot product like in 2D as impulse is purely speed depnedent
#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(ESP8266) // use bitshifts with rounding instead of division (2x faster)
particle1.vx = ((int32_t)particle1.vx * coeff + (((int32_t)particle1.vx >> 31) & 0xFF)) >> 8; // note: (v>>31) & 0xFF)) extracts the sign and adds 255 if negative for correct rounding using shifts
particle2.vx = ((int32_t)particle2.vx * coeff + (((int32_t)particle2.vx >> 31) & 0xFF)) >> 8;
int32_t impulse = (dv * surfacehardness + ((dv >> 31) & 0xFF)) >> 8; // note: (v>>31) & 0xFF)) extracts the sign and adds 255 if negative for correct rounding using shifts
#else // division is faster on ESP32, S2 and S3
particle1.vx = ((int32_t)particle1.vx * coeff) / 255;
particle2.vx = ((int32_t)particle2.vx * coeff) / 255;
int32_t impulse = (dv * surfacehardness) / 255;
#endif
// if particles are not the same size, use a mass ratio. mass ratio is set to 0 if particles are the same size
if (massratio1) {
int vx1 = (int)particles[partIdx1].vx + ((impulse * massratio1) >> 7); // mass ratio is in fixed point 8bit
int vx2 = (int)particles[partIdx2].vx - ((impulse * massratio2) >> 7);
// limit speeds to max speed (required as a lot of impulse can be transferred from a large to a small particle)
particles[partIdx1].vx = limitSpeed(vx1);
particles[partIdx2].vx = limitSpeed(vx2);
}
else {
particles[partIdx1].vx += impulse;
particles[partIdx2].vx -= impulse;
}
if (collisionHardness < PS_P_MINSURFACEHARDNESS_1D && (SEGMENT.call & 0x07) == 0) { // if particles are soft, they become 'sticky' i.e. apply some friction
const uint32_t coeff = collisionHardness + (250 - PS_P_MINSURFACEHARDNESS_1D);
#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(ESP8266) // use bitshifts with rounding instead of division (2x faster)
particles[partIdx1].vx = ((int32_t)particles[partIdx1].vx * coeff + (((int32_t)particles[partIdx1].vx >> 31) & 0xFF)) >> 8; // note: (v>>31) & 0xFF)) extracts the sign and adds 255 if negative for correct rounding using shifts
particles[partIdx2].vx = ((int32_t)particles[partIdx2].vx * coeff + (((int32_t)particles[partIdx2].vx >> 31) & 0xFF)) >> 8;
#else // division is faster on ESP32, S2 and S3
particles[partIdx1].vx = ((int32_t)particles[partIdx1].vx * coeff) / 255;
particles[partIdx2].vx = ((int32_t)particles[partIdx2].vx * coeff) / 255;
#endif
}
} else {
return; // not close enough yet
}
}
// particles have volume, push particles apart if they are too close
// note: like in 2D, pushing by a distance makes softer piles collapse, giving particles speed prevents that and looks nicer
if (dx_abs < (collisiondistance - 8) && abs(dv) < 5) { // overlapping and moving slowly
// particles have volume, push particles apart if they are too close
// behaviour is different than in 2D, we need pixel accurate stacking here, push the top particle
// note: like in 2D, pushing by a distance makes softer piles collapse, giving particles speed prevents that and looks nicer
int32_t pushamount = 1;
if (dx < 0) // particle2.x < particle1.x
if (dx_abs < collisiondistance) { // too close, force push particles so they dont collapse
int32_t pushamount = 1 + ((collisiondistance - dx_abs) >> 3); // push by eighth of deviation (plus 1 to push at least a little), note: pushing too much leads to pass-throughs and more flickering
int32_t addspeed = 1;
if (dx < 0) { // particle2.x < particle1.x
pushamount = -pushamount;
particle1.vx -= pushamount;
particle2.vx += pushamount;
if (dx_abs < collisiondistance >> 1) { // too close, force push particles so they dont collapse
pushamount = 1 + ((collisiondistance - dx_abs) >> 3); // note: push amount found by experimentation
if (particle1.x < (maxX >> 1)) { // lower half, push particle with larger x in positive direction
if (dx < 0 && !particle1flags.fixed) { // particle2.x < particle1.x -> push particle 1
particle1.vx++;// += pushamount;
particle1.x += pushamount;
}
else if (!particle2flags.fixed) { // particle1.x < particle2.x -> push particle 2
particle2.vx++;// += pushamount;
particle2.x += pushamount;
}
}
else { // upper half, push particle with smaller x
if (dx < 0 && !particle2flags.fixed) { // particle2.x < particle1.x -> push particle 2
particle2.vx--;// -= pushamount;
particle2.x -= pushamount;
}
else if (!particle1flags.fixed) { // particle1.x < particle2.x -> push particle 1
particle1.vx--;// -= pushamount;
particle1.x -= pushamount;
}
}
addspeed = -addspeed;
}
if (absdv < 4) { // low relative speed, add speed to help with the pushing (less collapsing piles)
particles[partIdx1].vx -= addspeed;
particles[partIdx2].vx += addspeed;
}
// push only one particle to avoid oscillations
bool fairlyrandom = dotProduct & 0x01;
if (fairlyrandom) {
particles[partIdx1].x -= pushamount;
}
else {
particles[partIdx2].x += pushamount;
}
}
}
@@ -1855,24 +1860,6 @@ bool initParticleSystem1D(ParticleSystem1D *&PartSys, const uint32_t requestedso
PartSys = new (SEGENV.data) ParticleSystem1D(SEGMENT.virtualLength(), numparticles, numsources, advanced); // particle system constructor
return true;
}
// blur a 1D buffer, sub-size blurring can be done using start and size
// for speed, 32bit variables are used, make sure to limit them to 8bit (0-255) or result is undefined
// to blur a subset of the buffer, change the size and set start to the desired starting coordinates
void blur1D(uint32_t *colorbuffer, uint32_t size, uint32_t blur, uint32_t start)
{
CRGBW seeppart, carryover;
uint32_t seep = blur >> 1;
carryover = BLACK;
for (uint32_t x = start; x < start + size; x++) {
seeppart = fast_color_scale(colorbuffer[x], seep); // scale it and seep to neighbours
if (x > 0) {
colorbuffer[x-1] = fast_color_scaleAdd(colorbuffer[x-1], seeppart);
colorbuffer[x] = fast_color_scaleAdd(colorbuffer[x], carryover); // is black on first pass
}
carryover = seeppart;
}
}
#endif // WLED_DISABLE_PARTICLESYSTEM1D
#if !(defined(WLED_DISABLE_PARTICLESYSTEM2D) && defined(WLED_DISABLE_PARTICLESYSTEM1D)) // not both disabled