replaced blur-rendering with ellipse rendering

- better size control as size is now exactly mappable to pixels so it can be matched exactly to the collision distance - no more gaps due to collision distance mismatch - much faster: saw up to 30% improvement in FPS - also adjusted some of the FX to make better use of the new rendering
2025-12-06 14:27:15 +01:00
parent cc5b504771
commit 6f6ac066c9
3 changed files with 221 additions and 177 deletions
--- a/wled00/FXparticleSystem.cpp
+++ b/wled00/FXparticleSystem.cpp
@@ -33,7 +33,7 @@ ParticleSystem2D::ParticleSystem2D(uint32_t width, uint32_t height, uint32_t num
  setWallHardness(255); // set default wall hardness to max
  setWallRoughness(0); // smooth walls by default
  setGravity(0); //gravity disabled by default
-  setParticleSize(1); // 2x2 rendering size by default
+  setParticleSize(1); // 2x2 rendering size by default (disables per particle size control by default)
  motionBlur = 0; //no fading by default
  smearBlur = 0; //no smearing by default
  emitIndex = 0;
@@ -58,7 +58,7 @@ void ParticleSystem2D::update(void) {
    applyGravity();

  //update size settings before handling collisions
-  if (advPartSize) {
+  if (advPartSize != nullptr) {
    for (uint32_t i = 0; i < usedParticles; i++) {
      if (updateSize(&advPartProps[i], &advPartSize[i]) == false) { // if particle shrinks to 0 size
        particles[i].ttl = 0; // kill particle
@@ -139,7 +139,6 @@ void ParticleSystem2D::setColorByAge(bool enable) {
 }

 void ParticleSystem2D::setMotionBlur(uint8_t bluramount) {
-  if (particlesize < 2) // only allow motion blurring on default particle sizes or advanced size (cannot combine motion blur with normal blurring used for particlesize, would require another buffer)
    motionBlur = bluramount;
 }

@@ -148,13 +147,13 @@ void ParticleSystem2D::setSmearBlur(uint8_t bluramount) {
 }


-// render size using smearing (see blur function)
+// set global particle size
 void ParticleSystem2D::setParticleSize(uint8_t size) {
  particlesize = size;
  particleHardRadius = PS_P_MINHARDRADIUS; // ~1 pixel
+  perParticleSize = false; // disable per particle size control if global size is set
  if (particlesize > 1) {
-    particleHardRadius = max(particleHardRadius, (uint32_t)particlesize); // radius used for wall collisions & particle collisions
-    motionBlur = 0; // disable motion blur if particle size is set
+    particleHardRadius = PS_P_MINHARDRADIUS + ((particlesize * 52) >> 6); // use 1 pixel + 80% of size for hard radius (slight overlap with boarders so they do not "float" and nicer stacking)
  }
  else if (particlesize == 0)
    particleHardRadius = particleHardRadius >> 1; // single pixel particles have half the radius (i.e. 1/2 pixel)
@@ -194,7 +193,7 @@ int32_t ParticleSystem2D::sprayEmit(const PSsource &emitter) {
      particles[emitIndex].sat = emitter.source.sat;
      particleFlags[emitIndex].collide = emitter.sourceFlags.collide;
      particles[emitIndex].ttl = hw_random16(emitter.minLife, emitter.maxLife);
-      if (advPartProps)
+      if (advPartProps != nullptr)
        advPartProps[emitIndex].size = emitter.size;
      break;
    }
@@ -231,17 +230,14 @@ void ParticleSystem2D::particleMoveUpdate(PSparticle &part, PSparticleFlags &par
    if (options->colorByAge)
      part.hue = min(part.ttl, (uint16_t)255); //set color to ttl

-    int32_t renderradius = PS_P_HALFRADIUS; // used to check out of bounds
+    int32_t renderradius = PS_P_HALFRADIUS - 1 + particlesize; // used to check out of bounds, if its more than half a radius out of bounds, it will render to x = -2/-1 or x=max/max+1 in standard 2x2 rendering
    int32_t newX = part.x + (int32_t)part.vx;
    int32_t newY = part.y + (int32_t)part.vy;
    partFlags.outofbounds = false; // reset out of bounds (in case particle was created outside the matrix and is now moving into view) note: moving this to checks below adds code and is not faster

-    if (advancedproperties) { //using individual particle size?
-      setParticleSize(particlesize); // updates default particleHardRadius
-      if (advancedproperties->size > PS_P_MINHARDRADIUS) {
-        particleHardRadius += (advancedproperties->size - PS_P_MINHARDRADIUS); // update radius
-        renderradius = particleHardRadius;
-      }
+    if (perParticleSize && advancedproperties != nullptr) { // using individual particle size
+      renderradius = PS_P_HALFRADIUS - 1 + advancedproperties->size;
+      particleHardRadius = PS_P_MINHARDRADIUS + ((advancedproperties->size * 52) >> 6); // use 1 pixel + 80% of size for hard radius (slight overlap with boarders so they do not "float")
    }
    // note: if wall collisions are enabled, bounce them before they reach the edge, it looks much nicer if the particle does not go half out of view
    if (options->bounceY) {
@@ -347,7 +343,7 @@ bool ParticleSystem2D::updateSize(PSadvancedParticle *advprops, PSsizeControl *a
    if (newsize > advsize->minsize) {
      newsize -= increment;
      if (newsize <= advsize->minsize) {
-        if (advsize->minsize == 0) 
+        if (advsize->minsize == 0)
          return false; // particle shrunk to zero
        advsize->shrink = false; // disable shrinking
        newsize = advsize->minsize; // limit
@@ -556,7 +552,7 @@ void ParticleSystem2D::pointAttractor(const uint32_t particleindex, PSparticle &
 // warning: do not render out of bounds particles or system will crash! rendering does not check if particle is out of bounds
 // firemode is only used for PS Fire FX
 void ParticleSystem2D::render() {
-  if(framebuffer == nullptr) {
+  if (framebuffer == nullptr) {
    PSPRINTLN(F("PS render: no framebuffer!"));
    return;
  }
@@ -600,33 +596,114 @@ void ParticleSystem2D::render() {
        hsv2rgb(baseHSV, baseRGB.color32); // convert back to RGB
      }
    }
-    if(gammaCorrectCol) brightness = gamma8(brightness); // apply gamma correction, used for gamma-inverted brightness distribution
+    if (gammaCorrectCol) brightness = gamma8(brightness); // apply gamma correction, used for gamma-inverted brightness distribution
    renderParticle(i, brightness, baseRGB, particlesettings.wrapX, particlesettings.wrapY);
  }

-  // apply global size rendering
-  if (particlesize > 1) {
-    uint32_t passes = particlesize / 64 + 1; // number of blur passes, four passes max
-    uint32_t bluramount = particlesize;
-    uint32_t bitshift = 0;
-    for (uint32_t i = 0; i < passes; i++) {
-      if (i == 2) // for the last two passes, use higher amount of blur (results in a nicer brightness gradient with soft edges)
-        bitshift = 1;
-      blur2D(framebuffer, maxXpixel + 1, maxYpixel + 1, bluramount << bitshift, bluramount << bitshift);
-      bluramount -= 64;
-    }
-  }
-
  // apply 2D blur to rendered frame
  if (smearBlur) {
    blur2D(framebuffer, maxXpixel + 1, maxYpixel + 1, smearBlur, smearBlur);
  }
 }

+// render particle as ellipse/circle with linear brightness falloff and sub-pixel precision
+void WLED_O2_ATTR ParticleSystem2D::renderParticleEllipse(const uint32_t particleindex, const uint8_t brightness, const CRGBW& color, const bool wrapX, const bool wrapY) {
+  uint32_t size = particlesize;
+  if (perParticleSize && advPartProps != nullptr) // individual particle size
+    size = advPartProps[particleindex].size;
+
+  // particle position with sub-pixel precision
+  int32_t x_subcenter = particles[particleindex].x;
+  int32_t y_subcenter = particles[particleindex].y;
+
+  // example: for x = 128, a paticle is exacly between pixel 1 and 2, with a radius of 2 pixels, we draw pixels 0-3
+  // integer center jumps when x = 127 -> pixel 1 goes to x = 128 -> pixel 2
+  // when calculating the dx, we need to take this into account: at x = 128 the x offset is 1, the pixel center is at pixel 2:
+  // for pixel 1, dx = 1 * PS_P_RADIUS - 128 = -64 but the center of the pixel is actually only -32 from the particle center so need to add half a radius:
+  // dx = pixel_x * PS_P_RADIUS - x_subcenter + PS_P_HALFRADIUS
+
+  // sub-pixel offset (0-63)
+  int32_t x_offset = x_subcenter & (PS_P_RADIUS - 1); // same as modulo PS_P_RADIUS but faster
+  int32_t y_offset = y_subcenter & (PS_P_RADIUS - 1);
+  // integer pixel position, this is rounded down
+  int32_t x_center = (x_subcenter) >> PS_P_RADIUS_SHIFT;
+  int32_t y_center = (y_subcenter) >> PS_P_RADIUS_SHIFT;
+
+  // ellipse radii in pixels
+  uint32_t xsize = size;
+  uint32_t ysize = size;
+  if (advPartSize != nullptr && advPartSize[particleindex].asymmetry > 0) {
+    getParticleXYsize(&advPartProps[particleindex], &advPartSize[particleindex], xsize, ysize);
+  }
+
+  int32_t rx_subpixel = xsize+65; // size = 1 means radius of just over 1 pixel
+  int32_t ry_subpixel = ysize+65; // size = 255 is radius of 5, so add 65 -> 65+255=320, 320>>6=5 pixels
+
+  // rendering bounding box in pixels
+  int32_t rx_pixels = (rx_subpixel >> PS_P_RADIUS_SHIFT);
+  int32_t ry_pixels = (ry_subpixel >> PS_P_RADIUS_SHIFT);
+
+  int32_t x_min = x_center - rx_pixels;
+  int32_t x_max = x_center + rx_pixels;
+  int32_t y_min = y_center - ry_pixels;
+  int32_t y_max = y_center + ry_pixels;
+
+  // cache for speed
+  uint32_t matrixX = maxXpixel + 1;
+  uint32_t matrixY = maxYpixel + 1;
+  uint32_t rx_sq = rx_subpixel * rx_subpixel;
+  uint32_t ry_sq = ry_subpixel * ry_subpixel;
+
+  // iterate over bounding box and render each pixel
+  for (int32_t py = y_min; py <= y_max; py++) {
+    for (int32_t px = x_min; px <= x_max; px++) {
+      // distance from particle center, explanation see above
+      int32_t dx_subpixel = (px << PS_P_RADIUS_SHIFT) - x_subcenter + PS_P_HALFRADIUS;
+      int32_t dy_subpixel = (py << PS_P_RADIUS_SHIFT) - y_subcenter + PS_P_HALFRADIUS;
+
+      // calculate brightness based on squared distance to ellipse center
+      uint8_t pixel_brightness = calculateEllipseBrightness(dx_subpixel, dy_subpixel, rx_sq, ry_sq, brightness);
+
+      if (pixel_brightness == 0) continue; // Skip fully transparent pixels
+
+      // apply inverse gamma correction if needed, if this is skipped, particles flicker due to changing total brightness
+      if (gammaCorrectCol) {
+        pixel_brightness = gamma8inv(pixel_brightness); // invert brigthess so brightness distribution is linear after gamma correction
+      }
+
+      // Handle wrapping and bounds
+      int32_t render_x = px;
+      int32_t render_y = py;
+
+      // Check bounds and apply wrapping
+      if (render_x < 0) {
+        if (!wrapX) continue;
+        render_x += matrixX;
+      } else if (render_x > maxXpixel) {
+        if (!wrapX) continue;
+        render_x -= matrixX;
+      }
+
+      if (render_y < 0) {
+        if (!wrapY) continue;
+        render_y += matrixY;
+      } else if (render_y > maxYpixel) {
+        if (!wrapY) continue;
+        render_y -= matrixY;
+      }
+      // Render pixel
+      uint32_t idx = render_x + (maxYpixel - render_y) * matrixX; // flip y coordinate (0,0 is bottom left in PS but top left in framebuffer)
+      framebuffer[idx] = fast_color_scaleAdd(framebuffer[idx], color, pixel_brightness);
+    }
+  }
+}
+
+
 // calculate pixel positions and brightness distribution and render the particle to local buffer or global buffer
 void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex, const uint8_t brightness, const CRGBW& color, const bool wrapX, const bool wrapY) {
  uint32_t size = particlesize;
-  if (advPartProps && advPartProps[particleindex].size > 0) // use advanced size properties (0 means use global size including single pixel rendering)
+
+  if (perParticleSize && advPartProps != nullptr) // use advanced size properties
    size = advPartProps[particleindex].size;

  if (size == 0) { // single pixel rendering
@@ -638,6 +715,13 @@ void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex,
    }
    return;
  }
+
+  if (size > 1) { // size > 1: render as ellipse
+    renderParticleEllipse(particleindex, brightness, color, wrapX, wrapY); // larger size rendering
+    return;
+  }
+
+  // size = 1: standard 2x2 pixel rendering using bilinear interpolation (20% faster than ellipse rendering)
  uint8_t pxlbrightness[4]; // brightness values for the four pixels representing a particle
  struct {
    int32_t x,y;
@@ -645,6 +729,7 @@ void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex,
  bool pixelvalid[4] = {true, true, true, true}; // is set to false if pixel is out of bounds

  // add half a radius as the rendering algorithm always starts at the bottom left, this leaves things positive, so shifts can be used, then shift coordinate by a full pixel (x--/y-- below)
+  // if sub-pixel position is 0-PS_P_HALFRADIUS it will render to x>>PS_P_RADIUS_SHIFT as the right pixel
  int32_t xoffset = particles[particleindex].x + PS_P_HALFRADIUS;
  int32_t yoffset = particles[particleindex].y + PS_P_HALFRADIUS;
  int32_t dx = xoffset & (PS_P_RADIUS - 1); // relativ particle position in subpixel space
@@ -662,7 +747,7 @@ void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex,

  // calculate brightness values for all four pixels representing a particle using linear interpolation
  // could check for out of frame pixels here but calculating them is faster (very few are out)
-  // precalculate values for speed optimization
+  // precalculate values for speed optimization. Note: rounding is not perfect but close enough, some inaccuracy is traded for speed
  int32_t precal1 = (int32_t)PS_P_RADIUS - dx;
  int32_t precal2 = ((int32_t)PS_P_RADIUS - dy) * brightness;
  int32_t precal3 = dy * brightness;
@@ -674,118 +759,48 @@ void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex,
  // - scale brigthness with gamma correction (done in render())
  // - apply inverse gamma correction to brightness values
  // - gamma is applied again in show() -> the resulting brightness distribution is linear but gamma corrected in total
-  if(gammaCorrectCol) {
+  if (gammaCorrectCol) {
    pxlbrightness[0] = gamma8inv(pxlbrightness[0]); // use look-up-table for invers gamma
    pxlbrightness[1] = gamma8inv(pxlbrightness[1]);
    pxlbrightness[2] = gamma8inv(pxlbrightness[2]);
    pxlbrightness[3] = gamma8inv(pxlbrightness[3]);
  }

-  if (advPartProps && advPartProps[particleindex].size > 1) { //render particle to a bigger size
-    uint32_t renderbuffer[100]; // 10x10 pixel buffer
-    memset(renderbuffer, 0, sizeof(renderbuffer)); // clear buffer
-    //particle size to pixels: < 64 is 4x4, < 128 is 6x6, < 192 is 8x8, bigger is 10x10
-    //first, render the pixel to the center of the renderbuffer, then apply 2D blurring
-    renderbuffer[4 + (4 * 10)] = fast_color_scaleAdd(renderbuffer[4 + (4 * 10)], color, pxlbrightness[0]); // order is: bottom left, bottom right, top right, top left
-    renderbuffer[5 + (4 * 10)] = fast_color_scaleAdd(renderbuffer[5 + (4 * 10)], color, pxlbrightness[1]);
-    renderbuffer[5 + (5 * 10)] = fast_color_scaleAdd(renderbuffer[5 + (5 * 10)], color, pxlbrightness[2]);
-    renderbuffer[4 + (5 * 10)] = fast_color_scaleAdd(renderbuffer[4 + (5 * 10)], color, pxlbrightness[3]);
-    uint32_t rendersize = 2; // initialize render size, minimum is 4x4 pixels, it is incremented int he loop below to start with 4
-    uint32_t offset = 4; // offset to zero coordinate to write/read data in renderbuffer (actually needs to be 3, is decremented in the loop below)
-    uint32_t maxsize = advPartProps[particleindex].size;
-    uint32_t xsize = maxsize;
-    uint32_t ysize = maxsize;
-    if (advPartSize) { // use advanced size control
-      if (advPartSize[particleindex].asymmetry > 0)
-        getParticleXYsize(&advPartProps[particleindex], &advPartSize[particleindex], xsize, ysize);
-      maxsize = (xsize > ysize) ? xsize : ysize; // choose the bigger of the two
+  // standard rendering (2x2 pixels)
+  // check for out of frame pixels and wrap them if required: x,y is bottom left pixel coordinate of the particle
+  if (x < 0) { // left pixels out of frame
+    if (wrapX) { // wrap x to the other side if required
+      pixco[0].x = pixco[3].x = maxXpixel;
+    } else {
+      pixelvalid[0] = pixelvalid[3] = false; // out of bounds
    }
-    maxsize = maxsize/64 + 1; // number of blur passes depends on maxsize, four passes max
-    uint32_t bitshift = 0;
-    for (uint32_t i = 0; i < maxsize; i++) {
-      if (i == 2) //for the last two passes, use higher amount of blur (results in a nicer brightness gradient with soft edges)
-        bitshift = 1;
-      rendersize += 2;
-      offset--;
-      blur2D(renderbuffer, rendersize, rendersize, xsize << bitshift, ysize << bitshift, offset, offset, true);
-      xsize = xsize > 64 ? xsize - 64 : 0;
-      ysize = ysize > 64 ? ysize - 64 : 0;
+  }
+  else if (pixco[1].x > (int32_t)maxXpixel) { // right pixels, only has to be checked if left pixel is in frame
+    if (wrapX) { // wrap y to the other side if required
+      pixco[1].x = pixco[2].x = 0;
+    } else {
+      pixelvalid[1] = pixelvalid[2] = false; // out of bounds
    }
+  }

-    // calculate origin coordinates to render the particle to in the framebuffer
-    uint32_t xfb_orig = x - (rendersize>>1) + 1 - offset;
-    uint32_t yfb_orig = y - (rendersize>>1) + 1 - offset;
-    uint32_t xfb, yfb; // coordinates in frame buffer to write to note: by making this uint, only overflow has to be checked (spits a warning though)
-
-    //note on y-axis flip: WLED has the y-axis defined from top to bottom, so y coordinates must be flipped. doing this in the buffer xfer clashes with 1D/2D combined rendering, which does not invert y
-    //                     transferring the 1D buffer in inverted fashion will flip the x-axis of overlaid 2D FX, so the y-axis flip is done here so the buffer is flipped in y, giving correct results
-
-    // transfer particle renderbuffer to framebuffer
-    for (uint32_t xrb = offset; xrb < rendersize + offset; xrb++) {
-      xfb = xfb_orig + xrb;
-      if (xfb > (uint32_t)maxXpixel) {
-      if (wrapX) { // wrap x to the other side if required
-        if (xfb > (uint32_t)maxXpixel << 1) // xfb is "negative", handle it
-          xfb = (maxXpixel + 1) + (int32_t)xfb; // this always overflows to within bounds
-        else
-          xfb = xfb % (maxXpixel + 1); // note: without the above "negative" check, this works only for powers of 2
-      }
-      else
-        continue;
-      }
-
-      for (uint32_t yrb = offset; yrb < rendersize + offset; yrb++) {
-        yfb = yfb_orig + yrb;
-        if (yfb > (uint32_t)maxYpixel) {
-          if (wrapY) {// wrap y to the other side if required
-            if (yfb > (uint32_t)maxYpixel << 1) // yfb is "negative", handle it
-              yfb = (maxYpixel + 1) + (int32_t)yfb; // this always overflows to within bounds
-            else
-              yfb = yfb % (maxYpixel + 1); // note: without the above "negative" check, this works only for powers of 2
-          }
-          else
-          continue;
-        }
-        uint32_t idx = xfb + (maxYpixel - yfb) * (maxXpixel + 1); // flip y coordinate (0,0 is bottom left in PS but top left in framebuffer)
-        framebuffer[idx] = fast_color_scaleAdd(framebuffer[idx], renderbuffer[xrb + yrb * 10]);
-      }
+  if (y < 0) { // bottom pixels out of frame
+    if (wrapY) { // wrap y to the other side if required
+      pixco[0].y = pixco[1].y = maxYpixel;
+    } else {
+      pixelvalid[0] = pixelvalid[1] = false; // out of bounds
    }
-    } else { // standard rendering (2x2 pixels)
-    // check for out of frame pixels and wrap them if required: x,y is bottom left pixel coordinate of the particle
-    if (x < 0) { // left pixels out of frame
-      if (wrapX) { // wrap x to the other side if required
-        pixco[0].x = pixco[3].x = maxXpixel;
-      } else {
-        pixelvalid[0] = pixelvalid[3] = false; // out of bounds
-      }
+  }
+  else if (pixco[2].y > maxYpixel) { // top pixels
+    if (wrapY) { // wrap y to the other side if required
+      pixco[2].y = pixco[3].y = 0;
+    } else {
+      pixelvalid[2] = pixelvalid[3] = false; // out of bounds
    }
-    else if (pixco[1].x > (int32_t)maxXpixel) { // right pixels, only has to be checked if left pixel is in frame
-      if (wrapX) { // wrap y to the other side if required
-        pixco[1].x = pixco[2].x = 0;
-      } else {
-        pixelvalid[1] = pixelvalid[2] = false; // out of bounds
-      }
-    }
-
-    if (y < 0) { // bottom pixels out of frame
-      if (wrapY) { // wrap y to the other side if required
-        pixco[0].y = pixco[1].y = maxYpixel;
-      } else {
-        pixelvalid[0] = pixelvalid[1] = false; // out of bounds
-      }
-    }
-    else if (pixco[2].y > maxYpixel) { // top pixels
-      if (wrapY) { // wrap y to the other side if required
-        pixco[2].y = pixco[3].y = 0;
-      } else {
-        pixelvalid[2] = pixelvalid[3] = false; // out of bounds
-      }
-    }
-    for (uint32_t i = 0; i < 4; i++) {
-      if (pixelvalid[i]) {
-        uint32_t idx = pixco[i].x + (maxYpixel - pixco[i].y) * (maxXpixel + 1); // flip y coordinate (0,0 is bottom left in PS but top left in framebuffer)
-        framebuffer[idx] = fast_color_scaleAdd(framebuffer[idx], color, pxlbrightness[i]); // order is: bottom left, bottom right, top right, top left
-      }
+  }
+  for (uint32_t i = 0; i < 4; i++) {
+    if (pixelvalid[i]) {
+      uint32_t idx = pixco[i].x + (maxYpixel - pixco[i].y) * (maxXpixel + 1); // flip y coordinate (0,0 is bottom left in PS but top left in framebuffer)
+      framebuffer[idx] = fast_color_scaleAdd(framebuffer[idx], color, pxlbrightness[i]); // order is: bottom left, bottom right, top right, top left
    }
  }
 }
@@ -795,14 +810,15 @@ void WLED_O2_ATTR ParticleSystem2D::renderParticle(const uint32_t particleindex,
 // for code simplicity, no y slicing is done, making very tall matrix configurations less efficient
 // note: also tested adding y slicing, it gives diminishing returns, some FX even get slower. FX not using gravity would benefit with a 10% FPS improvement
 void ParticleSystem2D::handleCollisions() {
+  if (perParticleSize && advPartProps != nullptr)
+    particleHardRadius = 255; // max radius for collision detection if using per-particle size TODO: could optimize by fetching max size from advPartProps
+
  uint32_t collDistSq = particleHardRadius << 1; // distance is double the radius note: particleHardRadius is updated when setting global particle size
  collDistSq = collDistSq * collDistSq; // square it for faster comparison (square is one operation)
  // note: partices are binned in x-axis, assumption is that no more than half of the particles are in the same bin
  // if they are, collisionStartIdx is increased so each particle collides at least every second frame (which still gives decent collisions)
  constexpr int BIN_WIDTH = 6 * PS_P_RADIUS; // width of a bin in sub-pixels
  int32_t overlap = particleHardRadius << 1; // overlap bins to include edge particles to neighbouring bins
-  if (advPartProps) //may be using individual particle size
-    overlap += 512; // add 2 * max radius (approximately)
  uint32_t maxBinParticles = max((uint32_t)50, (usedParticles + 1) / 2); // assume no more than half of the particles are in the same bin, do not bin small amounts of particles
  uint32_t numBins = (maxX + (BIN_WIDTH - 1)) / BIN_WIDTH; // number of bins in x direction
  uint16_t binIndices[maxBinParticles]; // creat array on stack for indices, 2kB max for 1024 particles (ESP32_MAXPARTICLES/2)
@@ -820,7 +836,7 @@ void ParticleSystem2D::handleCollisions() {
    for (uint32_t i = 0; i < usedParticles; i++) {
      if (particles[pidx].ttl > 0) { // is alive
        if (particles[pidx].x >= binStart && particles[pidx].x <= binEnd) { // >= and <= to include particles on the edge of the bin (overlap to ensure boarder particles collide with adjacent bins)
-          if(particleFlags[pidx].outofbounds == 0 && particleFlags[pidx].collide) { // particle is in frame and does collide note: checking flags is quite slow and usually these are set, so faster to check here
+          if (particleFlags[pidx].outofbounds == 0 && particleFlags[pidx].collide) { // particle is in frame and does collide note: checking flags is quite slow and usually these are set, so faster to check here
            if (binParticleCount >= maxBinParticles) { // bin is full, more particles in this bin so do the rest next frame
              nextFrameStartIdx = pidx; // bin overflow can only happen once as bin size is at least half of the particles (or half +1)
              break;
@@ -837,9 +853,8 @@ void ParticleSystem2D::handleCollisions() {
      uint32_t idx_i = binIndices[i];
      for (uint32_t j = i + 1; j < binParticleCount; j++) { // check against higher number particles
        uint32_t idx_j = binIndices[j];
-        if (advPartProps) { //may be using individual particle size
-          setParticleSize(particlesize); // updates base particleHardRadius
-          collDistSq = (particleHardRadius << 1) + (((uint32_t)advPartProps[idx_i].size + (uint32_t)advPartProps[idx_j].size) >> 1); // collision distance note: not 100% clear why the >> 1 is needed, but it is.
+        if (perParticleSize && advPartProps != nullptr) { // using individual particle size
+          collDistSq = (PS_P_MINHARDRADIUS << 1) + ((((uint32_t)advPartProps[idx_i].size + (uint32_t)advPartProps[idx_j].size) * 52) >> 6); // collision distance, use 80% of size for tighter stacking (slight overlap)
          collDistSq = collDistSq * collDistSq; // square it for faster comparison
        }
        int32_t dx = (particles[idx_j].x + particles[idx_j].vx) - (particles[idx_i].x + particles[idx_i].vx); // distance with lookahead
@@ -1103,6 +1118,8 @@ bool initParticleSystem2D(ParticleSystem2D *&PartSys, uint32_t requestedsources,
  uint32_t cols = SEGMENT.virtualWidth();
  uint32_t rows = SEGMENT.virtualHeight();
  uint32_t pixels = cols * rows;
+  if (sizecontrol)
+    advanced = true; // size control needs advanced properties, prevent wrong usage

  uint32_t numparticles = calculateNumberOfParticles2D(pixels, advanced, sizecontrol);
  PSPRINT(" segmentsize:" + String(cols) + " x " + String(rows));
@@ -1418,7 +1435,7 @@ void ParticleSystem1D::applyFriction(int32_t coefficient) {
 // if wrap is set, particles half out of bounds are rendered to the other side of the matrix
 // warning: do not render out of bounds particles or system will crash! rendering does not check if particle is out of bounds
 void ParticleSystem1D::render() {
-  if(framebuffer == nullptr) {
+  if (framebuffer == nullptr) {
    PSPRINTLN(F("PS render: no framebuffer!"));
    return;
  }
@@ -1447,7 +1464,7 @@ void ParticleSystem1D::render() {
    brightness = min(particles[i].ttl << 1, (int)255);
    baseRGB = ColorFromPaletteWLED(SEGPALETTE, particles[i].hue, 255, blend);

-    if (advPartProps) { //saturation is advanced property in 1D system
+    if (advPartProps != nullptr) { //saturation is advanced property in 1D system
      if (advPartProps[i].sat < 255) {
        CHSV32 baseHSV;
        rgb2hsv(baseRGB.color32, baseHSV); // convert to HSV
@@ -1455,7 +1472,7 @@ void ParticleSystem1D::render() {
        hsv2rgb(baseHSV, baseRGB.color32); // convert back to RGB
      }
    }
-    if(gammaCorrectCol) brightness = gamma8(brightness); // apply gamma correction, used for gamma-inverted brightness distribution
+    if (gammaCorrectCol) brightness = gamma8(brightness); // apply gamma correction, used for gamma-inverted brightness distribution
    renderParticle(i, brightness, baseRGB, particlesettings.wrap);
  }
  // apply smear-blur to rendered frame
@@ -1472,7 +1489,7 @@ void ParticleSystem1D::render() {
  }
 #ifndef WLED_DISABLE_2D
  // transfer local buffer to segment if using 1D->2D mapping
-  if(SEGMENT.is2D() && SEGMENT.map1D2D) {
+  if (SEGMENT.is2D() && SEGMENT.map1D2D) {
    for (int x = 0; x <= maxXpixel; x++) {
    //for (int x = 0; x < SEGMENT.vLength(); x++) {
      SEGMENT.setPixelColor(x, framebuffer[x]); // this applies the mapping
@@ -1484,7 +1501,7 @@ void ParticleSystem1D::render() {
 // calculate pixel positions and brightness distribution and render the particle to local buffer or global buffer
 void WLED_O2_ATTR ParticleSystem1D::renderParticle(const uint32_t particleindex, const uint8_t brightness, const CRGBW &color, const bool wrap) {
  uint32_t size = particlesize;
-  if (advPartProps) // use advanced size properties (1D system has no large size global rendering TODO: add large global rendering?)
+  if (advPartProps != nullptr) // use advanced size properties (1D system has no large size global rendering TODO: add large global rendering?)
    size = advPartProps[particleindex].size;

  if (size == 0) { //single pixel particle, can be out of bounds as oob checking is made for 2-pixel particles (and updating it uses more code)
@@ -1516,12 +1533,12 @@ void WLED_O2_ATTR ParticleSystem1D::renderParticle(const uint32_t particleindex,
  // - scale brigthness with gamma correction (done in render())
  // - apply inverse gamma correction to brightness values
  // - gamma is applied again in show() -> the resulting brightness distribution is linear but gamma corrected in total
-  if(gammaCorrectCol) {
+  if (gammaCorrectCol) {
    pxlbrightness[0] = gamma8inv(pxlbrightness[0]); // use look-up-table for invers gamma
    pxlbrightness[1] = gamma8inv(pxlbrightness[1]);
  }
  // check if particle has advanced size properties and buffer is available
-  if (advPartProps && advPartProps[particleindex].size > 1) {
+  if (advPartProps != nullptr && advPartProps[particleindex].size > 1) {
    uint32_t renderbuffer[10]; // 10 pixel buffer
    memset(renderbuffer, 0, sizeof(renderbuffer)); // clear buffer
    //render particle to a bigger size
@@ -1596,7 +1613,7 @@ void ParticleSystem1D::handleCollisions() {
  // if they are, collisionStartIdx is increased so each particle collides at least every second frame (which still gives decent collisions)
  constexpr int BIN_WIDTH = 32 * PS_P_RADIUS_1D; // width of each bin, a compromise between speed and accuracy (larger bins are faster but collapse more)
  int32_t overlap = particleHardRadius << 1; // overlap bins to include edge particles to neighbouring bins
-  if (advPartProps) //may be using individual particle size
+  if (advPartProps != nullptr) //may be using individual particle size
    overlap += 256; // add 2 * max radius (approximately)
  uint32_t maxBinParticles = max((uint32_t)50, (usedParticles + 1) / 4); // do not bin small amounts, limit max to 1/4 of particles
  uint32_t numBins = (maxX + (BIN_WIDTH - 1)) / BIN_WIDTH; // calculate number of bins
@@ -1613,7 +1630,7 @@ void ParticleSystem1D::handleCollisions() {
    for (uint32_t i = 0; i < usedParticles; i++) {
      if (particles[pidx].ttl > 0) { // alivee
        if (particles[pidx].x >= binStart && particles[pidx].x <= binEnd) { // >= and <= to include particles on the edge of the bin (overlap to ensure boarder particles collide with adjacent bins)
-          if(particleFlags[pidx].outofbounds == 0 && particleFlags[pidx].collide) { // particle is in frame and does collide note: checking flags is quite slow and usually these are set, so faster to check here
+          if (particleFlags[pidx].outofbounds == 0 && particleFlags[pidx].collide) { // particle is in frame and does collide note: checking flags is quite slow and usually these are set, so faster to check here
            if (binParticleCount >= maxBinParticles) { // bin is full, more particles in this bin so do the rest next frame
              nextFrameStartIdx = pidx; // bin overflow can only happen once as bin size is at least half of the particles (or half +1)
              break;
@@ -1630,7 +1647,7 @@ void ParticleSystem1D::handleCollisions() {
      uint32_t idx_i = binIndices[i];
      for (uint32_t j = i + 1; j < binParticleCount; j++) { // check against higher number particles
        uint32_t idx_j = binIndices[j];
-        if (advPartProps) { // use advanced size properties
+        if (advPartProps != nullptr) { // use advanced size properties
          collisiondistance = (PS_P_MINHARDRADIUS_1D << particlesize) + ((advPartProps[idx_i].size + advPartProps[idx_j].size) >> 1);
        }
        int32_t dx = (particles[idx_j].x + particles[idx_j].vx) - (particles[idx_i].x + particles[idx_i].vx); // distance between particles with lookahead
@@ -1735,7 +1752,7 @@ void ParticleSystem1D::updatePSpointers(bool isadvanced) {
  sources = reinterpret_cast<PSsource1D *>(particleFlags + numParticles); // pointer to source(s)
  PSdataEnd = reinterpret_cast<uint8_t *>(sources + numSources);   // pointer to first available byte after the PS for FX additional data (already aligned to 4 byte boundary)
 #ifndef WLED_DISABLE_2D
-  if(SEGMENT.is2D() && SEGMENT.map1D2D) {
+  if (SEGMENT.is2D() && SEGMENT.map1D2D) {
    framebuffer = reinterpret_cast<uint32_t *>(sources + numSources); // use local framebuffer for 1D->2D mapping
    PSdataEnd = reinterpret_cast<uint8_t *>(framebuffer + SEGMENT.maxMappingLength()); // pointer to first available byte after the PS for FX additional data (still aligned to 4 byte boundary)
  }
@@ -1790,7 +1807,7 @@ bool allocateParticleSystemMemory1D(const uint32_t numparticles, const uint32_t
  requiredmemory += sizeof(PSparticle1D) * numparticles;
  requiredmemory += sizeof(PSsource1D) * numsources;
 #ifndef WLED_DISABLE_2D
-  if(SEGMENT.is2D())
+  if (SEGMENT.is2D())
    requiredmemory += sizeof(uint32_t) * SEGMENT.maxMappingLength(); // need local buffer for mapped rendering
 #endif
  requiredmemory += additionalbytes;