qmk_firmware icon indicating copy to clipboard operation
qmk_firmware copied to clipboard

[Bug] RP2040 tentative fix for XIP cache misses and subsequent lockups

Open KarlK90 opened this issue 3 years ago • 6 comments

Description

Due to unpredictable spikes in execution when the XIP cache is missed and code / data has to be fetched from the external flash the virtual timers in ChibiOS could lockup. Until a proposed patch is merged into ChibiOS and lands in QMK we switch to the classic periodic tick implementation which is immune to the spike in the critical execution path.

Note: This change affects how short we could possibly sleep by yielding the current thread, but as we already use busy waiting for very short sleeps this change has no perceivable effect for us.

Types of Changes

  • [x] Core
  • [x] Bugfix
  • [ ] New feature
  • [ ] Enhancement/optimization
  • [ ] Keyboard (addition or update)
  • [ ] Keymap/layout/userspace (addition or update)
  • [ ] Documentation

Issues Fixed or Closed by This PR

Checklist

  • [x] My code follows the code style of this project: C, Python
  • [x] I have read the PR Checklist document and have made the appropriate changes.
  • [ ] My change requires a change to the documentation.
  • [ ] I have updated the documentation accordingly.
  • [x] I have read the CONTRIBUTING document.
  • [ ] I have added tests to cover my changes.
  • [x] I have tested the changes and verified that they work and don't break anything (as well as I can manage).

KarlK90 avatar Sep 11 '22 16:09 KarlK90

CC: @xyzz adjusted the values to your findings and added a safety margin

KarlK90 avatar Sep 11 '22 16:09 KarlK90

alternative:

diff --git a/platforms/chibios/boards/GENERIC_PROMICRO_RP2040/configs/chconf.h b/platforms/chibios/boards/GENERIC_PROMICRO_RP2040/configs/chconf.h
index d53f57edd9..eae6dcc529 100644
--- a/platforms/chibios/boards/GENERIC_PROMICRO_RP2040/configs/chconf.h
+++ b/platforms/chibios/boards/GENERIC_PROMICRO_RP2040/configs/chconf.h
@@ -5,9 +5,13 @@
 
 #define CH_CFG_SMP_MODE                     TRUE
 #define CH_CFG_ST_RESOLUTION                32
-#define CH_CFG_ST_FREQUENCY                 1000000
+#define CH_CFG_ST_FREQUENCY                 10000
 #define CH_CFG_INTERVALS_SIZE               32
 #define CH_CFG_TIME_TYPES_SIZE              32
-#define CH_CFG_ST_TIMEDELTA                 20
+#define CH_CFG_ST_TIMEDELTA                 0
+
+/* Workaround a bug in chibios where port_timer_enable is not defined for RP2040 in tick mode */
+void stBind(void);
+#define port_timer_enable(oip) stBind()
 
 #include_next <chconf.h>
diff --git a/platforms/chibios/boards/GENERIC_RP_RP2040/configs/chconf.h b/platforms/chibios/boards/GENERIC_RP_RP2040/configs/chconf.h
index d53f57edd9..eae6dcc529 100644
--- a/platforms/chibios/boards/GENERIC_RP_RP2040/configs/chconf.h
+++ b/platforms/chibios/boards/GENERIC_RP_RP2040/configs/chconf.h
@@ -5,9 +5,13 @@
 
 #define CH_CFG_SMP_MODE                     TRUE
 #define CH_CFG_ST_RESOLUTION                32
-#define CH_CFG_ST_FREQUENCY                 1000000
+#define CH_CFG_ST_FREQUENCY                 10000
 #define CH_CFG_INTERVALS_SIZE               32
 #define CH_CFG_TIME_TYPES_SIZE              32
-#define CH_CFG_ST_TIMEDELTA                 20
+#define CH_CFG_ST_TIMEDELTA                 0
+
+/* Workaround a bug in chibios where port_timer_enable is not defined for RP2040 in tick mode */
+void stBind(void);
+#define port_timer_enable(oip) stBind()
 
 #include_next <chconf.h>
diff --git a/platforms/chibios/boards/QMK_PM2040/configs/chconf.h b/platforms/chibios/boards/QMK_PM2040/configs/chconf.h
index d53f57edd9..eae6dcc529 100644
--- a/platforms/chibios/boards/QMK_PM2040/configs/chconf.h
+++ b/platforms/chibios/boards/QMK_PM2040/configs/chconf.h
@@ -5,9 +5,13 @@
 
 #define CH_CFG_SMP_MODE                     TRUE
 #define CH_CFG_ST_RESOLUTION                32
-#define CH_CFG_ST_FREQUENCY                 1000000
+#define CH_CFG_ST_FREQUENCY                 10000
 #define CH_CFG_INTERVALS_SIZE               32
 #define CH_CFG_TIME_TYPES_SIZE              32
-#define CH_CFG_ST_TIMEDELTA                 20
+#define CH_CFG_ST_TIMEDELTA                 0
+
+/* Workaround a bug in chibios where port_timer_enable is not defined for RP2040 in tick mode */
+void stBind(void);
+#define port_timer_enable(oip) stBind()
 
 #include_next <chconf.h>

xyzz avatar Sep 12 '22 00:09 xyzz

@xyzz thanks, I'll give it a try this afternoon.

KarlK90 avatar Sep 12 '22 07:09 KarlK90

You'll want to find another location for the non-standard mods to chconf.h -- they'll get wiped out next ChibiOS update.

tzarc avatar Sep 12 '22 07:09 tzarc

That is actually fine as 21.11.3 will contain a fix for this issue.

KarlK90 avatar Sep 12 '22 08:09 KarlK90

#include "quantum.h"
#include <lib/lib8tion/lib8tion.h>
#include "pico/bootrom.h"

volatile uint8_t* flash_start = 0x10000000UL;

static THD_WORKING_AREA(waXIPTortureThread, 512);
static THD_FUNCTION(XIPTortureThread, arg) {
    chRegSetThreadName("XIP torture thread");
    rom_flash_flush_cache_fn flash_flush_cache = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE);

    while (true) {
        chSysLock();
        flash_flush_cache();
        chSysUnlock();
        uint8_t end = random8();
        for (int i = 0; i < end; i++) {
            (void)*(flash_start + random16() * 30);
        }
        eeconfig_update_user(random16());
        chThdSleepMicroseconds(200);
    }
}

static THD_WORKING_AREA(waXIPTortureThread2, 512);
static THD_FUNCTION(XIPTortureThread2, arg) {
    chRegSetThreadName("XIP torture thread 2");
    rom_flash_flush_cache_fn flash_flush_cache = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE);

    while (true) {
        chSysLock();
        flash_flush_cache();
        chSysUnlock();
        eeconfig_update_user(random16());
        uint8_t end = random8();
        for (int i = 0; i < end; i++) {
            (void)*(flash_start + random16() * 30);
        }
        chThdSleepMicroseconds(100);
    }
}

void housekeeping_task_user(void) {
    chThdSleepMicroseconds(40);
}

void keyboard_post_init_user() {
    chThdCreateStatic(waXIPTortureThread, sizeof(waXIPTortureThread), HIGHPRIO - 1, XIPTortureThread, NULL);
    chThdCreateStatic(waXIPTortureThread2, sizeof(waXIPTortureThread2), HIGHPRIO, XIPTortureThread2, NULL);
}
SRC += $(LIB_PATH)/lib8tion/lib8tion.c

I couldn't reproduce the lockups on current master with this torture test - which might be just plain wrong in design... Performance with the classic periodic tick is totally fine for split keebs. So I would say that this workaround is the best bet so far.

KarlK90 avatar Sep 13 '22 20:09 KarlK90

@xyzz the periodic tick actually uncovered a problem in the i2c driver that will be fixed by https://github.com/ChibiOS/ChibiOS-Contrib/pull/335

KarlK90 avatar Sep 17 '22 14:09 KarlK90

As the virtual timer bug has been backported to ChibiOS 21.11.2 I'll close this PR and open an ChibiOS update PR instead.

KarlK90 avatar Sep 18 '22 19:09 KarlK90