The PMLL
https://www.authorea.com/users/856117/articles/1297819-the-persistent-memory-logic-loop-from-proposal-design-to-formal-proof-of-p-np-using-the-pmll-algorithm
/* pmll.h — Persistent-Memory Logic Loop (CPU reference)
   Copyright (c) 2025 Dr. Josef Kurk Edwards
   MIT License (same as llm.c)
   -------------------------------------------------------------------- */
#pragma once
#include <stddef.h>

/* Number of timestep slots kept per head. Define MAX_MEM_T before including
   this header to override the default. */
#ifndef MAX_MEM_T
#define MAX_MEM_T 128 /* timesteps stored per head */
#endif
/* Ring-buffer state for one batch element.
   k and v are heap buffers owned by the state: allocated by pmll_init()
   and released by pmll_free(). */
typedef struct {
    int    T;   /* current length, 0 ≤ T ≤ MAX_MEM_T (one counter for all heads) */
    int    hs;  /* head-size (channels / num_heads) */
    float *k;   /* (NH, MAX_MEM_T, hs) contiguous */
    float *v;   /* (NH, MAX_MEM_T, hs) contiguous */
} pmll_state;
/* Life-cycle ------------------------------------------------------ */
int  pmll_init (pmll_state *S, int NH, int hs); /* allocate zeroed buffers → 0 = OK, -1 = failure */
void pmll_reset(pmll_state *S);                 /* T ← 0 (keep buffers) */
void pmll_free (pmll_state *S);                 /* free() buffers */
/* Data flow ------------------------------------------------------- */
/* Read stored KV for head h into out_k/out_v (both len = (Tmem+Tctx)*hs),
   where Tmem is the current stored length S->T.
   Fills first Tmem positions with history, then zero-pads the Tctx region. */
void pmll_read(float *out_k, float *out_v,
const pmll_state *S, int h, int Tctx);
/* Write new_k/new_v (len = Tctx*hs) into ring buffer for head h.
   gate[t] ∈ [0,1] controls how much to blend new vs. old at slot (Told+t),
   where Told is S->T on entry and the slot index wraps modulo MAX_MEM_T.
   A NULL gate is treated as gate[t] = 1 (new value replaces the old one).
   Afterwards S->T is advanced by Tctx and capped at MAX_MEM_T. */
void pmll_write(pmll_state *S, int h,
const float *new_k, const float *new_v,
int Tctx, const float *gate);
/* pmll.c — implementation for pmll.h (CPU, single-thread safe) */
#include "pmll.h"

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* Allocate and zero-init ring buffers. Returns 0 on success, -1 on OOM. / int pmll_init(pmll_state S, int NH, int hs) { S->T = 0; S->hs = hs; size_t bytes = (size_t)NH * MAX_MEM_T * hs * sizeof(float); S->k = (float)calloc(1, bytes); S->v = (float)calloc(1, bytes); return (S->k && S->v) ? 0 : -1; }
/* Discard the stored history by setting the length back to zero.
   The buffers stay allocated and their contents are not cleared. */
void pmll_reset(pmll_state *S)
{
    S->T = 0;
}
/* Release both KV buffers and return the state to an empty condition:
   k and v become NULL, T and hs become 0. */
void pmll_free(pmll_state *S)
{
    free(S->k);
    free(S->v);

    /* Clear the pointers so a stale buffer cannot be reused or freed twice. */
    S->k = NULL;
    S->v = NULL;

    S->hs = 0;
    S->T = 0;
}
/*
 * Copy historic KV of head h into the caller's scratch buffers.
 *
 * Layout of each output: [Tmem history rows][Tctx zero rows], hs floats per
 * row, where Tmem = S->T. out_k and out_v must each have room for
 * (Tmem + Tctx) * hs floats. History rows are copied in slot order, starting
 * at slot 0 of the head's buffer. A non-positive Tctx writes no padding.
 */
void pmll_read(float *out_k, float *out_v,
               const pmll_state *S, int h, int Tctx)
{
    const int hs = S->hs;
    const int Tmem = S->T;

    /* Start of head h inside the (NH, MAX_MEM_T, hs) buffers. */
    const size_t head_off = (size_t)h * MAX_MEM_T * hs;
    const float *src_k = S->k + head_off;
    const float *src_v = S->v + head_off;

    const size_t hist_elems = (size_t)Tmem * hs;
    if (hist_elems > 0) {
        const size_t hist_bytes = hist_elems * sizeof(float);
        memcpy(out_k, src_k, hist_bytes);
        memcpy(out_v, src_v, hist_bytes);
    }

    /* zero-pad the fresh-context part; the guard keeps a negative Tctx from
       turning into a huge size_t length */
    if (Tctx > 0) {
        const size_t pad_bytes = (size_t)Tctx * hs * sizeof(float);
        memset(out_k + hist_elems, 0, pad_bytes);
        memset(out_v + hist_elems, 0, pad_bytes);
    }
}
/*
 * Gate-controlled write (blend-in) of Tctx new rows into head h.
 *
 *   new_k, new_v  shape (Tctx, hs)
 *   gate          Tctx blend weights, or NULL to use 1.0 for every row
 *
 * Row t is blended into slot (S->T + t) % MAX_MEM_T as
 *     stored = g * new + (1 - g) * stored
 * Afterwards S->T advances by Tctx, saturating at MAX_MEM_T.
 * A non-positive Tctx is a no-op.
 *
 * NOTE(review): once S->T has saturated at MAX_MEM_T, the start slot of
 * every later call is slot 0 again, because no separate head index is kept.
 * NOTE(review): S->T is one counter for all heads and is advanced by every
 * call regardless of h. Confirm callers account for this when writing
 * several heads for the same timestep.
 */
void pmll_write(pmll_state *S, int h,
                const float *new_k, const float *new_v,
                int Tctx, const float *gate)
{
    if (Tctx <= 0) {
        return; /* nothing to write; also keeps S->T from being decremented */
    }

    const int hs = S->hs;
    const size_t head_off = (size_t)h * MAX_MEM_T * hs;
    float *dst_k = S->k + head_off;
    float *dst_v = S->v + head_off;
    const size_t start = (size_t)S->T; /* first slot, before wrapping */

    for (int t = 0; t < Tctx; ++t) {
        const float g = gate ? gate[t] : 1.0f; /* no gate: take the new row fully */
        const float keep = 1.f - g;
        const size_t slot = (start + (size_t)t) % MAX_MEM_T; /* ring-slot */

        /* Offsets are computed in size_t so large t * hs cannot overflow int. */
        const float *nk = new_k + (size_t)t * hs;
        const float *nv = new_v + (size_t)t * hs;
        float *dk = dst_k + slot * hs;
        float *dv = dst_v + slot * hs;

        for (int i = 0; i < hs; ++i) {
            dk[i] = g * nk[i] + keep * dk[i];
            dv[i] = g * nv[i] + keep * dv[i];
        }
    }

    /* advance length, saturating at capacity without overflowing int */
    if (Tctx >= MAX_MEM_T - S->T) {
        S->T = MAX_MEM_T;
    } else {
        S->T += Tctx;
    }
}