llm.c icon indicating copy to clipboard operation
llm.c copied to clipboard

The PMLL

Open drQedwards opened this issue 6 months ago • 2 comments

https://www.authorea.com/users/856117/articles/1297819-the-persistent-memory-logic-loop-from-proposal-design-to-formal-proof-of-p-np-using-the-pmll-algorithm

drQedwards avatar Jun 21 '25 04:06 drQedwards

/* pmll.h — Persistent-Memory Logic Loop (CPU reference)
   Copyright (c) 2025 Dr. Josef Kurk Edwards
   MIT License (same as llm.c)
   -------------------------------------------------------------------- */
#pragma once
#include <stddef.h>

/* Ring-buffer capacity: timesteps stored per head.
   The #ifndef guard lets a build override the value by defining MAX_MEM_T
   before this point. */
#ifndef MAX_MEM_T
#define MAX_MEM_T 128
#endif

/* Ring-buffer state for one batch element */
typedef struct {
    int    T;   /* current length, 0 <= T <= MAX_MEM_T */
    int    hs;  /* head-size (channels / num_heads) */
    float *k;   /* (NH, MAX_MEM_T, hs) contiguous */
    float *v;   /* (NH, MAX_MEM_T, hs) */
} pmll_state;

/* Life-cycle ------------------------------------------------------ */

/* Allocate zero-filled k/v buffers of NH * MAX_MEM_T * hs floats each and
   set T = 0.  Returns 0 on success, -1 on failure. */
int  pmll_init (pmll_state *S, int NH, int hs);

/* T <- 0 (buffers are kept, not freed or cleared). */
void pmll_reset(pmll_state *S);

/* free() both buffers, NULL the pointers, zero T and hs. */
void pmll_free (pmll_state *S);

/* Data flow ------------------------------------------------------- */
/* Read stored KV for head h into out_k/out_v (both len = (Tmem+Tctx)*hs
   floats, where Tmem is the current S->T).
   Fills first Tmem positions with history, then zero-pads the Tctx region.
     out_k, out_v : caller-owned destination buffers
     S            : state to read from (not modified)
     h            : head index selecting the (MAX_MEM_T, hs) slice
     Tctx         : number of fresh-context timesteps to zero-pad */
void pmll_read(float *out_k, float *out_v, const pmll_state *S, int h, int Tctx);

/* Write new_k/new_v (len = Tctx*hs) into ring buffer for head h.
   gate[t] ∈ [0,1] controls how much to blend new vs. old at slot
   (Told+t) mod MAX_MEM_T; a NULL gate is treated as 1 for every t.
   After the write S->T is increased by Tctx and capped at MAX_MEM_T.
   NOTE(review): S->T is a single counter in pmll_state, and every call
   advances it regardless of h — confirm how multi-head callers are
   expected to sequence their writes. */
void pmll_write(pmll_state *S, int h, const float *new_k, const float *new_v, int Tctx, const float *gate);

drQedwards avatar Jun 21 '25 04:06 drQedwards

/* pmll.c — implementation for pmll.h (CPU, single-thread safe) */
#include "pmll.h"
#include <stdlib.h>
#include <string.h>

/* Allocate and zero-init ring buffers. Returns 0 on success, -1 on OOM. / int pmll_init(pmll_state S, int NH, int hs) { S->T = 0; S->hs = hs; size_t bytes = (size_t)NH * MAX_MEM_T * hs * sizeof(float); S->k = (float)calloc(1, bytes); S->v = (float)calloc(1, bytes); return (S->k && S->v) ? 0 : -1; }

/* Drop the stored history by setting the length counter back to zero.
   The k/v buffers are neither freed nor cleared. */
void pmll_reset(pmll_state *S)
{
    S->T = 0;
}

/* Release both ring buffers and leave S in an empty state
   (NULL pointers, T = 0, hs = 0). */
void pmll_free(pmll_state *S)
{
    float *old_k = S->k;
    float *old_v = S->v;

    /* Detach first, then release. */
    S->k  = NULL;
    S->v  = NULL;
    S->hs = 0;
    S->T  = 0;

    free(old_k);
    free(old_v);
}

/* Copy historic KV into the caller's scratch buffers.
 * Layout: [Tmem history][Tctx zeros]
 *
 *   out_k, out_v : destinations, each at least (S->T + Tctx) * S->hs floats
 *   S            : state to read from (not modified)
 *   h            : head index; selects the (MAX_MEM_T, hs) slice of k and v
 *   Tctx         : number of timesteps to zero-fill after the history;
 *                  values <= 0 add no padding
 *
 * History is copied in slot order (slots 0 .. S->T-1 of the head's slice).
 */
void pmll_read(float *out_k, float *out_v, const pmll_state *S, int h, int Tctx)
{
    const size_t hs       = (size_t)S->hs;
    const size_t Tmem     = (size_t)S->T;
    const size_t head_off = (size_t)h * (size_t)MAX_MEM_T * hs;
    const float *src_k    = S->k + head_off;
    const float *src_v    = S->v + head_off;

    const size_t hist_elems = Tmem * hs;
    if (hist_elems > 0) {
        /* Skip zero-length copies so an empty state never hands a NULL
           source pointer to memcpy. */
        const size_t hist_bytes = hist_elems * sizeof(float);
        memcpy(out_k, src_k, hist_bytes);
        memcpy(out_v, src_v, hist_bytes);
    }

    /* zero-pad the fresh-context part */
    if (Tctx > 0) {
        const size_t pad_bytes = (size_t)Tctx * hs * sizeof(float);
        memset(out_k + hist_elems, 0, pad_bytes);
        memset(out_v + hist_elems, 0, pad_bytes);
    }
}

/* Gate-controlled write (blend-in); new_k/new_v shape = (Tctx, hs).
 *
 *   S            : state to update
 *   h            : head index; selects the (MAX_MEM_T, hs) slice of k and v
 *   new_k, new_v : Tctx * hs floats each
 *   Tctx         : number of timesteps to write; values <= 0 are a no-op
 *   gate         : Tctx blend factors, or NULL for 1.0 (plain overwrite)
 *
 * Timestep t lands in slot (S->T + t) % MAX_MEM_T as
 *     slot = g * new + (1 - g) * slot
 * Afterwards S->T grows by Tctx, capped at MAX_MEM_T.
 *
 * NOTE(review): S->T is both the stored length and the write cursor.  Once
 * it has been capped at MAX_MEM_T the next call starts at slot 0 again, and
 * every call advances it regardless of h.  Fixing either needs extra state
 * in pmll_state, so behaviour is left as-is here.
 */
void pmll_write(pmll_state *S, int h, const float *new_k, const float *new_v, int Tctx, const float *gate)
{
    /* A negative Tctx would shrink S->T and later produce a negative
       ring index (out-of-bounds write); zero has nothing to do. */
    if (Tctx <= 0) {
        return;
    }

    const int    hs       = S->hs;
    const size_t stride   = (size_t)hs;
    const size_t head_off = (size_t)h * (size_t)MAX_MEM_T * stride;
    float *dst_k = S->k + head_off;
    float *dst_v = S->v + head_off;

    for (int t = 0; t < Tctx; ++t) {
        const float  g    = gate ? gate[t] : 1.0f;   /* default: keep all */
        /* ring slot, computed in size_t so S->T + t cannot overflow int */
        const size_t slot = ((size_t)S->T + (size_t)t) % (size_t)MAX_MEM_T;
        const float *nk   = new_k + (size_t)t * stride;
        const float *nv   = new_v + (size_t)t * stride;
        float       *dk   = dst_k + slot * stride;
        float       *dv   = dst_v + slot * stride;

        for (int i = 0; i < hs; ++i) {
            dk[i] = g * nk[i] + (1.f - g) * dk[i];
            dv[i] = g * nv[i] + (1.f - g) * dv[i];
        }
    }

    /* advance length, saturating at capacity without overflowing int */
    if (Tctx >= MAX_MEM_T - S->T) {
        S->T = MAX_MEM_T;
    } else {
        S->T += Tctx;
    }
}

drQedwards avatar Jun 21 '25 04:06 drQedwards