Reduce small string memory overhead by ~20-30% for embstr encoding by reusing robj->ptr memory
Achieved 20-30% reduction in overhead.
This was accomplished by reusing the 8B robj->ptr memory to store embedded data. For 16B key and 16B value, this reduces the per-item memory overhead by ~20%. Overall performance was not measurably changed. Raising the threshold for embedding strings produces ~30% reduction in per-item overhead for affected value sizes.
I plan to merge this as two separate commits - one for the interface refactor and one for the memory efficiency changes. I'm leaving the commits more separated for now so it's easier to review.
Here's what I did:
- Modify all
robj->ptraccess to use get/set methods. Most of this was done with a script (code listed below), with a few manual touches. Manual and programmatic changes are in separate commits to make review easier. - Next I changed
objectGetVal()to calculate the location instead of usingrobj->ptr, and modified the embedding code to start writing embedded data 8B earlier, overwriting the ptr location. I changedobjectSetVal()to assert that no value is embedded - all of the code that assignedo->ptrwould have been incorrect for embedded strings anyway. (Except for one instance in module.c which I made a separate method for.) - Lastly, I increased the embedding size threshold to save more memory.
Memory efficiency comparison:
Performance is not significantly changed (0-2% faster):
For each test I collected 65 minutes of data and discarded the first 5 minutes as warmup. I did not use cluster mode, iothreads=1, pipelining=100, and key size was 16B. Note that for 64B values valkey:unstable did not embed and my branch did embed because I raised the size threshold.
Appendix 1 - refactoring script
# parse robj->ptr references and replace them with objectGetVal(robj) and objectSetVal(robj)
from dataclasses import dataclass
from pathlib import Path
import re
@dataclass
class Ref:
path: Path
line: int
char: int
note: str
def load(base_path: Path, ref_listing_path: Path):
with open(ref_listing_path, "r") as f:
data = f.readlines()
file = ""
reflist: list[Ref] = []
for line in data:
if line.strip() == "":
continue
if not line.startswith(' '):
file = line.strip()
continue
# parse lines like " 2443, 39: sdscatsds(user, descr->ptr);\n"
# Extract line number, char position, and note
try:
parts = line.strip().split(":", 1)
loc, note = parts[0], parts[1].strip()
line_num, char_num = [int(x.strip()) for x in loc.split(",")]
ref = Ref(base_path / file, line_num, char_num, note)
reflist.append(ref)
except Exception as e:
print(f"Failed to parse line: {line.strip()} ({e})")
return reflist
def get_obj_var(line: str):
# extract the variable name from a line like "sdsdup(c->argv[argpos]->ptr"
assert line.endswith("->ptr"), f"Line does not end with '->ptr': {line}"
line = line[:-5].strip() # exclude "->ptr"
pairs = {
')': '(',
']': '[',
'}': '{',
}
pairends = "([{"
i = len(line) - 1
unclosed = ""
if line[i] == ')': # only start new parenthesis when no other unclosed pairs if it's an outer parenthesis wrapping the whole variable
unclosed += '('
i -= 1
for i in range(i, -1, -1):
char = line[i]
if unclosed:
if char in pairends:
assert char == unclosed[-1]
unclosed = unclosed[:-1]
else:
# no unclosed pairs, check stop conditions
if char in pairends or char.isspace() or char in ')!':
i += 1 # this char was invalid so don't include it
break
if char in pairs:
unclosed += pairs[char]
var = line[i:]
stripped_var = var.strip()
while stripped_var.startswith('(') and stripped_var.endswith(')'):
stripped_var = stripped_var[1:-1].strip()
return var, stripped_var
assert get_obj_var("sdsdup(c->argv[argpos]->ptr")[0] == "c->argv[argpos]"
assert get_obj_var("sdsdup(user->descr->ptr")[0] == "user->descr"
assert get_obj_var('addReplyErrorFormat(c, "Unknown node %s", (char *)c->argv[2]->ptr')[0] == "c->argv[2]"
assert get_obj_var(" sds key->ptr = argv[result.keys[i].pos]->ptr")[0] == "argv[result.keys[i].pos]"
assert get_obj_var(" sds err = getAclErrorMessage(result, u, cmd, c->argv[idx + 3]->ptr")[0] == "c->argv[idx + 3]"
assert get_obj_var("batch->keys[i] = ((robj *)batch->keys[i])->ptr")[0] == "((robj *)batch->keys[i])"
assert get_obj_var("if (!client->name || !client->name->ptr")[0] == "client->name"
assert get_obj_var(" set->ptr")[0] == "set"
assert get_obj_var("set->ptr")[0] == "set"
def get_assignment(line) -> str:
assert line.startswith("ptr = "), f"Line does not start with 'ptr = ': {line}"
first_semicolon = line.find(';')
if first_semicolon == -1:
raise ValueError(f"Line does not contain a semicolon")
assignment = line[5:first_semicolon].strip() # exclude "ptr = "
return assignment
def replace_read_ref(ref: Ref, line) -> str:
var, stripped_var = get_obj_var(line[:ref.char + 2]) # include "->ptr"
new_line = line[:ref.char - 1 - (len(var) + 2)] + f"objectGetVal({stripped_var})" + line[ref.char - 1 + 3:]
return new_line
def replace_write_ref(ref: Ref, line_num: int, lines: list[str]) -> str:
line = lines[ref.line - 1]
var, stripped_var = get_obj_var(line[:ref.char + 2]) # include "->ptr"
assignment = get_assignment(line[ref.char-1:]) # include "ptr = "
new_line = line[:ref.char - 1 - (len(var) + 2)] + f"objectSetVal({stripped_var}, {assignment})" + line[ref.char + 5 + len(assignment):]
if " = " in assignment:
print(f"Follow up on {ref.path}:{ref.line}:{ref.char} - assignment contains '=': {assignment}")
print(" old:", line[:-1])
print(" new:", new_line[:-1])
return new_line
def replace_all(refs: list[Ref], dry_run=False):
to_replace = refs.copy()
skipped: list[Ref] = []
i = 0
file = None
data: list[str] = []
while to_replace:
ref = to_replace.pop() # backwards in case there are multiple refs in the same line
if file != ref.path:
if file and not dry_run:
assert data, f"No data loaded for {file}"
# write previous file
with open(file, "w") as f:
f.writelines(data)
file = ref.path
with open(file, 'r') as f:
data = f.readlines()
assert ref.line - 1 < len(data), f"Line {ref.line} out of range for {file}"
line = data[ref.line - 1]
assert ref.char + 2 <= len(line), f"Char {ref.char} out of range for {file} at line {ref.line}"
if line[ref.char - 3:ref.char - 1] != "->":
print(f"Skipping {ref.path}:{ref.line}:{ref.char} - not pointer deref")
print(f" line: {line[:-1]}")
skipped.append(ref)
continue
if line[ref.char - 1:ref.char + 5] == "ptr = ":
try:
data[ref.line - 1] = replace_write_ref(ref, ref.line, data)
except ValueError as e:
print(f"Skipping {ref.path}:{ref.line}:{ref.char} - {e}")
print(f" line: {line[:-1]}")
skipped.append(ref)
continue
else:
# read ref
data[ref.line - 1] = replace_read_ref(ref, line)
i += 1
if file and not dry_run:
assert data, f"No data loaded for {file}"
# write previous file
with open(file, "w") as f:
f.writelines(data)
print(f"Replaced {i} references, skipped {len(skipped)} references")
reflist = load(Path("/home/ubuntu/SoftlyRaining"), Path("robj-ptr-refs.txt"))
replace_all(reflist, dry_run=False)
Script output (I addressed all of these manually in the following manual commit):
~/SoftlyRaining/src$ python replace_obj_refs.py
Skipping /home/ubuntu/SoftlyRaining/src/t_list.c:171:22 - Line does not contain a semicolon
line: subject->ptr = (where == LIST_HEAD) ? lpPrepend(objectGetVal(subject), objectGetVal(value), sdslen(objectGetVal(value)))
Skipping /home/ubuntu/SoftlyRaining/src/t_list.c:168:22 - Line does not contain a semicolon
line: subject->ptr = (where == LIST_HEAD) ? lpPrependInteger(objectGetVal(subject), (long)objectGetVal(value))
Skipping /home/ubuntu/SoftlyRaining/src/server.h:775:11 - not pointer deref
line: void *ptr;
Follow up on /home/ubuntu/SoftlyRaining/src/module.c:13797:16 - assignment contains '=': mv = newmv
old: value->ptr = mv = newmv;
new: objectSetVal(value, mv = newmv);
Follow up on /home/ubuntu/SoftlyRaining/src/defrag.c:658:44 - assignment contains '=': s = news
old: if ((news = activeDefragAlloc(s))) ob->ptr = s = news;
new: if ((news = activeDefragAlloc(s))) objectSetVal(ob, s = news);
Follow up on /home/ubuntu/SoftlyRaining/src/defrag.c:476:46 - assignment contains '=': zs = newzs
old: if ((newzs = activeDefragAlloc(zs))) ob->ptr = zs = newzs;
new: if ((newzs = activeDefragAlloc(zs))) objectSetVal(ob, zs = newzs);
Follow up on /home/ubuntu/SoftlyRaining/src/defrag.c:462:46 - assignment contains '=': ql = newql
old: if ((newql = activeDefragAlloc(ql))) ob->ptr = ql = newql;
new: if ((newql = activeDefragAlloc(ql))) objectSetVal(ob, ql = newql);
Replaced 1647 references, skipped 3 references
Appendix 2 - micro benchmarking results
ubuntu@ip-10-0-145-171:~/robj-microbench$ ./robj-bench
2025-08-18T23:54:11+00:00
Running ./robj-bench
Run on (4 X 2300.18 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x4)
L1 Instruction 32 KiB (x4)
L2 Unified 256 KiB (x4)
L3 Unified 46080 KiB (x1)
Load Average: 0.35, 0.18, 0.06
-----------------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------------
BM_objectGetExpire/1 1.88 ns 1.88 ns 372201373
BM_objectGetExpire/2 1.89 ns 1.89 ns 371970617
BM_objectGetExpire/4 1.89 ns 1.89 ns 369780061
BM_objectGetExpire/8 1.90 ns 1.90 ns 366829968
BM_objectGetExpire/16 1.90 ns 1.90 ns 367467478
BM_objectGetExpire/32 1.88 ns 1.88 ns 369002953
BM_objectGetExpire/64 1.91 ns 1.91 ns 367496136
BM_objectGetExpire/128 1.90 ns 1.90 ns 372766529
BM_objectGetExpire/256 1.89 ns 1.88 ns 371629344
BM_objectGetExpire/512 1.90 ns 1.90 ns 369819056
BM_objectGetExpire/1024 1.90 ns 1.90 ns 372576954
BM_objectGetExpire/2048 1.89 ns 1.89 ns 367325743
BM_objectGetExpire/4096 1.88 ns 1.88 ns 371473964
BM_objectGetExpire_BigO 1.89 (1) 1.89 (1)
BM_objectGetExpire_RMS 0 % 0 %
BM_objectGetKey/1 2.88 ns 2.88 ns 243820017
BM_objectGetKey/2 2.91 ns 2.91 ns 242765864
BM_objectGetKey/4 2.91 ns 2.91 ns 241573647
BM_objectGetKey/8 2.89 ns 2.89 ns 242279472
BM_objectGetKey/16 2.90 ns 2.90 ns 240966931
BM_objectGetKey/32 2.92 ns 2.91 ns 241347563
BM_objectGetKey/64 2.92 ns 2.91 ns 239829973
BM_objectGetKey/128 2.92 ns 2.92 ns 240860846
BM_objectGetKey/256 2.89 ns 2.89 ns 239168435
BM_objectGetKey/512 2.91 ns 2.90 ns 240518452
BM_objectGetKey/1024 2.91 ns 2.91 ns 242862430
BM_objectGetKey/2048 2.91 ns 2.91 ns 240383966
BM_objectGetKey/4096 2.91 ns 2.91 ns 240724659
BM_objectGetKey_BigO 2.90 (1) 2.90 (1)
BM_objectGetKey_RMS 0 % 0 %
BM_objectGetVal/1 3.75 ns 3.75 ns 186373286
BM_objectGetVal/2 3.75 ns 3.75 ns 186349886
BM_objectGetVal/4 3.76 ns 3.76 ns 186069434
BM_objectGetVal/8 3.77 ns 3.77 ns 185854886
BM_objectGetVal/16 3.76 ns 3.76 ns 185938601
BM_objectGetVal/32 3.77 ns 3.77 ns 185350054
BM_objectGetVal/64 3.76 ns 3.76 ns 185015799
BM_objectGetVal/128 1.88 ns 1.88 ns 372679151
BM_objectGetVal/256 1.88 ns 1.88 ns 372541933
BM_objectGetVal/512 1.88 ns 1.88 ns 372547356
BM_objectGetVal/1024 1.88 ns 1.88 ns 372967463
BM_objectGetVal/2048 1.88 ns 1.88 ns 371578466
BM_objectGetVal/4096 1.88 ns 1.88 ns 372867744
BM_objectGetVal_BigO 2.89 (1) 2.89 (1)
BM_objectGetVal_RMS 32 % 32 %
BM_objectGetVal_Using_ptr/1 1.50 ns 1.50 ns 466016074
BM_objectGetVal_Using_ptr/2 1.51 ns 1.51 ns 465033531
BM_objectGetVal_Using_ptr/4 1.51 ns 1.51 ns 465690307
BM_objectGetVal_Using_ptr/8 1.50 ns 1.50 ns 465292160
BM_objectGetVal_Using_ptr/16 1.50 ns 1.50 ns 466284090
BM_objectGetVal_Using_ptr/32 1.50 ns 1.50 ns 465706801
BM_objectGetVal_Using_ptr/64 1.50 ns 1.50 ns 466260051
BM_objectGetVal_Using_ptr/128 1.50 ns 1.50 ns 465387679
BM_objectGetVal_Using_ptr/256 1.50 ns 1.50 ns 464241882
BM_objectGetVal_Using_ptr/512 1.50 ns 1.50 ns 466336830
BM_objectGetVal_Using_ptr/1024 1.51 ns 1.50 ns 465435503
BM_objectGetVal_Using_ptr/2048 1.50 ns 1.50 ns 466323005
BM_objectGetVal_Using_ptr/4096 1.50 ns 1.50 ns 465025662
BM_objectGetVal_Using_ptr_BigO 1.50 (1) 1.50 (1)
BM_objectGetVal_Using_ptr_RMS 0 % 0 %
Appendix 3 - rebasing For the programmatic commit, I resolved all merge conflicts by accepting the current version, then ran the script again with an updated reference list. The script didn't identify any new lines of code to manually fix.
We probably also want a performance test for IO threading as well either that or a deep pipeline. It will tease out a performance regression more clearly.
Codecov Report
:x: Patch coverage is 82.97232% with 283 lines in your changes missing coverage. Please review.
:white_check_mark: Project coverage is 72.49%. Comparing base (51f871a) to head (7afb48b).
:warning: Report is 3 commits behind head on unstable.
| Files with missing lines | Patch % | Lines |
|---|---|---|
| src/module.c | 1.11% | 89 Missing :warning: |
| src/sentinel.c | 0.00% | 74 Missing :warning: |
| src/object.c | 84.79% | 26 Missing :warning: |
| src/debug.c | 80.73% | 21 Missing :warning: |
| src/cluster_legacy.c | 83.82% | 11 Missing :warning: |
| src/defrag.c | 76.92% | 6 Missing :warning: |
| src/lua/debug_lua.c | 37.50% | 5 Missing :warning: |
| src/rdb.c | 94.38% | 5 Missing :warning: |
| src/scripting_engine.c | 58.33% | 5 Missing :warning: |
| src/t_list.c | 91.22% | 5 Missing :warning: |
| ... and 19 more |
Additional details and impacted files
@@ Coverage Diff @@
## unstable #2516 +/- ##
============================================
+ Coverage 72.46% 72.49% +0.03%
============================================
Files 129 129
Lines 70530 70581 +51
============================================
+ Hits 51108 51167 +59
+ Misses 19422 19414 -8
| Files with missing lines | Coverage Δ | |
|---|---|---|
| src/bitops.c | 94.47% <100.00%> (ø) |
|
| src/cluster_migrateslots.c | 92.23% <100.00%> (+<0.01%) |
:arrow_up: |
| src/cluster_slot_stats.c | 93.75% <100.00%> (ø) |
|
| src/commandlog.c | 96.58% <100.00%> (ø) |
|
| src/expire.c | 97.29% <100.00%> (ø) |
|
| src/functions.c | 94.47% <100.00%> (ø) |
|
| src/lazyfree.c | 85.41% <100.00%> (ø) |
|
| src/logreqres.c | 72.72% <ø> (ø) |
|
| src/lolwut.c | 100.00% <100.00%> (ø) |
|
| src/lua/script_lua.c | 89.80% <100.00%> (ø) |
|
| ... and 39 more |
:rocket: New features to boost your workflow:
- :snowflake: Test Analytics: Detect flaky tests, report on failures, and find test suite problems.
- :package: JS Bundle Analysis: Save yourself from yourself by tracking and limiting bundle sizes in JS merges.
I updated the test results using a pipeline depth of 100, and I've also rebased the branch, appended some minor fixes, and addressed PR feedback. :)
Benchmark ran on this commit: d8400e3
Benchmark Comparison by Command
GET | cluster disabled | tls disabled
| Metric | Baseline | PR | Diff | % Change |
|---|---|---|---|---|
| rps | 988142.31 | 977517.12 | -10625.19 | -1.08% |
| latency_avg_ms | 0.45 | 0.45 | 0.00 | +0.89% |
| latency_p50_ms | 0.43 | 0.43 | 0.00 | +0.00% |
| latency_p95_ms | 0.60 | 0.61 | 0.01 | +1.34% |
| latency_p99_ms | 0.71 | 0.71 | 0.00 | +0.00% |
GET | cluster enabled | tls disabled
| Metric | Baseline | PR | Diff | % Change |
|---|---|---|---|---|
| rps | 855432.00 | 843170.31 | -12261.69 | -1.43% |
| latency_avg_ms | 0.53 | 0.54 | 0.01 | +1.70% |
| latency_p50_ms | 0.52 | 0.54 | 0.02 | +3.08% |
| latency_p95_ms | 0.69 | 0.69 | 0.01 | +1.16% |
| latency_p99_ms | 0.74 | 0.74 | 0.00 | +0.00% |
MGET | cluster disabled | tls disabled
| Metric | Baseline | PR | Diff | % Change |
|---|---|---|---|---|
| rps | 319795.31 | 312891.09 | -6904.22 | -2.16% |
| latency_avg_ms | 1.39 | 1.43 | 0.03 | +2.52% |
| latency_p50_ms | 1.38 | 1.42 | 0.03 | +2.31% |
| latency_p95_ms | 1.74 | 1.77 | 0.03 | +1.84% |
| latency_p99_ms | 1.80 | 1.83 | 0.03 | +1.78% |
MSET | cluster disabled | tls disabled
| Metric | Baseline | PR | Diff | % Change |
|---|---|---|---|---|
| rps | 220070.42 | 213949.52 | -6120.90 | -2.78% |
| latency_avg_ms | 2.18 | 2.25 | 0.07 | +3.03% |
| latency_p50_ms | 2.28 | 2.35 | 0.07 | +3.16% |
| latency_p95_ms | 2.50 | 2.58 | 0.07 | +2.88% |
| latency_p99_ms | 2.59 | 2.67 | 0.08 | +3.09% |
SET | cluster disabled | tls disabled
| Metric | Baseline | PR | Diff | % Change |
|---|---|---|---|---|
| rps | 850340.12 | 840336.12 | -10004.00 | -1.18% |
| latency_avg_ms | 0.53 | 0.54 | 0.01 | +1.32% |
| latency_p50_ms | 0.53 | 0.53 | 0.00 | +0.00% |
| latency_p95_ms | 0.69 | 0.70 | 0.02 | +2.33% |
| latency_p99_ms | 0.74 | 0.75 | 0.01 | +1.08% |
SET | cluster enabled | tls disabled
| Metric | Baseline | PR | Diff | % Change |
|---|---|---|---|---|
| rps | 669792.38 | 670690.81 | 898.43 | +0.13% |
| latency_avg_ms | 0.69 | 0.69 | 0.00 | +0.00% |
| latency_p50_ms | 0.69 | 0.69 | 0.00 | +0.00% |
| latency_p95_ms | 0.88 | 0.87 | -0.01 | -0.91% |
| latency_p99_ms | 1.09 | 1.06 | -0.02 | -2.21% |
"Reduce small string memory overhead by ~20-30%"
This PR title isn't perfect. It describes the benefit, which is important, but a PR title should instead primarily summarize the change itself.
I mean, reducing memory overhead can be done in other ways too, so it's not immediately clear from the title what this change is.
Naming isn't easy. We also want to avoid going into code details, because these titles will end up in release notes and be read by users. How about something like "Remove internal pointer overhead in small strings"?
Small note: we should definitely take that, but it would probably impact our ability to simply backport fixed to older versions. Maybe we could just backport the function objectGetVal as a macro or something which will use the ptr in older versions?
(These are the cases I miss c++ operator overloading :))
It seems that the two failures are both flaky tests failing in unstable prior to this change.
- https://github.com/valkey-io/valkey/issues/2893
*** [TIMEOUT]: benchmark: warmup and duration are cumulative in tests/integration/valkey-benchmark.tcl
- https://github.com/valkey-io/valkey/issues/2908
*** [err]: TTL, TYPE and EXISTS do not alter the last access time of a key in tests/unit/introspection-2.tcl
Expected [r object idletime foo] >= 2 (context: type eval line 7 cmd {assert {[r object idletime foo] >= 2}} proc ::test)
Seems like the additional code coverage failure was probably from bad memory...
------ FAST MEMORY TEST ------
59636:M 16 Dec 2025 21:04:43.813 # Bio worker thread #0 terminated
59636:M 16 Dec 2025 21:04:43.813 # Bio worker thread #1 terminated
59636:M 16 Dec 2025 21:04:43.814 # Bio worker thread #2 terminated
59636:M 16 Dec 2025 21:04:43.814 # Bio worker thread #3 terminated
*** Preparing to test memory region 560549015000 (2859008 bytes)
*** Preparing to test memory region 5605587c9000 (135168 bytes)
*** Preparing to test memory region 7fb1fc000000 (135168 bytes)
*** Preparing to test memory region 7fb202d7c000 (16777216 bytes)
*** Preparing to test memory region 7fb203d7d000 (16777216 bytes)
*** Preparing to test memory region 7fb204d7e000 (16777216 bytes)
*** Preparing to test memory region 7fb207d80000 (2621440 bytes)
*** Preparing to test memory region 7fb208400000 (8388608 bytes)
*** Preparing to test memory region 7fb208e1c000 (53248 bytes)
*** Preparing to test memory region 7fb208ec1000 (12288 bytes)
*** Preparing to test memory region 7fb208fb7000 (8192 bytes)
.E.E.E.E.E.E.E.E.E.E.E
!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!