Null pointer dereference in `TextIOWrapper.truncate` via re-entrant `flush`
What happened?
A custom raw stream can override flush to detach the wrapper when TextIOWrapper.truncate forces a flush; the truncate logic then calls self->buffer.truncate on the now NULL buffer pointer, dereferencing NULL and crashing.
Proof of Concept:
import io
class Raw(io.BytesIO):
def __init__(self):
super().__init__()
self._done = False
def flush(self):
if not self._done:
self._done = True
wrap.detach()
return None
wrap = io.TextIOWrapper(Raw())
wrap.truncate(0)
Affected Versions:
| Python Version | Status | Exit Code |
|---|---|---|
| Python 3.9.24+ (heads/3.9:111bbc15b26, Oct 28 2025, 16:51:20) | ASAN | 1 |
| Python 3.10.19+ (heads/3.10:014261980b1, Oct 28 2025, 16:52:08) [Clang 18.1.3 (1ubuntu1)] | ASAN | 1 |
| Python 3.11.14+ (heads/3.11:88f3f5b5f11, Oct 28 2025, 16:53:08) [Clang 18.1.3 (1ubuntu1)] | ASAN | 1 |
| Python 3.12.12+ (heads/3.12:8cb2092bd8c, Oct 28 2025, 16:54:14) [Clang 18.1.3 (1ubuntu1)] | ASAN | 1 |
| Python 3.13.9+ (heads/3.13:9c8eade20c6, Oct 28 2025, 16:55:18) [Clang 18.1.3 (1ubuntu1)] | ASAN | 1 |
| Python 3.14.0+ (heads/3.14:2e216728038, Oct 28 2025, 16:56:16) [Clang 18.1.3 (1ubuntu1)] | ASAN | 1 |
| Python 3.15.0a1+ (heads/main:f5394c257ce, Oct 28 2025, 16:57:16) [Clang 18.1.3 (1ubuntu1)] | ASAN | 1 |
Vulnerable Code:
static PyObject *
_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
/*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/
{
CHECK_ATTACHED(self)
// Bug: Trigger the flush and free the buffer
if (_PyFile_Flush((PyObject *)self) < 0) {
return NULL;
}
// self->buffer has been set to null
return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
}
Sanitizer Output:
=================================================================
==1656115==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000008 (pc 0x63319491efcf bp 0x7fff237438f0 sp 0x7fff23743820 T0)
==1656115==The signal is caused by a READ memory access.
==1656115==Hint: address points to the zero page.
#0 0x63319491efcf in _Py_TYPE /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/./Include/object.h:277:20
#1 0x63319491efcf in _PyObject_GetMethodStackRef /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Objects/object.c:1698:24
#2 0x63319473c501 in PyObject_VectorcallMethod /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Objects/call.c:840:19
#3 0x6331951fdc17 in PyObject_CallMethodOneArg /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/./Include/cpython/abstract.h:74:12
#4 0x6331951fdc17 in _io_TextIOWrapper_truncate_impl /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/./Modules/_io/textio.c:2974:12
#5 0x6331951fdc17 in _io_TextIOWrapper_truncate /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/./Modules/_io/clinic/textio.c.h:998:20
#6 0x633194739117 in _PyObject_VectorcallTstate /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/./Include/internal/pycore_call.h:169:11
#7 0x633194739117 in PyObject_Vectorcall /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Objects/call.c:327:12
#8 0x633194d36c62 in _PyEval_EvalFrameDefault /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Python/generated_cases.c.h:1620:35
#9 0x633194d05bf4 in _PyEval_Vector /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Python/ceval.c:2005:12
#10 0x633194d05bf4 in PyEval_EvalCode /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Python/ceval.c:888:21
#11 0x633194fca4d4 in run_mod /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Python/pythonrun.c:1459:19
#12 0x633194fc402d in pyrun_file /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Python/pythonrun.c:1293:15
#13 0x633194fc12d3 in _PyRun_SimpleFileObject /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Python/pythonrun.c:521:13
#14 0x633194fc089e in _PyRun_AnyFileObject /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Python/pythonrun.c:81:15
#15 0x633195085b13 in pymain_run_file_obj /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Modules/main.c:410:15
#16 0x633195085b13 in pymain_run_file /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Modules/main.c:429:15
#17 0x633195082bcb in pymain_run_python /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Modules/main.c:691:21
#18 0x633195082bcb in Py_RunMain /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Modules/main.c:772:5
#19 0x6331950847fb in pymain_main /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Modules/main.c:802:12
#20 0x633195084aa2 in Py_BytesMain /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/Modules/main.c:826:12
#21 0x7854bda2a1c9 in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16
#22 0x7854bda2a28a in __libc_start_main csu/../csu/libc-start.c:360:3
#23 0x633194441114 in _start (/home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/python+0x6b0114) (BuildId: 0aee20a59f1c25de22733bd0e5f8259ab04406c4)
AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV /home/jackfromeast/Desktop/entropy/tasks/reproducexx/targets/cpython-main/./Include/object.h:277:20 in _Py_TYPE
==1656115==ABORTING
Linked PRs
- gh-143041
This is similar to the issue #142594.
This is similar to the issue #142594.
CHECK_ATTACHED needs to be checked again after the flush.
Maybe this is a simple fix:
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index f988195256..07575dede5 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -2965,12 +2965,11 @@ static PyObject *
_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
/*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/
{
- CHECK_ATTACHED(self)
if (_PyFile_Flush((PyObject *)self) < 0) {
return NULL;
}
-
+ CHECK_ATTACHED(self)
return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
}
I'm not sure this covers all the cases... and maybe someone can also change self->detached in other ways.
Should we carefully make sure CHECK_ATTACHED is performed after every such call?
I will leave other issues of this kind here that exhibit the same behavior:
1
import io
class EvilRaw(io.BytesIO):
def __init__(self):
super().__init__()
self._done = False
def write(self, data):
if not self._done:
self._done = True
wrap.detach()
return super().write(data)
wrap = io.TextIOWrapper(EvilRaw())
wrap.write("hello")
wrap.flush()
2
import io
class EvilRaw(io.BytesIO):
def __init__(self):
super().__init__(b"hello world")
self._done = False
def flush(self):
if not self._done:
self._done = True
wrap.detach()
return None
wrap = io.TextIOWrapper(EvilRaw())
wrap.read()
pos = wrap.tell()
wrap.seek(0)
wrap.seek(pos)
3
import io
class EvilRaw(io.BytesIO):
def __init__(self):
super().__init__(b"hello world")
self._done = False
def flush(self):
if not self._done:
self._done = True
wrap.detach()
return None
wrap = io.TextIOWrapper(EvilRaw())
wrap.close()
4
import io
class EvilRaw(io.BytesIO):
def __init__(self):
super().__init__(b"hello world")
self._done = False
def write(self, data):
if not self._done:
self._done = True
wrap.detach()
return super().write(data)
wrap = io.TextIOWrapper(EvilRaw())
wrap.write("test")
wrap.read()
cc @cmaloney @picnixz
Going through the TextIO code, it currently looks like there are quite a few related cases. Making sure no new code is added between a call which could invalidate self->buffer and uses of self->buffer feels like it'll be hard to maintain.
Could we do a small refactor here so that the code doesn't use self->buffer directly as often, but goes through a simple helper instead? My thought is an internal helper C function which uses the standard PyObject return pattern but always does the self->attached + self->ok check before returning. That seems like it will be easier to maintain to me.
In some cases it might mean we check more than needed, but I'd rather close out this type of bug first, and then make it more efficient for the cases which are important. (A collection of I/O performance-critical workloads that we can validate with would be nice! I don't have one currently, but I am working on it.)