rizin icon indicating copy to clipboard operation
rizin copied to clipboard

Refactor use of `core->block` to the transparent IO access

Open XVilka opened this issue 2 years ago • 3 comments

The goal is to avoid having to manually keep `core->block` in mind when using the API.

Callers should use the transparent RzIO and RzCore API without needing to work with the block or blocksize directly.

I propose a new API that handles all reads and writes itself:

  • RZ_API bool rz_core_io_read_at(RzCore *core, ut64 offset, ut64 len, ut64 *read)
  • RZ_API bool rz_core_io_write_at(RzCore *core, ut64 offset, ut64 len, ut64 *written)

See https://github.com/rizinorg/rizin/pull/2694

$ rg "core->block[,\s\)]" | wc -l
     121

$ rg "core->block[,\s\)]"
test/unit/test_yank.c
20:	mu_assert_memeq(core->block, (const ut8 *)"\x44\x33\x22\x11", 4, "original bytes should be right at address 0");
29:	mu_assert_memeq(core->block, (const ut8 *)"\x44\x33\x22\x11", 4, "yanked bytes should be pasted at address 4");
46:	mu_assert_streq((const char *)core->block, "Hello World", "yanked bytes should be pasted at address 4");
48:	mu_assert_streq((const char *)core->block, "HellHello World", "yanked bytes should be pasted at address 4, original content there");

test/db/archos/linux-x64/dbg_bps
263:NAME=read core->block on short move

librz/core/rtr_http.c
438:	memcpy(newblk, core->block, core->blocksize);
440:	core->block = newblk;
452:		core->block = origblk;
468:		core->block = newblk;

librz/core/cprint.c
28:		value = rz_read_ble(core->block + pos, false, 8);
1093:	memcpy(buf, core->block, core->blocksize);

librz/core/cmp.c
187:			core->block + i, len - i);

librz/core/tui/visual.c
829:				q = core->block + i;
834:		q = rz_mem_mem(core->block + d, core->blocksize - d,
837:			q = rz_mem_mem(core->block, RZ_MIN(core->blocksize, d),
842:		core->print->cur = (int)(size_t)(q - core->block);
924:	p = rz_mem_mem(core->block + d, core->blocksize - d,
927:		core->print->cur = (int)(size_t)(p - core->block);
1005:		rz_asm_disassemble(core->rasm, &op, core->block, RZ_MIN(32, core->blocksize));
1603:				core->block + next_roff, 32);
1690:					core->block, 32);
1753:				&op, core->block, 32);
2675:							rz_asm_disassemble(core->rasm, &op, core->block, 32);
3642:		op, core->block, 32);

librz/core/cio.c
246:	if (core && core->block) {
247:		return rz_io_read_at(core->io, core->offset, core->block, core->blocksize);
379:	if (rz_analysis_op(core->analysis, &op, core->offset, core->block, core->blocksize, RZ_ANALYSIS_OP_MASK_BASIC) < 1) {

librz/core/hack.c
293:		if (rz_analysis_op(core->analysis, &aop, core->offset, core->block, core->blocksize, RZ_ANALYSIS_OP_MASK_BASIC) < 1) {

librz/core/seek.c
208:		if (rz_analysis_op(core->analysis, &aop, core->offset, core->block, core->blocksize, RZ_ANALYSIS_OP_MASK_BASIC) > 0) {

librz/core/core.c
591:		rz_analysis_op(core->analysis, &op, core->offset, core->block, core->blocksize, RZ_ANALYSIS_OP_MASK_BASIC);
2326:	core->block = (ut8 *)calloc(RZ_CORE_BLOCKSIZE + 1, 1);
2327:	if (!core->block) {
2796:	bump = realloc(core->block, bsize + 1);
2801:	core->block = bump;
2803:	memset(core->block, 0xff, core->blocksize);
2948:					memcpy(ptr + 5, core->block, i); // core->blocksize);

librz/core/cgraph.c
784:	if (rz_analysis_op(core->analysis, &op, core->offset, core->block, core->blocksize, flags) > 0) {

librz/core/canalysis.c
1048:		ptr = core->block + delta;

librz/core/tui/esil.c
51:	memcpy(buf, core->block, sizeof(ut64));

librz/core/tui/define.c
228:			core->block + off - core->offset, 32, RZ_ANALYSIS_OP_MASK_BASIC);
312:				if (rz_analysis_op(core->analysis, &op, off, core->block + delta,

librz/core/tui/panels.c
1544:				&op, core->block, 32);

librz/core/tui/biteditor.c
40:	memcpy(buf, core->block + cur, sizeof(ut64));

librz/core/disasm.c
6109:	if (buf != core->block) {

librz/core/cmd/cmd_magic.c
96:	str = rz_magic_buffer(ck, core->block + delta, core->blocksize - delta);

librz/core/cmd/cmd_search.c
2559:		int diff = memcmpdiff(core->block, block, core->blocksize);
2786:	memcpy(buf, core->block, bufsz);

librz/core/cmd/cmd_write.c
57:		rz_crypto_update(core->crypto, (const ut8 *)core->block, core->blocksize);

librz/core/cmd/cmd_print.c
1598:	if (data != core->block) {
1670:		handle_entropy(core, plugin->name, core->block, core->blocksize);
1672:		handle_ssdeep(core, plugin->name, core->block, core->blocksize);
1674:		handle_hash_cfg(core, plugin->name, core->block, core->blocksize);
2134:	const ut8 *buffer = core->block + offset;
2188:	if (*core->block & 0x1) { // "long" string
2189:		const ut8 *ptr = core->block + (bitness / 8) * 2;
2223:	if (rz_scan_strings_raw(core->block, found, &scan_opt, 0, core->blocksize, RZ_STRING_ENC_GUESS) < 0) {
2256:	if (rz_scan_strings_raw(core->block, found, &scan_opt, 0, core->blocksize, RZ_STRING_ENC_GUESS) < 0) {
2281:		string_len = rz_read_ble16(core->block, big_endian);
2284:		string_len = rz_read_ble32(core->block, big_endian);
2287:		string_len = rz_read_ble64(core->block, big_endian);
2303:		opt.buffer = core->block + offset;
2310:		print_json_string(core, core->block + offset, string_len, RZ_STRING_ENC_8BIT, true);
2545:	// TODO After core->block is removed, this should be changed to a block read.
2573:			if (core->blocksize < 4 || !memcmp(core->block, "\xff\xff\xff\xff", 4)) {
2576:				char *res = rz_print_json_indent((const char *)core->block, true, "  ", NULL);
2801:		print_json_string(core, core->block, core->blocksize, RZ_STRING_ENC_UTF16LE, true);
2815:		print_json_string(core, core->block, core->blocksize, RZ_STRING_ENC_UTF32LE, true);
2829:		print_json_string(core, core->block, core->blocksize, RZ_STRING_ENC_UTF16BE, true);
2843:		print_json_string(core, core->block, core->blocksize, RZ_STRING_ENC_UTF32BE, true);
2890:		rz_str_bits(buf, core->block + i, 8, NULL);
2900:			const ut8 *b = core->block + i - 3;
3015:			ut8 *p = (ut8 *)core->block + j;
3024:			ut8 *p = (ut8 *)core->block + j;
3038:	rz_print_hexii(core->print, core->offset, core->block,
3087:	rz_core_print_hexdump(core, core->offset, core->block, len, 16, 1, 1);
3105:	memcpy(block, core->block, len);
3115:		core->block, len, 8, 1, 1);
3138:	int len = (int)rz_str_nlen((const char *)core->block, core->blocksize);
3142:	rz_print_bytes(core->print, core->block, len, "%02x");
3204:		char *code = rz_lang_byte_array(core->block, size, type); \
3216:		char *code = rz_lang_byte_array(core->block, core->blocksize, big_endian ? type##_BE : type##_LE); \
3256:	char *code = rz_core_print_bytes_with_inst(core, core->block, core->offset, size);
3629:	rz_core_analysis_bytes_il(core, core->block, size, 0, false);
3694:	core->num->value = rz_core_print_disasm(core, core->offset, core->block, core->blocksize, RZ_ABS(n_instrs), state, &disasm_options);
3754:		ret = rz_asm_disassemble(core->rasm, &asm_op, core->block + i, core->blocksize - i);
3987:		int ret = rz_analysis_op(core->analysis, &aop, offset, core->block, core->blocksize, RZ_ANALYSIS_OP_MASK_BASIC);
4066:	char *buf = rz_base64_encode_dyn((const unsigned char *)core->block, core->blocksize);
4077:	ut8 *buf = rz_base64_decode_dyn((const char *)core->block, core->blocksize);
4100:	rz_str_bits(buf, core->block, len + skip, NULL);
4130:	RzASN1Object *asn1 = rz_asn1_object_parse(core->block, core->blocksize);
4146:	char *s = rz_protobuf_decode(core->block, core->blocksize, false);
4157:	char *s = rz_protobuf_decode(core->block, core->blocksize, true);
4169:	RzCMS *cms = rz_pkcs7_cms_parse(core->block, core->blocksize);
4193:	RzX509Certificate *x509 = rz_x509_certificate_parse2(core->block, core->blocksize);
4223:	char *s = rz_axml_decode(core->block, core->blocksize);
4306:	ut8 *block_end = core->block + blocksize;
4635:	char *s = rz_hash_cfg_randomart(core->block, len, core->offset);
4663:			rz_io_read_at(core->io, core->offset, core->block, len);
4664:			s = rz_hash_cfg_randomart(core->block, len, core->offset);
4671:	rz_io_read_at(core->io, offset0, core->block, len);
6609:	colordump(core, core->block, len);
6617:	out = rz_inflate(core->block, core->blocksize, &inConsumed, &outlen);

librz/core/cmd/cmd_cmp.c
190:		rz_core_print_hexdiff(core, core->offset, core->block, addr, b, core->blocksize, col);
206:		rz_core_print_hexdiff(core, core->offset, core->block, addr, b, core->blocksize, col);
326:	ret = rz_hex_bin2str(core->block, strlen(input) / 2, (char *)buf);

librz/core/cmd/cmd_seek.c
52:			rz_asm_disassemble(core->rasm, &op, core->block, 32);
73:		ret = rz_analysis_op(core->analysis, &op, core->offset, core->block,

librz/core/cmd/cmd_analysis.c
375:		ut32 n = rz_read_ble32(core->block + i, big_endian);
5535:		core_analysis_bytes_json(core, core->block, core->blocksize, 0, state->d.pj);
5538:		core_analysis_bytes_standard(core, core->block, core->blocksize, 0);
5549:	core_analysis_bytes_esil(core, core->block, core->blocksize, 0);
5554:	core_analysis_bytes_desc(core, core->block, core->blocksize, 0);
5559:	core_analysis_bytes_size(core, core->block, core->blocksize, 0);
5583:		core_analysis_bytes_json(core, core->block, core->blocksize, count, state->d.pj);
5586:		core_analysis_bytes_standard(core, core->block, core->blocksize, count);
5616:	core_analysis_bytes_size(core, core->block, core->blocksize, count);
5641:	core_analysis_bytes_esil(core, core->block, core->blocksize, count);
5758:	rz_core_analysis_bytes_il(core, core->block, core->blocksize, count, false);
5782:	rz_core_analysis_bytes_il(core, core->block, core->blocksize, count, true);
5796:		core_analysis_bytes_desc(core, core->block + cur, core->blocksize, 1);
6602:		core->offset, core->block, core->blocksize);
$ rg rz_core_block_read -l
librz/main/rizin.c
librz/include/rz_core.h
librz/core/cio.c
librz/core/seek.c
librz/core/core.c
librz/core/yank.c
librz/core/cfile.c
librz/core/cbin.c
librz/core/disasm.c
librz/core/tui/visual.c
librz/core/tui/panels.c
librz/core/cconfig.c
librz/core/cmd/cmd_write.c
librz/core/cmd/cmd.c
librz/core/cmd/cmd_print.c
librz/core/cmd/cmd_open.c
test/unit/test_yank.c

XVilka avatar Jun 14 '22 12:06 XVilka

I agree, but do we also need an API for switching bins, etc.?

wargio avatar Sep 19 '23 07:09 wargio

I am a little bit confused before getting to it, and I would really appreciate further explanation. We are not going to remove the concept of core->block, right? We just want to make access to core->block easier. For example, for the proposed API RZ_API bool rz_core_io_read_at(RzCore *core, ut64 offset, ut64 *read), does offset mean the offset from the start of the current core->block, and is read a buffer that stores all the content of core->block from that offset to the end of core->block?

Or is RZ_API bool rz_core_io_read_at(RzCore *core, ut64 offset, ut64 *read) used to directly read the binary under test? But how to decide the length of reading? @XVilka

PeiweiHu avatar Nov 15 '23 15:11 PeiweiHu

@PeiweiHu the idea is to not expose core->block or similar APIs to 99% of the API users - both external and internal. Splitting into blocks, caching reads, etc., should be done opaquely under the hood. We could keep only a way to change the block size and/or cache size via separate APIs, but that's it. So, for example, if we have a 64 GB file, the user should be able to read from or write to any address via a single API call without understanding the current block, the local cache content, etc.

> But how to decide the length of reading?

Sorry, I forgot to add the ut64 len parameter to both functions, my bad.

XVilka avatar Nov 16 '23 14:11 XVilka