COFF produced by MASM with an empty string table cannot be parsed
Hi,
This appears to be a regression. It's no longer possible to parse a COFF produced by MASM with an empty string table.
See the below test
diff --git a/src/pe/header.rs b/src/pe/header.rs
index 318c0b4..06d20ac 100644
--- a/src/pe/header.rs
+++ b/src/pe/header.rs
@@ -1823,4 +1823,60 @@ mod tests {
assert_eq!(header.dos_header.pe_pointer, 0x40);
assert_eq!(header.coff_header.number_of_sections, 3);
}
+
+ // MASM COFF file that has an empty string table
+ static EMPTY_STRING_TABLE_OBJ: [u8; 424] = [
+ 0x64, 0x86, 0x03, 0x00, 0xf3, 0xcb, 0x20, 0x69, 0x02, 0x01, 0x00, 0x00,
+ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x74, 0x65, 0x78,
+ 0x74, 0x24, 0x6d, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x50, 0x60,
+ 0x2e, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x40, 0x00, 0x50, 0xc0, 0x2e, 0x64, 0x65, 0x62, 0x75, 0x67, 0x24, 0x53,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x8d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x10, 0x42, 0xc3, 0x04, 0x00, 0x00,
+ 0x00, 0xf1, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x01,
+ 0x11, 0x00, 0x00, 0x00, 0x00, 0x43, 0x3a, 0x5c, 0x55, 0x73, 0x65, 0x72,
+ 0x73, 0x5c, 0x43, 0x6f, 0x6e, 0x74, 0x6f, 0x73, 0x6f, 0x5c, 0x77, 0x69,
+ 0x6e, 0x2d, 0x6b, 0x65, 0x78, 0x70, 0x5c, 0x61, 0x63, 0x6c, 0x5f, 0x65,
+ 0x64, 0x69, 0x74, 0x2e, 0x6f, 0x62, 0x6a, 0x00, 0x37, 0x00, 0x3c, 0x11,
+ 0x03, 0x02, 0x00, 0x00, 0xd0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x2a, 0x00, 0x84, 0x86, 0x00, 0x00, 0x4d, 0x69,
+ 0x63, 0x72, 0x6f, 0x73, 0x6f, 0x66, 0x74, 0x20, 0x28, 0x52, 0x29, 0x20,
+ 0x4d, 0x61, 0x63, 0x72, 0x6f, 0x20, 0x41, 0x73, 0x73, 0x65, 0x6d, 0x62,
+ 0x6c, 0x65, 0x72, 0x00, 0x00, 0x00, 0x40, 0x63, 0x6f, 0x6d, 0x70, 0x2e,
+ 0x69, 0x64, 0x84, 0x86, 0x03, 0x01, 0xff, 0xff, 0x00, 0x00, 0x03, 0x00,
+ 0x40, 0x66, 0x65, 0x61, 0x74, 0x2e, 0x30, 0x30, 0x10, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0x00, 0x00, 0x03, 0x00, 0x2e, 0x74, 0x65, 0x78, 0x74, 0x24,
+ 0x6d, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x01,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x64, 0x65, 0x62, 0x75, 0x67,
+ 0x24, 0x53, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x01,
+ 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65, 0x6d, 0x70, 0x74, 0x79, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x20, 0x00, 0x02, 0x00,
+ 0x04, 0x00, 0x00, 0x00
+ ];
+
+ #[test]
+ fn parse_real_coff_with_empty_string_table() {
+ // This test verifies that a real-world COFF file with an empty string table can be parsed
+ // The file has a string table with only the length field (4 bytes)
+ let coff = Coff::parse(&EMPTY_STRING_TABLE_OBJ).unwrap();
+
+ // Should have sections, symbols, and a minimal string table
+ assert_eq!(coff.header.number_of_sections, 3);
+ assert_eq!(coff.header.number_of_symbol_table, 9);
+ assert!(coff.strings.is_some());
+
+ // String table should be empty (only contains the length field)
+ let strings = coff.strings.unwrap();
+ assert_eq!(strings.len(), 0); // Empty string table content
+ }
}
This was introduced in commit 69dba8b, and in particular the check https://github.com/m4b/goblin/blob/master/src/strtab.rs#L109. If you don't trust the above bytes (and you shouldn't!), then it can be replicated with,
.code
empty PROC
ret
empty ENDP
END
And then call ml64 /c sample.asm. The above bytes contain debug info from one of my projects (hence the size).
It's possible that is sufficient to remove the >= and just use > but I don't know enough of goblin's internals to ensure that is the correct fix.
Either way, I don't think you're taking in consideration that the size for an empty string table is 4 as per https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#coff-string-table.
You are correct that >= and > there in goblin has different semantics (on how the error is returned). Would you mind submitting a PR for this?
Opened #503