macholibre
macholibre copied to clipboard
LC_BUILD_VERSION is not parsed, which causes the parse to fail
You can reproduce this by trying to parse the sample with sha256 "f05d79dd0e21acc96377e38617bfe070bf8e4c430dc9552808cfbff9841d6427".
The fix I think will work best is to update parse_lcs with:
for lc_num in range(nlcs):
# Add this:
prev = self._file.tell()
cmd = self.get_int() # Load command type
cmd_size = self.get_int() # Size of load command
...
...
...
elif cmd == 'MAIN':
self.__macho['lcs'].append(self.parse_main(cmd, cmd_size))
# and this
elif cmd == "BUILD_VERSION":
self.__macho['lcs'].append(self.parse_version_min_os(cmd, cmd_size))
self._file.seek(prev + cmd_size)
else:
logger.warning(f"Failed to parse lc {cmd}, skipping it")
self._file.seek(prev + cmd_size)
Or you can just add a parser for it, based on the definition from the header file:
#ifndef LC_BUILD_VERSION
#define LC_BUILD_VERSION 0x32 /* build for platform min OS version */
/*
* The build_version_command contains the min OS version on which this
* binary was built to run for its platform. The list of known platforms and
* tool values following it.
*/
struct build_version_command {
uint32_t cmd; /* LC_BUILD_VERSION */
uint32_t cmdsize; /* sizeof(struct build_version_command) plus */
/* ntools * sizeof(struct build_tool_version) */
uint32_t platform; /* platform */
uint32_t minos; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
uint32_t sdk; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
uint32_t ntools; /* number of tool entries following this */
};
struct build_tool_version {
uint32_t tool; /* enum for the tool */
uint32_t version; /* version number of the tool */
};
/* Known values for the platform field above. */
#define PLATFORM_MACOS 1
#define PLATFORM_IOS 2
#define PLATFORM_TVOS 3
#define PLATFORM_WATCHOS 4
#define PLATFORM_BRIDGEOS 5
/* Known values for the tool field above. */
#define TOOL_CLANG 1
#define TOOL_SWIFT 2
#define TOOL_LD 3
#endif
I ran into this issue as well. There are two related problems. The first is that commands that aren't supported in the big if/else block aren't skipped. The second is that the check against the dictionaries doesn't continue the loop or advance the read pointer either. I addressed both in this patch, as it is unlikely that the code will ever support every segment/section in all files:
$ git diff
diff --git a/macholibre/parser.py b/macholibre/parser.py
index 7835843..1b8103f 100644
--- a/macholibre/parser.py
+++ b/macholibre/parser.py
@@ -239,6 +239,7 @@ class Parser():
nsects = self.get_int()
flags = self.get_int()
+
maxprot = dictionary.protections[maxprot & 0b111]
initprot = dictionary.protections[initprot & 0b111]
@@ -766,6 +767,7 @@ class Parser():
self.__macho['lcs'] = []
+
for _ in range(nlcs):
cmd = self.get_int() # Load command type
cmd_size = self.get_int() # Size of load command
@@ -782,11 +784,9 @@ class Parser():
if cmd in dictionary.loadcommands:
cmd = dictionary.loadcommands[cmd]
else:
- self.add_abnormality('Unknown load command "{}" at offset '
- '"{}".'.format(
- cmd, self.__file.tell() - 8))
-
- self.__file.read(cmd_size - 8) # skip load command
+ if cmd_size > 8:
+ self.__file.read(cmd_size - 8) # skip load command
+ continue
if cmd == 'SEGMENT' or cmd == 'SEGMENT_64':
self.__macho['lcs'].append(
@@ -855,6 +855,8 @@ class Parser():
self.__macho['lcs'].append(self.parse_rpath(cmd, cmd_size))
elif cmd == 'MAIN':
self.__macho['lcs'].append(self.parse_main(cmd, cmd_size))
+ else:
+ self.__file.read(cmd_size)
def parse_syms(self, offset, size, lc_symtab):
"""Parse symbol and string tables.