ValidationNotEqualError: /types/dictionary_page/seq/7: at pos 112632: validation failed: not equal, expected b'\xcd\xab\xcd\xab', but got b'\xab\xcd\xabL'
Hello,
I got the following error when calling the `get_table` method on a table that contains transformations. The other tables in the same PBIX file could be read without any issue.
I'm using pbixray version 0.3.2 with Python 3.12.
{ "name": "ValidationNotEqualError", "message": "/types/dictionary_page/seq/7: at pos 112632: validation failed: not equal, expected b'\xcd\xab\xcd\xab', but got b'\xab\xcd\xabL'", "stack": "--------------------------------------------------------------------------- ValidationNotEqualError Traceback (most recent call last) Cell In[24], line 2 1 table_name = 'Table1' ----> 2 table_contents = model.get_table(table_name) 3 display(table_contents.head()) File c:\Users\abcde\onedrive\Python\PBIX\.venv\Lib\site-packages\pbixray\core.py:20, in PBIXRay.get_table(self, table_name) 18 def get_table(self, table_name): 19 """Generates a DataFrame representation of the specified table.""" ---> 20 return self._vertipaq_decoder.get_table(table_name) File c:\Users\abcde\onedrive\Python\PBIX\.venv\Lib\site-packages\pbixray\vertipaq_decoder.py:202, in VertiPaqDecoder.get_table(self, table_name) 199 idfmeta_buffer = get_data_slice(self._data_model,column_metadata["IDF"] + 'meta') 200 meta = self._read_idfmeta(idfmeta_buffer) --> 202 column_data = self._get_column_data(column_metadata, meta) 203 # Handle special cases for certain data types 204 column_data = self._handle_special_cases(column_data, column_metadata["DataType"]) File c:\Users\abcde\onedrive\Python\PBIX\.venv\Lib\site-packages\pbixray\vertipaq_decoder.py:175, in VertiPaqDecoder._get_column_data(self, column_metadata, meta) 173 # Read and construct the dictionary with appropriate minimum data ID 174 min_data_id_adj = meta['min_data_id'] - null_adjustment --> 175 dictionary = self._read_dictionary(dictionary_buffer, min_data_id=meta['min_data_id']) 176 data_slice = get_data_slice(self._data_model,column_metadata["IDF"]) 177 return pd.Series(self._read_rle_bit_packed_hybrid(data_slice, meta['count_bit_packed'], min_data_id_adj , meta['bit_width'])).map(dictionary) File c:\Users\abcde\onedrive\Python\PBIX\.venv\Lib\site-packages\pbixray\vertipaq_decoder.py:120, in VertiPaqDecoder._read_dictionary(self, buffer, min_data_id) 
118 """Reads a dictionary from a buffer.""" 119 with io.BytesIO(buffer) as f: --> 120 dictionary = ColumnDataDictionary.from_io(f) 122 if dictionary.dictionary_type == ColumnDataDictionary.DictionaryTypes.xm_type_string: 123 hashtable = {} File c:\Users\abcde\onedrive\Python\PBIX\.venv\Lib\site-packages\kaitaistruct.py:56, in KaitaiStruct.from_io(cls, io) 54 @classmethod 55 def from_io(cls, io): ---> 56 return cls(KaitaiStream(io)) File c:\Users\abcde\onedrive\Python\PBIX\.venv\Lib\site-packages\pbixray\column_data\dictionary.py:22, in ColumnDataDictionary.init(self, _io, _parent, _root) 20 self._parent = _parent 21 self._root = _root if _root else self ---> 22 self._read() File c:\Users\abcde\onedrive\Python\PBIX\.venv\Lib\site-packages\pbixray\column_data\dictionary.py:29, in ColumnDataDictionary._read(self) 27 _on = self.dictionary_type 28 if _on == ColumnDataDictionary.DictionaryTypes.xm_type_string: ---> 29 self.data = ColumnDataDictionary.StringData(self._io, self, self._root) 30 elif _on == ColumnDataDictionary.DictionaryTypes.xm_type_long: 31 self.data = ColumnDataDictionary.NumberData(self._io, self, self._root) File c:\Users\abcde\onedrive\Python\PBIX\.venv\Lib\site-packages\pbixray\column_data\dictionary.py:52, in ColumnDataDictionary.StringData.init(self, _io, _parent, _root) 50 self._parent = _parent 51 self._root = _root if _root else self ---> 52 self._read() File c:\Users\abcde\onedrive\Python\PBIX\.venv\Lib\site-packages\pbixray\column_data\dictionary.py:58, in ColumnDataDictionary.StringData._read(self) 56 self.dictionary_pages = [] 57 for i in range(self.page_layout_information.store_page_count): ---> 58 self.dictionary_pages.append(ColumnDataDictionary.DictionaryPage(self._io, self, self._root)) 60 self.dictionary_record_handles_vector_info = ColumnDataDictionary.DictionaryRecordHandlesVector(self._io, self, self._root) File c:\Users\abcde\onedrive\Python\PBIX\.venv\Lib\site-packages\pbixray\column_data\dictionary.py:171, in 
ColumnDataDictionary.DictionaryPage.init(self, _io, _parent, _root) 169 self._parent = _parent 170 self._root = _root if _root else self --> 171 self._read() File c:\Users\abcde\onedrive\Python\PBIX\.venv\Lib\site-packages\pbixray\column_data\dictionary.py:189, in ColumnDataDictionary.DictionaryPage._read(self) 187 self.string_store_end_mark = self._io.read_bytes(4) 188 if not self.string_store_end_mark == b\\xCD\xAB\xCD\xAB: --> 189 raise kaitaistruct.ValidationNotEqualError(b\\xCD\xAB\xCD\xAB, self.string_store_end_mark, self._io, u"/types/dictionary_page/seq/7") ValidationNotEqualError: /types/dictionary_page/seq/7: at pos 112632: validation failed: not equal, expected b'\xcd\xab\xcd\xab', but got b'\xab\xcd\xabL'"
}
I can confirm that this error is reproducible with the Product table in Adventure Works, Internet Sales.pbix.
I'm having the same issue. Is there a way to read tables while excluding measures, as a workaround?