ProteomicsVisualization
ProteomicsVisualization copied to clipboard
Figure 1: Reading .raw data
The original code may be wrong. According to my tests, this is the correct version:
def load_thermo_raw(
    raw_file,
    most_abundant=1000
):
    """
    Load a Thermo raw file and extract all spectra.

    Every scan from ``FirstSpectrumNumber`` to ``LastSpectrumNumber``
    (inclusive) is read. A scan whose extraction raises is logged and
    skipped as a whole, so the per-scan lists always stay aligned.

    Args:
        raw_file: Identifier of the raw file; handed to ``RawFileReader``
            and echoed in the log message for unreadable scans.
        most_abundant: Forwarded to ``get_most_abundant`` for every MS2
            scan (presumably the number of peaks to keep -- confirm
            against that helper). Defaults to 1000.

    Returns:
        dict: Scan data split by MS order, with the keys

            * ``scan_list_ms1`` / ``scan_list_ms2``: scan numbers (array)
            * ``rt_list_ms1`` / ``rt_list_ms2``: values of
              ``RTFromScanNum`` (array)
            * ``mass_list_ms1`` / ``int_list_ms1``: per-scan centroid
              arrays wrapped in an object array
            * ``mass_list_ms2`` / ``int_list_ms2``: per-scan centroid
              arrays as a plain Python list
            * ``ms_list_ms1`` / ``ms_list_ms2``: MS order per scan (array)
            * ``prec_mass_list2``: ``calculate_mass(mono_mz, charge)`` for
              each MS2 scan (array)
            * ``mono_mzs2`` / ``charge_ms2``: values returned by
              ``GetMS2MonoMzAndChargeFromScanNum`` for MS2 scans (array)
    """
    # NOTE(review): RawFileReader must already be in scope; the original
    # carried a commented-out `from pyrawfilereader import RawFileReader`.
    rawfile = RawFileReader(raw_file)

    # try/finally guarantees the reader is closed even when a helper
    # below raises or the user interrupts the scan loop.
    try:
        scan_list = []
        rt_list = []
        mass_list = []
        int_list = []
        ms_list = []
        mono_mzs_list = []
        charge_list = []

        scan_numbers = range(
            rawfile.FirstSpectrumNumber, rawfile.LastSpectrumNumber + 1
        )

        for scan in tqdm(scan_numbers):
            # Gather everything for this scan first and append only once it
            # all succeeded; a half-appended scan would shift every later
            # entry when the lists are split by MS order.
            try:
                ms_order = rawfile.GetMSOrderForScanNum(scan).value__
                rt = rawfile.RTFromScanNum(scan)

                if ms_order == 2:
                    # The precursor m/z is not part of the returned data;
                    # the call is kept so that a scan whose precursor
                    # lookup fails is still skipped, as before.
                    rawfile.GetPrecursorMassForScanNum(scan, 0)
                    mono_mz, charge = rawfile.GetMS2MonoMzAndChargeFromScanNum(scan)
                else:
                    mono_mz, charge = 0, 0

                masses, intensity = rawfile.GetCentroidMassListFromScanNum(scan)

                if ms_order == 2:
                    masses, intensity = get_most_abundant(
                        masses, intensity, most_abundant
                    )

                masses = np.array(masses)
                intensity = np.array(intensity, dtype=np.int64)
            except Exception:
                # KeyboardInterrupt / SystemExit are not Exception
                # subclasses, so they propagate without explicit re-raise.
                logging.info(f"Bad scan={scan} in raw file '{raw_file}'")
                continue

            scan_list.append(scan)
            rt_list.append(rt)
            mass_list.append(masses)
            int_list.append(intensity)
            ms_list.append(ms_order)
            mono_mzs_list.append(mono_mz)
            charge_list.append(charge)

        def _select(values, order):
            # Keep the entries of `values` whose scan has the given MS order.
            return [v for v, o in zip(values, ms_list) if o == order]

        mono_mzs2 = _select(mono_mzs_list, 2)
        charge2 = _select(charge_list, 2)
        prec_mass_list2 = [
            calculate_mass(mono_mz, charge)
            for mono_mz, charge in zip(mono_mzs2, charge2)
        ]

        check_sanity(mass_list)

        data = {}

        data["scan_list_ms1"] = np.array(_select(scan_list, 1))
        data["rt_list_ms1"] = np.array(_select(rt_list, 1))
        data["mass_list_ms1"] = np.array(_select(mass_list, 1), dtype=object)
        data["int_list_ms1"] = np.array(_select(int_list, 1), dtype=object)
        data["ms_list_ms1"] = np.array(_select(ms_list, 1))

        data["scan_list_ms2"] = np.array(_select(scan_list, 2))
        data["rt_list_ms2"] = np.array(_select(rt_list, 2))
        # The MS2 peak lists are intentionally left as plain lists of
        # arrays, exactly as the original returned them.
        data["mass_list_ms2"] = _select(mass_list, 2)
        data["int_list_ms2"] = _select(int_list, 2)
        data["ms_list_ms2"] = np.array(_select(ms_list, 2))
        data["prec_mass_list2"] = np.array(prec_mass_list2)
        data["mono_mzs2"] = np.array(mono_mzs2)
        data["charge_ms2"] = np.array(charge2)

        return data
    finally:
        rawfile.Close()