
Complexity embedding error during interval-related feature extraction

melisasaygin opened this issue on Jan 10, 2024 · 2 comments

Question and context

The code below works for some participants' respiration data but not for others. Beforehand, I visualize the peaks and troughs, and everything looks fine. Then I try to extract features for each of my tasks, and for some participants' data I encounter the error below.

Below is the code

```python
import pandas as pd
import neurokit2 as nk

tasks = ['12.0a', '12.0b', '12.0c', '12.0d', '12.0e', '12.0f',
         '14.0a', '14.0b', '14.0c', '14.0d', '14.0e', '14.0f',
         '16.0a', '16.0b', '16.0c', '16.0d', '16.0e', '16.0f',
         '18.0a', '18.0b', '18.0c', '20.0a', '20.0b', '20.0c',
         '23.0a', '23.0b', '23.0c', '25.0a', '25.0b', '25.0c',
         '28.0a', '28.0b', '28.0c', '30.0a', '30.0b', '30.0c',
         34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 47.0, 49.0, 51.0,
         53.0, 55.0, 57.0, 60.0, 62.0, 64.0, 66.0, 68.0, 70.0,
         '75.0a', '75.0b', '75.0c', '78.0a', '78.0b', '78.0c',
         83.0, 85.0, 87.0, 89.0, 91.0, 93.0]

participant_id = 25919  # Example participant ID
processed_dataframes = all_processed_data[participant_id]

# Initialize empty list
interval_features_list = []

# Iterate over tasks and corresponding dataframes
for task, df in processed_dataframes.items():
    # Filter the dataframe for the actual task duration
    task_specific_df = df[df['Task_Label_2'] == task]

    # Extract interval-related features using nk.rsp_intervalrelated()
    features_df = nk.rsp_intervalrelated(task_specific_df, sampling_rate=256)

    # Add a column for the task label
    features_df['Task_Label'] = task

    # Append the features DataFrame to the list
    interval_features_list.append(features_df)

# Concatenate all feature DataFrames
interval_features_df = pd.concat(interval_features_list, ignore_index=True)

# Display the DataFrame
interval_features_df
```

Below is the error I get

```
C:\Users\msa583\AppData\Local\anaconda3\lib\site-packages\neurokit2\rsp\rsp_intervalrelated.py:97: RuntimeWarning: Mean of empty slice
  output["RSP_Rate_Mean"] = np.nanmean(data["RSP_Rate"].values)
C:\Users\msa583\AppData\Local\anaconda3\lib\site-packages\numpy\lib\nanfunctions.py:1878: RuntimeWarning: Degrees of freedom <= 0 for slice.
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
C:\Users\msa583\AppData\Local\anaconda3\lib\site-packages\numpy\core\_methods.py:265: RuntimeWarning: Degrees of freedom <= 0 for slice
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
C:\Users\msa583\AppData\Local\anaconda3\lib\site-packages\numpy\core\_methods.py:257: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[122], line 24
     21 task_specific_df = df[df['Task_Label_2'] == task]
     23 # Extract interval-related features using nk.rsp_intervalrelated()
---> 24 features_df = nk.rsp_intervalrelated(task_specific_df, sampling_rate=256)
     26 # Add a column for the task label
     27 features_df['Task_Label'] = task

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\rsp\rsp_intervalrelated.py:65, in rsp_intervalrelated(data, sampling_rate)
     63 # If one interval dataframe
     64 if isinstance(data, pd.DataFrame):
---> 65     intervals = _rsp_intervalrelated_features(data, sampling_rate)
     66     intervals = pd.DataFrame.from_dict(intervals, orient="index").T
     68 # If data is a dict (containing multiple intervals)

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\rsp\rsp_intervalrelated.py:98, in _rsp_intervalrelated_features(data, sampling_rate, output)
     96 if "RSP_Rate" in colnames:
     97     output["RSP_Rate_Mean"] = np.nanmean(data["RSP_Rate"].values)
---> 98     rrv = rsp_rrv(data, sampling_rate=sampling_rate)
     99     output.update(rrv.to_dict(orient="records")[0])
    101 if "RSP_Amplitude" in colnames:

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\rsp\rsp_rrv.py:118, in rsp_rrv(rsp_rate, troughs, sampling_rate, show, silent)
    114 rrv.update(_rsp_rrv_time(bbi))
    115 rrv.update(
    116     _rsp_rrv_frequency(rsp_period, sampling_rate=sampling_rate, show=show, silent=silent)
    117 )
--> 118 rrv.update(_rsp_rrv_nonlinear(bbi))
    120 rrv = pd.DataFrame.from_dict(rrv, orient="index").T.add_prefix("RRV")
    122 if show:

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\rsp\rsp_rrv.py:224, in _rsp_rrv_nonlinear(bbi)
    214 out["SD2SD1"] = out["SD2"] / out["SD1"]
    216 # CSI / CVI
    217 # T = 4 * out["SD1"]
    218 # L = 4 * out["SD2"]
    (...)
    223 # Entropy
--> 224 out["ApEn"] = entropy_approximate(bbi, dimension=2)[0]
    225 out["SampEn"] = entropy_sample(bbi, dimension=2, tolerance=0.2 * np.std(bbi, ddof=1))[0]
    227 # DFA

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\complexity\entropy_approximate.py:102, in entropy_approximate(signal, delay, dimension, tolerance, corrected, **kwargs)
     98 # Compute index
     99 if corrected is False:
    100     # ApEn is implemented in 'utils_entropy.py' to avoid circular imports
    101     # as one of the method for optimizing tolerance relies on ApEn
--> 102     out, _ = _entropy_apen(signal, delay, dimension, info["Tolerance"], **kwargs)
    103 else:
    104     out = _entropy_capen(signal, delay, dimension, info["Tolerance"], **kwargs)

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\complexity\utils_entropy.py:14, in _entropy_apen(signal, delay, dimension, tolerance, **kwargs)
     13 def _entropy_apen(signal, delay, dimension, tolerance, **kwargs):
---> 14     phi, info = _phi(
     15         signal,
     16         delay=delay,
     17         dimension=dimension,
     18         tolerance=tolerance,
     19         approximate=True,
     20         **kwargs,
     21     )
     23     return np.abs(np.subtract(phi[0], phi[1])), info

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\complexity\utils_entropy.py:57, in _phi(signal, delay, dimension, tolerance, distance, approximate, fuzzy, kdtree1, kdtree2, **kwargs)
     45 # Embed signal at m and m+1
     46 embedded1, count1, kdtree1 = _get_count(
     47     signal,
     48     delay,
    (...)
     54     kdtree=kdtree1,
     55 )
---> 57 embedded2, count2, kdtree2 = _get_count(
     58     signal,
     59     delay,
     60     dimension + 1,
     61     tolerance,
     62     distance=distance,
     63     approximate=True,
     64     fuzzy=fuzzy,
     65     kdtree=kdtree2,
     66 )
     68 # Initialize phi
     69 phi = np.zeros(2)

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\complexity\utils_entropy.py:123, in _get_count(signal, delay, dimension, tolerance, distance, approximate, fuzzy, kdtree, n, **kwargs)
    115 """
    116 This is usually the bottleneck for several complexity methods, in particular in the counting.
    117 That's why we allow the possibility of giving kdtrees as pre-computed (used in the optimization
    118 of tolerance via MaxApEn which computes iteratively the value with multiple tolerances).
    119 However, more improvements are welcome!
    120 """
    121 # Get embedded
    122 # -------------------
--> 123 embedded = complexity_embedding(signal, delay=delay, dimension=dimension)
    124 if approximate is False:
    125     embedded = embedded[:-1]  # Removes the last line

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\complexity\utils_complexity_embedding.py:141, in complexity_embedding(signal, delay, dimension, show, **kwargs)
    139 dimension = int(dimension)
    140 if dimension * delay > N:
--> 141     raise ValueError(
    142         "NeuroKit error: complexity_embedding(): dimension * delay should be lower than",
    143         " the length of the signal.",
    144     )
    145 if delay < 1:
    146     raise ValueError("NeuroKit error: complexity_embedding(): 'delay' has to be at least 1.")

ValueError: ('NeuroKit error: complexity_embedding(): dimension * delay should be lower than', ' the length of the signal.')
```
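If I read the final frame correctly, the failing check is `dimension * delay > N` in `complexity_embedding()`. ApEn is computed on the breath-to-breath intervals (`bbi`) with `dimension=2` (and `delay` defaulting to 1), and internally embeds at `dimension + 1 = 3`, so a segment seems to need at least three breath-to-breath intervals, i.e. at least four detected troughs. A minimal sketch reproducing the condition (the `bbi` values here are made up for illustration):

```python
import numpy as np
import neurokit2 as nk

# A task segment with only three detected troughs yields two
# breath-to-breath intervals; embedding at dimension 3 (delay 1)
# then fails because dimension * delay = 3 > len(bbi) = 2.
bbi = np.array([812.5, 835.9])  # hypothetical intervals in ms

nk.complexity_embedding(bbi, delay=1, dimension=3)
# ValueError: ('NeuroKit error: complexity_embedding(): dimension * delay
# should be lower than', ' the length of the signal.')
```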

Do you know how I can resolve this?
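In the meantime, a workaround I am considering is skipping task segments that are too short for the entropy indices, continuing from the code above, roughly like this (just a sketch, not an official NeuroKit fix; it assumes the processed dataframes contain the binary `RSP_Troughs` column that `nk.rsp_process()` outputs, and the threshold of four troughs is my own guess from the traceback):

```python
MIN_TROUGHS = 4  # assumption: ApEn needs >= 3 breath-to-breath intervals

for task, df in processed_dataframes.items():
    task_specific_df = df[df['Task_Label_2'] == task]

    # Skip segments with too few complete breath cycles to embed
    if task_specific_df['RSP_Troughs'].sum() < MIN_TROUGHS:
        print(f"Skipping task {task}: too few breaths for RRV/entropy")
        continue

    try:
        features_df = nk.rsp_intervalrelated(task_specific_df, sampling_rate=256)
    except ValueError as error:  # e.g. the complexity_embedding length check
        print(f"Skipping task {task}: {error}")
        continue

    features_df['Task_Label'] = task
    interval_features_list.append(features_df)
```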

melisasaygin · Jan 10, 2024