
Complexity embedding error during interval-related feature extraction

melisasaygin opened this issue on Jan 10, 2024 · 2 comments

Question and context

The code below works for some participants' respiration data but not for others. Beforehand, I visualize the peaks and troughs, and everything looks fine. Then I try to extract features for each of my tasks, and for some participants' data I encounter the error below.

Below is the code

```python
import pandas as pd
import neurokit2 as nk

tasks = ['12.0a', '12.0b', '12.0c', '12.0d', '12.0e', '12.0f',
         '14.0a', '14.0b', '14.0c', '14.0d', '14.0e', '14.0f',
         '16.0a', '16.0b', '16.0c', '16.0d', '16.0e', '16.0f',
         '18.0a', '18.0b', '18.0c', '20.0a', '20.0b', '20.0c',
         '23.0a', '23.0b', '23.0c', '25.0a', '25.0b', '25.0c',
         '28.0a', '28.0b', '28.0c', '30.0a', '30.0b', '30.0c',
         34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 47.0, 49.0, 51.0,
         53.0, 55.0, 57.0, 60.0, 62.0, 64.0, 66.0, 68.0, 70.0,
         '75.0a', '75.0b', '75.0c', '78.0a', '78.0b', '78.0c',
         83.0, 85.0, 87.0, 89.0, 91.0, 93.0]

participant_id = 25919  # Example participant ID
processed_dataframes = all_processed_data[participant_id]

# Initialize empty list
interval_features_list = []

# Iterate over tasks and corresponding dataframes
for task, df in processed_dataframes.items():
    # Filter the dataframe for the actual task duration
    task_specific_df = df[df['Task_Label_2'] == task]

    # Extract interval-related features using nk.rsp_intervalrelated()
    features_df = nk.rsp_intervalrelated(task_specific_df, sampling_rate=256)

    # Add a column for the task label
    features_df['Task_Label'] = task

    # Append the features DataFrame to the list
    interval_features_list.append(features_df)

# Concatenate all feature DataFrames
interval_features_df = pd.concat(interval_features_list, ignore_index=True)

# Display the DataFrame
interval_features_df
```

Below is the error I get

```
C:\Users\msa583\AppData\Local\anaconda3\lib\site-packages\neurokit2\rsp\rsp_intervalrelated.py:97: RuntimeWarning: Mean of empty slice
  output["RSP_Rate_Mean"] = np.nanmean(data["RSP_Rate"].values)
C:\Users\msa583\AppData\Local\anaconda3\lib\site-packages\numpy\lib\nanfunctions.py:1878: RuntimeWarning: Degrees of freedom <= 0 for slice.
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
C:\Users\msa583\AppData\Local\anaconda3\lib\site-packages\numpy\core\_methods.py:265: RuntimeWarning: Degrees of freedom <= 0 for slice
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
C:\Users\msa583\AppData\Local\anaconda3\lib\site-packages\numpy\core\_methods.py:257: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[122], line 24
     21 task_specific_df = df[df['Task_Label_2'] == task]
     23 # Extract interval-related features using nk.rsp_intervalrelated()
---> 24 features_df = nk.rsp_intervalrelated(task_specific_df, sampling_rate=256)
     26 # Add a column for the task label
     27 features_df['Task_Label'] = task

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\rsp\rsp_intervalrelated.py:65, in rsp_intervalrelated(data, sampling_rate)
     63 # If one interval dataframe
     64 if isinstance(data, pd.DataFrame):
---> 65     intervals = _rsp_intervalrelated_features(data, sampling_rate)
     66     intervals = pd.DataFrame.from_dict(intervals, orient="index").T
     68 # If data is a dict (containing multiple intervals)

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\rsp\rsp_intervalrelated.py:98, in _rsp_intervalrelated_features(data, sampling_rate, output)
     96 if "RSP_Rate" in colnames:
     97     output["RSP_Rate_Mean"] = np.nanmean(data["RSP_Rate"].values)
---> 98     rrv = rsp_rrv(data, sampling_rate=sampling_rate)
     99     output.update(rrv.to_dict(orient="records")[0])
    101 if "RSP_Amplitude" in colnames:

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\rsp\rsp_rrv.py:118, in rsp_rrv(rsp_rate, troughs, sampling_rate, show, silent)
    114 rrv.update(_rsp_rrv_time(bbi))
    115 rrv.update(
    116     _rsp_rrv_frequency(rsp_period, sampling_rate=sampling_rate, show=show, silent=silent)
    117 )
--> 118 rrv.update(_rsp_rrv_nonlinear(bbi))
    120 rrv = pd.DataFrame.from_dict(rrv, orient="index").T.add_prefix("RRV")
    122 if show:

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\rsp\rsp_rrv.py:224, in _rsp_rrv_nonlinear(bbi)
    214 out["SD2SD1"] = out["SD2"] / out["SD1"]
    216 # CSI / CVI
    217 # T = 4 * out["SD1"]
    218 # L = 4 * out["SD2"]
    (...)
    223 # Entropy
--> 224 out["ApEn"] = entropy_approximate(bbi, dimension=2)[0]
    225 out["SampEn"] = entropy_sample(bbi, dimension=2, tolerance=0.2 * np.std(bbi, ddof=1))[0]
    227 # DFA

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\complexity\entropy_approximate.py:102, in entropy_approximate(signal, delay, dimension, tolerance, corrected, **kwargs)
     98 # Compute index
     99 if corrected is False:
    100     # ApEn is implemented in 'utils_entropy.py' to avoid circular imports
    101     # as one of the method for optimizing tolerance relies on ApEn
--> 102     out, _ = _entropy_apen(signal, delay, dimension, info["Tolerance"], **kwargs)
    103 else:
    104     out = _entropy_capen(signal, delay, dimension, info["Tolerance"], **kwargs)

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\complexity\utils_entropy.py:14, in _entropy_apen(signal, delay, dimension, tolerance, **kwargs)
     13 def _entropy_apen(signal, delay, dimension, tolerance, **kwargs):
---> 14     phi, info = _phi(
     15         signal,
     16         delay=delay,
     17         dimension=dimension,
     18         tolerance=tolerance,
     19         approximate=True,
     20         **kwargs,
     21     )
     23     return np.abs(np.subtract(phi[0], phi[1])), info

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\complexity\utils_entropy.py:57, in _phi(signal, delay, dimension, tolerance, distance, approximate, fuzzy, kdtree1, kdtree2, **kwargs)
     45 # Embed signal at m and m+1
     46 embedded1, count1, kdtree1 = _get_count(
     47     signal,
     48     delay,
    (...)
     54     kdtree=kdtree1,
     55 )
---> 57 embedded2, count2, kdtree2 = _get_count(
     58     signal,
     59     delay,
     60     dimension + 1,
     61     tolerance,
     62     distance=distance,
     63     approximate=True,
     64     fuzzy=fuzzy,
     65     kdtree=kdtree2,
     66 )
     68 # Initialize phi
     69 phi = np.zeros(2)

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\complexity\utils_entropy.py:123, in _get_count(signal, delay, dimension, tolerance, distance, approximate, fuzzy, kdtree, n, **kwargs)
    115 """
    116 This is usually the bottleneck for several complexity methods, in particular in the counting.
    117 That's why we allow the possibility of giving kdtrees as pre-computed (used in the optimization
    118 of tolerance via MaxApEn which computes iteratively the value with multiple tolerances).
    119 However, more improvements are welcome!
    120 """
    121 # Get embedded
    122 # -------------------
--> 123 embedded = complexity_embedding(signal, delay=delay, dimension=dimension)
    124 if approximate is False:
    125     embedded = embedded[:-1]  # Removes the last line

File ~\AppData\Local\anaconda3\lib\site-packages\neurokit2\complexity\utils_complexity_embedding.py:141, in complexity_embedding(signal, delay, dimension, show, **kwargs)
    139 dimension = int(dimension)
    140 if dimension * delay > N:
--> 141     raise ValueError(
    142         "NeuroKit error: complexity_embedding(): dimension * delay should be lower than",
    143         " the length of the signal.",
    144     )
    145 if delay < 1:
    146     raise ValueError("NeuroKit error: complexity_embedding(): 'delay' has to be at least 1.")

ValueError: ('NeuroKit error: complexity_embedding(): dimension * delay should be lower than', ' the length of the signal.')
```
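If I read the final frame correctly, the failing check is `dimension * delay > N` in `complexity_embedding()`. ApEn is computed on the breath-to-breath intervals (`bbi`) with `dimension=2` (and `delay` defaulting to 1), and internally embeds at `dimension + 1 = 3`, so a segment seems to need at least three breath-to-breath intervals, i.e. at least four detected troughs. A minimal sketch reproducing the condition (the `bbi` values here are made up for illustration):

```python
import numpy as np
import neurokit2 as nk

# A task segment with only three detected troughs yields two
# breath-to-breath intervals; embedding at dimension 3 (delay 1)
# then fails because dimension * delay = 3 > len(bbi) = 2.
bbi = np.array([812.5, 835.9])  # hypothetical intervals in ms

nk.complexity_embedding(bbi, delay=1, dimension=3)
# ValueError: ('NeuroKit error: complexity_embedding(): dimension * delay
# should be lower than', ' the length of the signal.')
```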

Do you know how I can resolve this?
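In the meantime, a workaround I am considering is skipping task segments that are too short for the entropy indices, continuing from the code above, roughly like this (just a sketch, not an official NeuroKit fix; it assumes the processed dataframes contain the binary `RSP_Troughs` column that `nk.rsp_process()` outputs, and the threshold of four troughs is my own guess from the traceback):

```python
MIN_TROUGHS = 4  # assumption: ApEn needs >= 3 breath-to-breath intervals

for task, df in processed_dataframes.items():
    task_specific_df = df[df['Task_Label_2'] == task]

    # Skip segments with too few complete breath cycles to embed
    if task_specific_df['RSP_Troughs'].sum() < MIN_TROUGHS:
        print(f"Skipping task {task}: too few breaths for RRV/entropy")
        continue

    try:
        features_df = nk.rsp_intervalrelated(task_specific_df, sampling_rate=256)
    except ValueError as error:  # e.g. the complexity_embedding length check
        print(f"Skipping task {task}: {error}")
        continue

    features_df['Task_Label'] = task
    interval_features_list.append(features_df)
```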

melisasaygin · Jan 10, 2024