LO-Shot
LO-Shot copied to clipboard
Unable to find lines using recursive_regression for higher dimensions
I was running the experiments, specifically trying to generate lines for high-dimensional data points. I was able to reproduce the error in this notebook. The snippet below can be run under the Multi-dim Experiments section.
seeds=range(500)
num_samples=11 #np.array([10,20,30,40,50,60,70,80,90,100])*2
#points_surf = np.random.poisson(lam=7, size=(49,2))*20
#print(len(np.unique(points_surf, axis=1)))
num_classes=3
num_lines=3
#num_dims=2
for num_dims in range(2,11):
fail_count=0
tc_list =[]
tp_list =[]
knnc_list=[]
nline_list=[]
for seed in tqdm(seeds):
# hardcode dimension count to 768
clist,plist,knn_correct, true_num_lines = multiD_experiment(num_samples, num_classes, num_lines, seed, False, brute=False, max_diff=0.01, center_box=(-20,20), num_dims=768)
total_points = sum(plist)
total_correct = sum(clist)
tp_list.append(total_points)
tc_list.append(total_correct)
knnc_list.append(knn_correct)
nline_list.append(true_num_lines)
#print("Correctly predicted: {0}/{1}".format(total_correct,total_points))
#print("Vanilla kNN predicted: {0}/{1}".format(knn_correct,total_points))
if len(tp_list)==100:
break
print("Dimension: {0}".format(num_dims))
print(np.mean(tc_list), np.mean(knnc_list), np.mean(nline_list), fail_count )
print(np.std(tc_list), np.std(knnc_list))
I got the following output, which seems to suggest that this problem is not mathematically solvable.
0%| | 0/500 [00:00<?, ?it/s] 0 1 2 ... 766 767 My Hopes And Dreams
0 18.608422 11.431569 7.906970 ... 0.177640 8.986997 1.0
1 18.728446 13.081384 4.447207 ... 1.386950 9.313738 1.0
2 2.167781 7.601209 3.927630 ... 19.934411 -3.890110 0.0
3 1.765849 7.463933 5.255089 ... 20.456780 -5.806031 0.0
4 8.885765 15.784543 13.242956 ... 6.850871 -5.471370 2.0
5 18.770369 9.909713 6.214410 ... -1.898342 7.821227 1.0
6 20.053602 11.852070 8.755898 ... -1.006263 8.715530 1.0
7 8.621864 16.963792 13.193678 ... 7.565738 -2.090369 2.0
8 2.566521 8.031120 4.064921 ... 18.256428 -4.626897 0.0
9 0.434333 9.727282 3.156674 ... 20.090864 -3.733480 0.0
10 9.285816 15.687726 12.829242 ... 7.120174 -3.397527 2.0
[11 rows x 769 columns]
[1] 3 2 3
[1] 2
R[write to console]: Error in solve.default(t(x) %*% x) :
system is computationally singular: reciprocal condition number = 6.69256e-22
Error in solve.default(t(x) %*% x) :
system is computationally singular: reciprocal condition number = 6.69256e-22
0%| | 0/500 [00:01<?, ?it/s]
---------------------------------------------------------------------------
RRuntimeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/rpy2/ipython/rmagic.py in eval(self, code)
267 # Need the newline in case the last line in code is a comment.
--> 268 value, visible = ro.r("withVisible({%s\n})" % code)
269 except (ri.embedded.RRuntimeError, ValueError) as exception:
13 frames
/usr/local/lib/python3.7/dist-packages/rpy2/robjects/__init__.py in __call__(self, string)
437 p = rinterface.parse(string)
--> 438 res = self.eval(p)
439 return conversion.rpy2py(res)
/usr/local/lib/python3.7/dist-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
198 return (super(SignatureTranslatedFunction, self)
--> 199 .__call__(*args, **kwargs))
200
/usr/local/lib/python3.7/dist-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
124 new_kwargs[k] = conversion.py2rpy(v)
--> 125 res = super(Function, self).__call__(*new_args, **new_kwargs)
126 res = conversion.rpy2py(res)
/usr/local/lib/python3.7/dist-packages/rpy2/rinterface_lib/conversion.py in _(*args, **kwargs)
44 def _(*args, **kwargs):
---> 45 cdata = function(*args, **kwargs)
46 # TODO: test cdata is of the expected CType
/usr/local/lib/python3.7/dist-packages/rpy2/rinterface.py in __call__(self, *args, **kwargs)
679 if error_occured[0]:
--> 680 raise embedded.RRuntimeError(_rinterface._geterrmessage())
681 return res
RRuntimeError: Error in solve.default(t(x) %*% x) :
system is computationally singular: reciprocal condition number = 6.69256e-22
During handling of the above exception, another exception occurred:
RInterpreterError Traceback (most recent call last)
<ipython-input-27-1abdead09a56> in <module>()
15 nline_list=[]
16 for seed in tqdm(seeds):
---> 17 clist,plist,knn_correct, true_num_lines = multiD_experiment(num_samples, num_classes, num_lines, seed, False, brute=False, max_diff=0.01, center_box=(-20,20), num_dims=768)
18 total_points = sum(plist)
19 total_correct = sum(clist)
<ipython-input-19-91b63ce35f6a> in multiD_experiment(num_samples, num_classes, num_lines, random_state, visualize, brute, max_diff, center_box, num_dims)
29 lines=[line_order(centroids, np.array(line)) for line in find_lines_brute(centroids,num_lines)]
30 else:
---> 31 lines = [line_order_no_endpoints(centroids, np.array(line)) for line in find_lines_R_multiD(dat,centroids,dims=num_dims, k=num_lines, max_diff=max_diff)]
32
33 if visualize:
<ipython-input-26-8869bbce1340> in find_lines_R_multiD(dat, centroids, dims, k, max_diff)
387 print(df)
388 #result1=[]
--> 389 get_ipython().magic('R -i df -i k -i max_diff -i dims -o result1 result1 <- recursive_reg(as.matrix(df[,-(dims+1)]), df[,dims+1]+1, k = k, max_diff = max_diff)')
390 lines=[list(r) for r in result1]
391 #print(lines)
/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py in magic(self, arg_s)
2158 magic_name, _, magic_arg_s = arg_s.partition(' ')
2159 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2160 return self.run_line_magic(magic_name, magic_arg_s)
2161
2162 #-------------------------------------------------------------------------
/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py in run_line_magic(self, magic_name, line)
2079 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2080 with self.builtin_trap:
-> 2081 result = fn(*args,**kwargs)
2082 return result
2083
<decorator-gen-119> in R(self, line, cell, local_ns)
/usr/local/lib/python3.7/dist-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
186 # but it's overkill for just that one bit of state.
187 def magic_deco(arg):
--> 188 call = lambda f, *a, **k: f(*a, **k)
189
190 if callable(arg):
/usr/local/lib/python3.7/dist-packages/rpy2/ipython/rmagic.py in R(self, line, cell, local_ns)
781 if not e.stdout.endswith(e.err):
782 print(e.err)
--> 783 raise e
784 finally:
785 if self.device in ['png', 'svg']:
/usr/local/lib/python3.7/dist-packages/rpy2/ipython/rmagic.py in R(self, line, cell, local_ns)
754 if line_mode:
755 for line in code.split(';'):
--> 756 text_result, result, visible = self.eval(line)
757 text_output += text_result
758 if text_result:
/usr/local/lib/python3.7/dist-packages/rpy2/ipython/rmagic.py in eval(self, code)
271 warning_or_other_msg = self.flush()
272 raise RInterpreterError(code, str(exception),
--> 273 warning_or_other_msg)
274 text_output = self.flush()
275 return text_output, value, visible[0]
RInterpreterError: Failed to parse and evaluate line 'result1 <- recursive_reg(as.matrix(df[,-(dims+1)]), df[,dims+1]+1, k = k, max_diff = max_diff)'.
R error message: 'Error in solve.default(t(x) %*% x) : \n system is computationally singular: reciprocal condition number = 6.69256e-22'
Please let me know how to proceed for such cases. Thanks!
In the add_classes function, there is a line that computes the inverse of the cross-product of the covariate matrix x with itself:
solve(t(x) %*% x)
It looks like you have used an x where nrow(x) < ncol(x) (here at most 11 rows against 768 columns). In that case t(x) %*% x has rank at most nrow(x), which makes it singular and hence not invertible.
A workaround could be to compute the generalised inverse (MASS::ginv in R) instead of calling solve.
I can confirm that the fix works; I will raise a patch for this issue in the meantime.