FEDOT
FEDOT copied to clipboard
DataMerger fails when merging indices with different types
Failing example: the traceback below shows that a `str` index can't be merged with an `int` index.
Possibly this would also fail for two non-numeric indices.
2022-07-28T15:17:14.1922292Z test/unit/utilities/test_project_import_export.py:88:
2022-07-28T15:17:14.1922714Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2022-07-28T15:17:14.1923082Z fedot/api/main.py:164: in fit
2022-07-28T15:17:14.1923533Z self._train_pipeline_on_full_dataset(recommendations, full_train_not_preprocessed)
2022-07-28T15:17:14.1924029Z fedot/api/main.py:422: in _train_pipeline_on_full_dataset
2022-07-28T15:17:14.1924537Z n_jobs=self.params.api_params['n_jobs'],
2022-07-28T15:17:14.1924958Z fedot/core/pipelines/pipeline.py:161: in fit
2022-07-28T15:17:14.1925366Z use_fitted_operations=use_fitted)
2022-07-28T15:17:14.1925818Z fedot/core/pipelines/pipeline.py:116: in _fit
2022-07-28T15:17:14.1926265Z train_predicted = self.root_node.fit(input_data=input_data)
2022-07-28T15:17:14.1926664Z fedot/core/pipelines/node.py:335: in fit
2022-07-28T15:17:14.1927276Z secondary_input = self._input_from_parents(input_data=input_data, parent_operation='fit')
2022-07-28T15:17:14.1927789Z fedot/core/pipelines/node.py:363: in _input_from_parents
2022-07-28T15:17:14.1928185Z parent_operation)
2022-07-28T15:17:14.1928575Z fedot/core/pipelines/node.py:402: in _combine_parents
2022-07-28T15:17:14.1929017Z prediction = parent.fit(input_data=input_data)
2022-07-28T15:17:14.1929430Z fedot/core/pipelines/node.py:335: in fit
2022-07-28T15:17:14.1930029Z secondary_input = self._input_from_parents(input_data=input_data, parent_operation='fit')
2022-07-28T15:17:14.1930525Z fedot/core/pipelines/node.py:363: in _input_from_parents
2022-07-28T15:17:14.1930915Z parent_operation)
2022-07-28T15:17:14.1931297Z fedot/core/pipelines/node.py:402: in _combine_parents
2022-07-28T15:17:14.1931738Z prediction = parent.fit(input_data=input_data)
2022-07-28T15:17:14.1932151Z fedot/core/pipelines/node.py:335: in fit
2022-07-28T15:17:14.1932749Z secondary_input = self._input_from_parents(input_data=input_data, parent_operation='fit')
2022-07-28T15:17:14.1933247Z fedot/core/pipelines/node.py:365: in _input_from_parents
2022-07-28T15:17:14.1933699Z secondary_input = DataMerger.get(parent_results).merge()
2022-07-28T15:17:14.1934152Z fedot/core/data/merge/data_merger.py:56: in get
2022-07-28T15:17:14.1934528Z return cls(outputs, data_type)
2022-07-28T15:17:14.1934935Z fedot/core/data/merge/data_merger.py:30: in __init__
2022-07-28T15:17:14.1935369Z self.common_indices = find_common_elements(*idx_list)
2022-07-28T15:17:14.1935822Z fedot/core/data/array_utilities.py:10: in find_common_elements
2022-07-28T15:17:14.1936405Z common_elements = reduce(np.intersect1d, indices[1:], indices[0])
2022-07-28T15:17:14.1936873Z <__array_function__ internals>:6: in intersect1d
2022-07-28T15:17:14.1937204Z ???
2022-07-28T15:17:14.1937537Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2022-07-28T15:17:14.1937801Z
2022-07-28T15:17:14.1938176Z ar1 = array(['Alpen gold', 'Alpen gold1', 'Rossia shedraya dusha',
2022-07-28T15:17:14.1938848Z 'Rossia shedraya dusha1', 'Shipuchka', 'Shipuchka1...acks1', 'Werthers Original Caramel',
2022-07-28T15:17:14.1939461Z 'Werthers Original Caramel1', 'Whoppers', 'Whoppers1'],
2022-07-28T15:17:14.1939851Z dtype=object)
2022-07-28T15:17:14.1940285Z ar2 = array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
2022-07-28T15:17:14.1940616Z 17, 18, 19])
2022-07-28T15:17:14.1940982Z assume_unique = False, return_indices = False
2022-07-28T15:17:14.1941265Z
2022-07-28T15:17:14.1941512Z @array_function_dispatch(_intersect1d_dispatcher)
2022-07-28T15:17:14.1941956Z def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
2022-07-28T15:17:14.1942422Z """
2022-07-28T15:17:14.1942780Z Find the intersection of two arrays.
2022-07-28T15:17:14.1943124Z
2022-07-28T15:17:14.1943516Z Return the sorted, unique values that are in both of the input arrays.
2022-07-28T15:17:14.1943892Z
2022-07-28T15:17:14.1944191Z Parameters
2022-07-28T15:17:14.1944546Z ----------
2022-07-28T15:17:14.1944881Z ar1, ar2 : array_like
2022-07-28T15:17:14.1945280Z Input arrays. Will be flattened if not already 1D.
2022-07-28T15:17:14.1945678Z assume_unique : bool
2022-07-28T15:17:14.1946097Z If True, the input arrays are both assumed to be unique, which
2022-07-28T15:17:14.1946562Z can speed up the calculation. Default is False.
2022-07-28T15:17:14.1946936Z return_indices : bool
2022-07-28T15:17:14.1947352Z If True, the indices which correspond to the intersection of the two
2022-07-28T15:17:14.1947857Z arrays are returned. The first instance of a value is used if there are
2022-07-28T15:17:14.1948299Z multiple. Default is False.
2022-07-28T15:17:14.1948636Z
2022-07-28T15:17:14.1949000Z .. versionadded:: 1.15.0
2022-07-28T15:17:14.1949310Z
2022-07-28T15:17:14.1949600Z Returns
2022-07-28T15:17:14.1949957Z -------
2022-07-28T15:17:14.1950290Z intersect1d : ndarray
2022-07-28T15:17:14.1950844Z Sorted 1D array of common and unique elements.
2022-07-28T15:17:14.1951220Z comm1 : ndarray
2022-07-28T15:17:14.1951622Z The indices of the first occurrences of the common values in `ar1`.
2022-07-28T15:17:14.1952092Z Only provided if `return_indices` is True.
2022-07-28T15:17:14.1952465Z comm2 : ndarray
2022-07-28T15:17:14.1952863Z The indices of the first occurrences of the common values in `ar2`.
2022-07-28T15:17:14.1953327Z Only provided if `return_indices` is True.
2022-07-28T15:17:14.1953749Z
2022-07-28T15:17:14.1954015Z
2022-07-28T15:17:14.1954298Z See Also
2022-07-28T15:17:14.1954665Z --------
2022-07-28T15:17:14.1955087Z numpy.lib.arraysetops : Module with a number of other functions for
2022-07-28T15:17:14.1955566Z performing set operations on arrays.
2022-07-28T15:17:14.1955907Z
2022-07-28T15:17:14.1956201Z Examples
2022-07-28T15:17:14.1956552Z --------
2022-07-28T15:17:14.1956909Z >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
2022-07-28T15:17:14.1957404Z array([1, 3])
2022-07-28T15:17:14.1958062Z
2022-07-28T15:17:14.1958418Z To intersect more than two arrays, use functools.reduce:
2022-07-28T15:17:14.1958687Z
2022-07-28T15:17:14.1958903Z >>> from functools import reduce
2022-07-28T15:17:14.1959204Z >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
2022-07-28T15:17:14.1959650Z array([3])
2022-07-28T15:17:14.1959849Z
2022-07-28T15:17:14.1960105Z To return the indices of the values common to the input arrays
2022-07-28T15:17:14.1960415Z along with the intersected values:
2022-07-28T15:17:14.1960641Z
2022-07-28T15:17:14.1960836Z >>> x = np.array([1, 1, 2, 3, 4])
2022-07-28T15:17:14.1961075Z >>> y = np.array([2, 1, 4, 6])
2022-07-28T15:17:14.1961369Z >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
2022-07-28T15:17:14.1961628Z >>> x_ind, y_ind
2022-07-28T15:17:14.1961860Z (array([0, 2, 4]), array([1, 0, 2]))
2022-07-28T15:17:14.1962093Z >>> xy, x[x_ind], y[y_ind]
2022-07-28T15:17:14.1962332Z (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))
2022-07-28T15:17:14.1962549Z
2022-07-28T15:17:14.1962724Z """
2022-07-28T15:17:14.1962921Z ar1 = np.asanyarray(ar1)
2022-07-28T15:17:14.1963156Z ar2 = np.asanyarray(ar2)
2022-07-28T15:17:14.1963358Z
2022-07-28T15:17:14.1963553Z if not assume_unique:
2022-07-28T15:17:14.1963854Z if return_indices:
2022-07-28T15:17:14.1964115Z ar1, ind1 = unique(ar1, return_index=True)
2022-07-28T15:17:14.1964403Z ar2, ind2 = unique(ar2, return_index=True)
2022-07-28T15:17:14.1964618Z else:
2022-07-28T15:17:14.1964825Z ar1 = unique(ar1)
2022-07-28T15:17:14.1965042Z ar2 = unique(ar2)
2022-07-28T15:17:14.1965233Z else:
2022-07-28T15:17:14.1965433Z ar1 = ar1.ravel()
2022-07-28T15:17:14.1965646Z ar2 = ar2.ravel()
2022-07-28T15:17:14.1965829Z
2022-07-28T15:17:14.1966048Z aux = np.concatenate((ar1, ar2))
2022-07-28T15:17:14.1966290Z if return_indices:
2022-07-28T15:17:14.1966764Z aux_sort_indices = np.argsort(aux, kind='mergesort')
2022-07-28T15:17:14.1967042Z aux = aux[aux_sort_indices]
2022-07-28T15:17:14.1967256Z else:
2022-07-28T15:17:14.1967442Z > aux.sort()
2022-07-28T15:17:14.1967829Z E TypeError: '<' not supported between instances of 'int' and 'str'
2022-07-28T15:17:14.1968036Z
@gkirgizov, ещё актуально? Похоже, что так и должно быть: непонятно, как мержить `str` и `int` в общем случае.