Daily-Dose-of-Data-Science
Daily-Dose-of-Data-Science copied to clipboard
When I use the code, the output is always the iris.data result, even though I changed the data.
Hi ChawlaAvi, when I use the code, the output is always the iris.data result, even though I changed the data.
# Reproduction script: fit a DecisionTreeClassifier on a local CSV file and
# pass the fitted model to the two idt.create_* helpers, each with its own
# .html save_path.
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

import interactive_decision_tree as idt  # local module

data = pd.read_csv('/Users/lee/Desktop/data-xy015.csv')
X = data.iloc[:, 0:31]  # columns 0..30 are used as features
y = data.iloc[:, 38]  # column 38 is used as the target

clf = DecisionTreeClassifier()
clf = clf.fit(X, y)

idt.create_tree(
    tree_model=clf,
    X=X,
    target_names=np.unique(y),
    save_path='C:/Users/lee/Desktop/PY01/tree_template.html',
)
idt.create_sankey(
    tree_model=clf,
    X=X,
    target_names=np.unique(y),
    save_path='C:/Users/lee/Desktop/PY01/sankey_template.html',
)
Have you used the iris dataset in the above code? Does /Users/lee/Desktop/data-xy015.csv refer to the iris dataset?
Hi shreevaths1, I didn't use the iris dataset. "/Users/lee/Desktop/data-xy015.csv" contains my own experiment data. I also tried a custom decision tree model (below), but the output is still the iris.data result.
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier

import interactive_decision_tree as idt
class MyDecisionTree:
    """Thin wrapper around a scikit-learn ``DecisionTreeClassifier``.

    The wrapped estimator is created inside :meth:`fit` with fixed
    hyper-parameters and kept on ``self.tree`` so callers can reach the
    underlying model directly.
    """

    def __init__(self):
        # No estimator exists until fit() has been called.
        self.tree = None

    def fit(self, X, y):
        """Create the classifier, store it on ``self.tree`` and fit it.

        Args:
            X: Feature matrix forwarded to ``DecisionTreeClassifier.fit``.
            y: Target values forwarded to ``DecisionTreeClassifier.fit``.
        """
        self.tree = DecisionTreeClassifier(
            criterion="gini",
            random_state=100,
            max_depth=4,
            min_samples_leaf=0.05,
            max_leaf_nodes=50,
        )
        self.tree.fit(X, y)

    def predict(self, X):
        """Return ``self.tree.predict(X)``.

        ``self.tree`` is ``None`` until :meth:`fit` has run, so calling this
        first fails with an ``AttributeError``.
        """
        return self.tree.predict(X)
def importdata(path='/Users/lee/Desktop/data-xy015.csv'):
    """Load a comma-separated dataset and print a short summary of it.

    Args:
        path: Location of the CSV file. Defaults to the path that was
            previously hard-coded, so existing zero-argument calls behave
            the same.

    Returns:
        The ``pandas.DataFrame`` read from ``path``; the first row of the
        file is used as the header.
    """
    balance_data = pd.read_csv(path, sep=',', header=0)

    print("Dataset Length: ", len(balance_data))
    print("Dataset Shape: ", balance_data.shape)
    print("Dataset: ", balance_data.head())
    return balance_data
def splitdataset(balance_data):
    """Split the dataset into features/target and train/test partitions.

    Columns 0..29 of ``balance_data`` are used as features and column 38 as
    the target. 30% of the rows are held out for testing
    (``random_state=100``).

    The ``MinMaxScaler`` is fitted on the training rows only and then applied
    to the test rows and to the full feature matrix, so that the min/max of
    the held-out rows does not influence the training data.

    Args:
        balance_data: DataFrame exposing ``.values`` with at least 39 columns.

    Returns:
        Tuple ``(X, Y, X_train, X_test, y_train, y_test)`` where every ``X*``
        array has been transformed by the scaler fitted on ``X_train``.
    """
    values = balance_data.values
    X = values[:, 0:30]
    Y = values[:, 38]

    # Split first: fitting the scaler before the split would leak test-set
    # statistics into the training features.
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.3, random_state=100
    )

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    X = scaler.transform(X)

    return X, Y, X_train, X_test, y_train, y_test
def train_using_gini(X_train, X_test, y_train):
    """Fit a ``MyDecisionTree`` on the training data and return it.

    Args:
        X_train: Training feature matrix.
        X_test: Not used by this function; kept so existing calls keep
            working.
        y_train: Training targets.

    Returns:
        The fitted ``MyDecisionTree`` instance.
    """
    clf_gini = MyDecisionTree()
    clf_gini.fit(X_train, y_train)
    return clf_gini
def prediction(X_test, clf_object):
    """Predict labels for ``X_test``, print them and return them.

    Args:
        X_test: Samples passed to ``clf_object.predict``.
        clf_object: Any object exposing a ``predict(X)`` method.

    Returns:
        Whatever ``clf_object.predict(X_test)`` returns.
    """
    y_pred = clf_object.predict(X_test)

    print("Predicted values:")
    print(y_pred)
    return y_pred
def cal_accuracy(y_test, y_pred):
    """Print the confusion matrix, accuracy (in percent) and classification report.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, compared against ``y_test``.
    """
    print("Confusion Matrix: ", confusion_matrix(y_test, y_pred))
    # accuracy_score returns a fraction; scale it to a percentage for display.
    print("Accuracy : ", accuracy_score(y_test, y_pred) * 100)
    print("Report : ", classification_report(y_test, y_pred))
def build_decision_tree(X, clf_object, save_path='/Users/lee/Desktop/tree_template1.html'):
    """Pass the fitted tree to ``idt.create_sankey`` with a target save path.

    Args:
        X: Feature matrix forwarded to ``idt.create_sankey``.
        clf_object: Wrapper whose ``.tree`` attribute holds the fitted
            estimator.
        save_path: Value forwarded as ``save_path``. Defaults to the path
            that was previously hard-coded.
    """
    class_names = ['low', 'high']
    idt.create_sankey(
        tree_model=clf_object.tree,
        X=X,
        target_names=class_names,
        save_path=save_path,
    )
def main():
    """Run the pipeline: load, split, train, evaluate, then build the Sankey output."""
    data = importdata()
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)

    clf_gini = train_using_gini(X_train, X_test, y_train)

    print("Results Using Gini Index:")
    y_pred_gini = prediction(X_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)

    # The full feature matrix (not only the training rows) is passed on.
    build_decision_tree(X, clf_gini)
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()