from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
# Tiny random-forest walkthrough: 2 training samples with 3 numeric features each.
X = [
    [1, 2, 3],
    [11, 12, 13],
]
y = [0, 1]  # class label of each row in X
# X holds only the non-target attributes; the targets are passed separately as y.
clf = RandomForestClassifier(random_state=0).fit(X, y)
# Predict on the training rows first, then on rows the model has not seen.
clf.predict(X)
# array([0, 1])
clf.predict([[4, 5, 6], [14, 15, 16]])
# array([0, 1])
clf.predict([[4, 5, 6], [14, 15, 16], [0, 1, 2], [20, 22, 24], [0, 10, 20]])
# array([0, 1, 0, 1, 1])

# Same training rows, but the class attribute now uses string names. This works.
y = ["little", "big"]
clf.fit(X, y)
clf.predict([[4, 5, 6], [14, 15, 16], [0, 1, 2], [20, 22, 24], [0, 10, 20]])
# array(['little', 'big', 'little', 'big', 'big'], dtype='<U6')

# Try non-numeric (string) NON-TARGET attributes. This does not work:
# fit() raises because the feature values cannot be converted to numbers.
X2 = [["dog"], ["cat"], ["weasel"], ["snake"], ["turtle"], ["alligator"]]
Y2 = ["mammal", "mammal", "mammal", "reptile", "reptile", "reptile"]
try:
    clf.fit(X2, Y2)
except ValueError as err:
    # Report the expected failure instead of aborting, so that the remaining
    # examples in this script still run.
    print("RandomForestClassifier cannot fit string features:", err)

# Try a decision tree. It does not work with string non-target attributes either.
# **NOTE**: CART is the only decision tree algorithm in scikit-learn:
# https://scikit-learn.org/stable/modules/tree.html

clf = tree.DecisionTreeClassifier()
try:
    clf.fit(X2, Y2)
except ValueError as err:
    # Report the expected failure instead of aborting, so that the remaining
    # examples in this script still run.
    print("DecisionTreeClassifier cannot fit string features:", err)

# Decision-tree version of the earlier numeric examples.
trainClassInstances = [0, 1]  # class label of each training sample
trainNonClassInstances = [
    [1, 2, 3],  # 2 samples, 3 features
    [11, 12, 13],
]
clf = tree.DecisionTreeClassifier().fit(trainNonClassInstances, trainClassInstances)
# Predict on the training rows, then on two batches of unseen rows.
clf.predict(trainNonClassInstances)
clf.predict([[4, 5, 6], [14, 15, 16]])
clf.predict([[4, 5, 6], [14, 15, 16], [0, 1, 2], [20, 22, 24], [0, 10, 20]])

# Use the same data, but with string names for the class attribute. This works.
# Imported here because the file's own sklearn.metrics import only appears
# after this section, which would leave these two names undefined.
from sklearn.metrics import cohen_kappa_score, confusion_matrix

trainClassInstances = ["little", "big"]
clf.fit(trainNonClassInstances, trainClassInstances)  # earlier examples
clf.predict(trainNonClassInstances)
testdata = [[4, 5, 6], [14, 15, 16], [0, 1, 2], [20, 22, 24], [0, 10, 20]]
bestguess = clf.predict(testdata)
perfect = ['little', 'big', 'little', 'big', 'big']  # reference labels for testdata
# `labels` is keyword-only in current scikit-learn; it fixes the row/column
# order of the matrix to ['little', 'big'].
confuseAuto = confusion_matrix(perfect, bestguess, labels=['little', 'big'])
print(confuseAuto)
# NOTE(review): kappa() is not defined in the visible part of this file; it is
# presumably a helper defined elsewhere that returns the 5-tuple unpacked
# below. TODO confirm.
kapp, Pcorrect, numberCorrect, Pincorrect, numberIncorrect = kappa(confuseAuto)
k = cohen_kappa_score(perfect, bestguess)  # library kappa for the same labels
print(kapp, Pcorrect, numberCorrect, Pincorrect, numberIncorrect)

from sklearn.metrics import *
# These error metrics do not work with non-numeric types in sklearn.
# Each call is attempted and the failure reported rather than left uncaught,
# so the rest of the script still runs. RMSE is derived from MSE, so showing
# mean_squared_error covers both without relying on the `squared=` keyword,
# which newer scikit-learn releases no longer accept.
for metric in (mean_squared_error, mean_absolute_error):
    try:
        metric(perfect, bestguess)
    except (TypeError, ValueError) as err:
        print(metric.__name__, "does not accept string labels:", err)


# Use the same data, but with numeric class labels again, so that the numeric
# error metrics at the end of this section can be computed.
trainClassInstances = [0, 1]
clf.fit(trainNonClassInstances, trainClassInstances)  # earlier examples
clf.predict(trainNonClassInstances)
testdata = [[4, 5, 6], [14, 15, 16], [0, 1, 2], [20, 22, 24], [0, 10, 20]]
bestguess = clf.predict(testdata)
# NOTE(review): the last reference label is 0 here, whereas the string-label
# example uses 'big' for the same row - confirm this difference is intended.
perfect = [0, 1, 0, 1, 0]
# `labels` is keyword-only in current scikit-learn; it fixes the row/column
# order of the matrix to [0, 1].
confuseAuto = confusion_matrix(perfect, bestguess, labels=trainClassInstances)
print(confuseAuto)
# NOTE(review): kappa() is not defined in the visible part of this file; it is
# presumably a helper defined elsewhere that returns the 5-tuple unpacked
# below. TODO confirm.
kapp, Pcorrect, numberCorrect, Pincorrect, numberIncorrect = kappa(confuseAuto)
k = cohen_kappa_score(perfect, bestguess)  # library kappa for the same labels
print(kapp, Pcorrect, numberCorrect, Pincorrect, numberIncorrect)
# mean_squared_error returns the (non-root) MSE by default. RMSE is taken as
# its square root instead of passing `squared=False`, which newer scikit-learn
# releases no longer accept.
mse = mean_squared_error(perfect, bestguess)
rmse = mse ** 0.5
mae = mean_absolute_error(perfect, bestguess)