The CatBoost classifier provides a predict() function that returns the predicted labels for the data and a predict_proba() function that returns the predicted probabilities. If there are k classes in the data, each record will have k predicted probabilities, one per class, each giving the probability that the record belongs to that class.
In the example below, I apply CatBoost to a binary classification problem. I compute the accuracy from the predicted labels, and the AUC-ROC from the predicted probabilities of class 1.
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
import numpy as np
def generate_train_test_data(
    *,
    feature_count=10,
    train_data_count=500,
    test_data_count=100,
    seed=1007,
):
    """Randomly generate train/test feature matrices and binary labels.

    Features are uniform random floats in [0, 1). Labels are uniform random
    floats rounded to the nearest integer, i.e. 0.0 or 1.0, and are drawn
    independently of the features.

    Args:
        feature_count: Number of feature columns in both data matrices.
        train_data_count: Number of rows in the training set.
        test_data_count: Number of rows in the test set.
        seed: Seed for the private random generator. The default (1007)
            reproduces the values obtained by calling
            ``np.random.seed(1007)`` and drawing from the global generator.

    Returns:
        A tuple ``(train_data, train_labels, test_data, test_labels)`` where
        the data arrays have shape ``(count, feature_count)`` and the label
        arrays have shape ``(count,)``.
    """
    # A private generator keeps the output reproducible without reseeding
    # NumPy's process-wide RNG, which would silently affect any other code
    # that draws from np.random.
    rng = np.random.RandomState(seed)

    # The draw order (train data, train labels, test data, test labels) is
    # fixed: changing it would change the values produced for a given seed.
    train_data = rng.random_sample((train_data_count, feature_count))
    train_labels = np.round(rng.random_sample(train_data_count))

    test_data = rng.random_sample((test_data_count, feature_count))
    test_labels = np.round(rng.random_sample(test_data_count))

    return train_data, train_labels, test_data, test_labels
if __name__ == "__main__":
    # Synthetic binary-classification data: features plus 0/1 labels.
    X_train, y_train, X_test, y_test = generate_train_test_data()

    # Fit a CatBoost classifier with training output silenced.
    model = CatBoostClassifier(verbose=False)
    model.fit(X_train, y_train)

    # Accuracy is computed from the predicted class labels.
    pred_labels = model.predict(X_test)
    accuracy = accuracy_score(y_test, pred_labels)
    print("Accuracy: ", accuracy)

    # AUC-ROC is computed from the class-1 column of the predicted
    # probabilities (column index 1 of the predict_proba output).
    pred_probs = model.predict_proba(X_test)
    class_one_probs = pred_probs[:, 1]
    auc_roc = roc_auc_score(y_test, class_one_probs)
    print("AUC-ROC: ", auc_roc)