Skip to contents

Compute class probabilities or assign points from a scorecard to each observation in a data frame

Usage

# S3 method for scorecard
predict(object, newdata, type = c("points", "prob"), ...)

Arguments

object

An object of class "scorecard"

newdata

A data frame containing a column for each independent variable in the scorecard model passed to object

type

(String) One of "points" or "prob", indicating the type of prediction to return (number of scorecard points or glm model class probability, respectively). Default is "points".

...

further arguments passed to or from other methods (not currently used).

Value

If type = "points", a numeric vector of points; if type = "prob", a numeric vector of glm model class probabilities

Details

If type = "prob", the output of predict() will represent the probability of observing the second level of the dependent variable in the training data that was used to fit the glm component of the scorecard object passed to object.

Examples

# Reverse levels of `default_status` so that "bad" is the second level;
# a binomial `glm` models the probability of the non-first factor level, so
# the predicted probabilities below refer to "bad" (i.e. default)
loans$default_status <- factor(loans$default_status, levels = c("good", "bad"))

# Pre-process the data to create WoE features
# (method = "replace" presumably swaps each predictor for its WoE-encoded
# version -- confirm against the `woe()` documentation)
train <- woe(
  data = loans,
  outcome = default_status,
  predictors = c(industry, housing_status),
  method = "replace",
  verbose = FALSE
)

# Fit the logistic regression model, using every other column of `train`
# as a predictor of `default_status`
my_model <- glm(
  formula = default_status ~ .,
  data = train,
  family = "binomial"
)

# Assume we have the following associated card: one row per
# variable/class combination, with its WoE value and the points awarded
# (the row with class "" is presumably the blank/missing industry bin --
# confirm)
my_card <- tibble::tribble(
  ~variable, ~class, ~woe, ~points,
  "industry", "", 1.23, 148,
  "industry", "beef", -0.231, 107,
  "industry", "dairy", -0.0956, 110,
  "industry", "fruit", -0.359, 103,
  "industry", "grain", 0.410, 125,
  "industry", "greenhouse", -0.511, 99,
  "industry", "nuts", -0.288, 105,
  "industry", "pork", -0.606, 96,
  "industry", "poultry", 0.774, 135,
  "industry", "sod", -0.154, 109,
  "housing_status", "own", 0.194, 119,
  "housing_status", "rent", -0.430, 101
)

# Create the scorecard object by pairing the fitted model with its card
my_scorecard <- scorecard(
  fit = my_model,
  card = my_card
)

# Simulate data for new loan applicant (one row; raw class labels, with a
# column for each independent variable in the scorecard model)
new_data <- tibble::tribble(
  ~industry, ~housing_status,
  "poultry", "rent"
)

# Return applicant's points scored
# (the result matches the card: 135 for "poultry" + 101 for "rent")
predict(
  my_scorecard,
  newdata = new_data,
  type = "points"
)
#> [1] 236

# Return applicant's `glm` class probabilities (probability of default,
# i.e. of the second level "bad" of `default_status`)
predict(
  my_scorecard,
  newdata = new_data,
  type = "prob"
)
#> [1] 0.2343966