-
Notifications
You must be signed in to change notification settings - Fork 32
Description
It appears that tidypredict_fit() and tidypredict_sql() return a parsed model that uses the Gaussian coefficients instead of the binomial coefficients when a MARS model is fit with the earth package using glm = list(family = binomial). The example below was run with R version 4.0.2, tidypredict_0.4.8 and earth_5.3.0. A comparison with parsed models fit using glm() is also included for reference.
library(earth)
library(tidyverse)
library(tidypredict)
data("etitanic", package = "earth")
MARS w/ Binomial
mars.mod.1 <-
earth(
survived ~ age + sex,
data = etitanic,
glm = list(family = binomial)
)
coef(mars.mod.1)
#> (Intercept) sexmale h(9-age)
#> 1.0499562 -2.4754699 0.1692326
MARS w/ Gaussian
mars.mod.2 <-
earth(
survived ~ age + sex,
data = etitanic,
glm = list(family = gaussian)
)
coef(mars.mod.2)
#> (Intercept) sexmale h(9-age)
#> 0.73782144 -0.54239536 0.02975461
The parsed model for the binomial fit (mars.mod.1) contains the Gaussian model's coefficients (compare with coef(mars.mod.2) above), with the sigmoid function applied to the resulting linear predictor
tidypredict_fit(mars.mod.1)
#> 1 - 1/(1 + exp(0.737821439264803 + (ifelse(age < 9, 9 - age,
#> 0) * 0.0297546135084789) + (ifelse(sex == "male", 1, 0) *
#> -0.542395361228247)))
tidypredict_fit(mars.mod.2)
#> 0.737821439264803 + (ifelse(age < 9, 9 - age, 0) * 0.0297546135084789) +
#> (ifelse(sex == "male", 1, 0) * -0.542395361228247)
Compare tidypredict with predict() for the binomial MARS model (the fit and pred columns do not match)
etitanic %>%
tidypredict_to_column(mars.mod.1) %>%
mutate(pred = predict(mars.mod.1, type = "response")[,1]) %>%
head(10)
#> pclass survived sex age sibsp parch fit pred
#> 1 1st 1 female 29.0000 0 0 0.6765193 0.7407665
#> 2 1st 1 male 0.9167 1 2 0.6072916 0.4856151
#> 3 1st 0 female 2.0000 1 2 0.7203309 0.9033125
#> 4 1st 0 male 30.0000 1 2 0.5487016 0.1937987
#> 5 1st 0 female 25.0000 1 2 0.6765193 0.7407665
#> 6 1st 1 male 48.0000 0 0 0.5487016 0.1937987
#> 7 1st 1 female 63.0000 1 0 0.6765193 0.7407665
#> 8 1st 0 male 39.0000 0 0 0.5487016 0.1937987
#> 9 1st 1 female 53.0000 2 0 0.6765193 0.7407665
#> 10 1st 0 male 71.0000 0 0 0.5487016 0.1937987
Contrast with GLM
GLM w/ binomial
glm.mod.1 <-
glm(
survived ~ age + sex,
data = etitanic,
family = binomial
)
coef(glm.mod.1)
#> (Intercept) age sexmale
#> 1.235414162 -0.004254246 -2.460689180
GLM w/ Gaussian
glm.mod.2 <-
glm(
survived ~ age + sex,
data = etitanic,
family = gaussian
)
coef(glm.mod.2)
#> (Intercept) age sexmale
#> 0.7734801846 -0.0007286511 -0.5460270483
Coefficients match GLM model 1 above
tidypredict_fit(glm.mod.1)
#> 1 - 1/(1 + exp(1.23541416209053 + (age * -0.00425424604207735) +
#> (ifelse(sex == "male", 1, 0) * -2.46068918004127)))
Coefficients match GLM model 2 above
tidypredict_fit(glm.mod.2)
#> 0.773480184644955 + (age * -0.000728651082406954) + (ifelse(sex ==
#> "male", 1, 0) * -0.546027048277061)
Compare predictions (fit and pred match)
etitanic %>%
tidypredict_to_column(glm.mod.1) %>%
mutate(pred = predict(glm.mod.1, type = "response")) %>%
head(10)
#> pclass survived sex age sibsp parch fit pred
#> 1 1st 1 female 29.0000 0 0 0.7525094 0.7525094
#> 2 1st 1 male 0.9167 1 2 0.2263259 0.2263259
#> 3 1st 0 female 2.0000 1 2 0.7732765 0.7732765
#> 4 1st 0 male 30.0000 1 2 0.2053963 0.2053963
#> 5 1st 0 female 25.0000 1 2 0.7556650 0.7556650
#> 6 1st 1 male 48.0000 0 0 0.1931799 0.1931799
#> 7 1st 1 female 63.0000 1 0 0.7246003 0.7246003
#> 8 1st 0 male 39.0000 0 0 0.1992178 0.1992178
#> 9 1st 1 female 53.0000 2 0 0.7330082 0.7330082
#> 10 1st 0 male 71.0000 0 0 0.1783852 0.1783852