# Load every package used in this exercise through pacman.
pacman::p_load(
  DT, plotly, tidyverse, patchwork, ggiraph, ggstatsplot,
  readxl, performance, parameters, see
)
In Class Ex-4
# Read the exam data set from the working directory.
exam <- read_csv("Exam_data.csv")
# Interactive scatter plot of ENGLISH vs MATHS, coloured by RACE with the
# "Set1" palette. The trace type and mode are stated explicitly so plotly
# does not have to guess them (and print a message) on every render.
plot_ly(
  data = exam,
  x = ~ENGLISH,
  y = ~MATHS,
  color = ~RACE,
  colors = "Set1",
  type = "scatter",
  mode = "markers"
)
# Hand-picked colours used instead of a named palette.
pal <- c("pink", "purple", "blue", "green")

# Same scatter plot as before, coloured by RACE using the custom colours.
# Trace type and mode are explicit so plotly does not need to infer them.
plot_ly(
  data = exam,
  x = ~ENGLISH,
  y = ~MATHS,
  color = ~RACE,
  colors = pal,
  type = "scatter",
  mode = "markers"
)
# Scatter plot with a custom hover text showing each student's ID and
# class ("<br>" puts the class on a second line of the tooltip).
# Trace type and mode are explicit so plotly does not need to infer them.
plot_ly(
  data = exam,
  x = ~ENGLISH,
  y = ~MATHS,
  text = ~paste(
    "Student ID:", ID,
    "<br>Class:", CLASS
  ),
  color = ~RACE,
  colors = "Set1",
  type = "scatter",
  mode = "markers"
)
# Static ggplot2 scatter plot of MATHS vs ENGLISH with both axes fixed to
# the 0-100 range.
# NOTE: the original passed `dotsize = 1` to geom_point(); `dotsize` is a
# geom_dotplot() parameter, so geom_point() only warned and ignored it.
# It is dropped here, which leaves the rendered points unchanged.
p <- ggplot(
  data = exam,
  aes(x = MATHS, y = ENGLISH)
) +
  geom_point() +
  coord_cartesian(
    xlim = c(0, 100),
    ylim = c(0, 100)
  )

# Convert the ggplot object into an interactive plotly widget.
ggplotly(p)
# Interactive dot plot of MATHS scores. CLASS is used both as the tooltip
# text and as the data_id, so dots sharing a class share one hover group.
p <- ggplot(
  data = exam,
  aes(x = MATHS)
) +
  geom_dotplot_interactive(
    aes(
      tooltip = CLASS,
      data_id = CLASS
    ),
    stackgroups = TRUE,
    binwidth = 1,
    method = "histodot"
  ) +
  # Drop the y axis title and tick marks.
  scale_y_continuous(NULL, breaks = NULL)

# Render as an SVG widget; the hover CSS darkens the hovered elements and
# fades the rest.
girafe(
  ggobj = p,
  width_svg = 6,
  height_svg = 6 * 0.618,
  options = list(
    opts_hover(css = "fill: #202020;"),
    opts_hover_inv(css = "opacity:0.2;")
  )
)
# Build one JavaScript snippet per row: window.open() on the school-finder
# URL with the student's ID appended to it.
exam$onclick <- sprintf(
  "window.open(\"%s%s\")",
  "https://www.moe.gov.sg/schoolfinder?journey=Primary%20school",
  as.character(exam$ID)
)

# Dot plot of MATHS scores where clicking a dot runs that row's snippet.
p <- ggplot(
  data = exam,
  aes(x = MATHS)
) +
  geom_dotplot_interactive(
    aes(onclick = onclick),
    stackgroups = TRUE,
    binwidth = 1,
    method = "histodot"
  ) +
  # Drop the y axis title and tick marks.
  scale_y_continuous(NULL, breaks = NULL)

girafe(
  ggobj = p,
  width_svg = 6,
  height_svg = 6 * 0.618
)
# Fix the RNG state before the call so the Bayesian results are repeatable.
set.seed(1234)

# Histogram of ENGLISH scores with a Bayesian one-sample test against 60.
gghistostats(
  exam,
  x = ENGLISH,
  xlab = "English scores",
  test.value = 60,
  type = "bayes"
)
# Compare MATHS scores between GENDER groups with a non-parametric test.
# The obsolete `messages` argument has been removed: current ggstatsplot
# releases no longer define it and silently swallow it through `...`.
ggbetweenstats(
  data = exam,
  x = GENDER,
  y = MATHS,
  type = "np"
)
# Read the "data" worksheet of the Toyota Corolla workbook.
car_resale <- read_xls("ToyotaCorolla.xls", sheet = "data")

# Print the tibble to inspect its columns and first rows.
car_resale
# A tibble: 1,436 × 38
Id Model Price Age_0…¹ Mfg_M…² Mfg_Y…³ KM Quart…⁴ Weight Guara…⁵
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 81 TOYOTA Cor… 18950 25 8 2002 20019 100 1180 3
2 1 TOYOTA Cor… 13500 23 10 2002 46986 210 1165 3
3 2 TOYOTA Cor… 13750 23 10 2002 72937 210 1165 3
4 3 TOYOTA Co… 13950 24 9 2002 41711 210 1165 3
5 4 TOYOTA Cor… 14950 26 7 2002 48000 210 1165 3
6 5 TOYOTA Cor… 13750 30 3 2002 38500 210 1170 3
7 6 TOYOTA Cor… 12950 32 1 2002 61000 210 1170 3
8 7 TOYOTA Co… 16900 27 6 2002 94612 210 1245 3
9 8 TOYOTA Cor… 18600 30 3 2002 75889 210 1245 3
10 44 TOYOTA Cor… 16950 27 6 2002 110404 234 1255 3
# … with 1,426 more rows, 28 more variables: HP_Bin <chr>, CC_bin <chr>,
# Doors <dbl>, Gears <dbl>, Cylinders <dbl>, Fuel_Type <chr>, Color <chr>,
# Met_Color <dbl>, Automatic <dbl>, Mfr_Guarantee <dbl>,
# BOVAG_Guarantee <dbl>, ABS <dbl>, Airbag_1 <dbl>, Airbag_2 <dbl>,
# Airco <dbl>, Automatic_airco <dbl>, Boardcomputer <dbl>, CD_Player <dbl>,
# Central_Lock <dbl>, Powered_Windows <dbl>, Power_Steering <dbl>,
# Radio <dbl>, Mistlamps <dbl>, Sport_Model <dbl>, Backseat_Divider <dbl>, …
# Multiple linear regression of resale Price on age, manufacturing year,
# mileage, weight and guarantee period.
model <- lm(
  Price ~ Age_08_04 + Mfg_Year + KM + Weight + Guarantee_Period,
  data = car_resale
)

# Coefficient table and overall fit statistics.
summary(model)
Call:
lm(formula = Price ~ Age_08_04 + Mfg_Year + KM + Weight + Guarantee_Period,
data = car_resale)
Residuals:
Min 1Q Median 3Q Max
-10426.3 -737.3 -6.4 739.1 6591.4
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.637e+06 2.618e+05 -10.072 <2e-16 ***
Age_08_04 -1.409e+01 1.081e+01 -1.304 0.1924
Mfg_Year 1.315e+03 1.307e+02 10.064 <2e-16 ***
KM -2.323e-02 1.163e-03 -19.969 <2e-16 ***
Weight 1.903e+01 8.129e-01 23.405 <2e-16 ***
Guarantee_Period 2.770e+01 1.219e+01 2.272 0.0232 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1366 on 1430 degrees of freedom
Multiple R-squared: 0.8586, Adjusted R-squared: 0.8581
F-statistic: 1737 on 5 and 1430 DF, p-value: < 2.2e-16
# Print the variance inflation factors of the fitted model's predictors.
performance::check_collinearity(model)
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
KM 1.46 [1.37, 1.57] 1.21 0.68 [0.64, 0.73]
Weight 1.41 [1.32, 1.51] 1.19 0.71 [0.66, 0.76]
Guarantee_Period 1.04 [1.01, 1.17] 1.02 0.97 [0.86, 0.99]
High Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
Age_08_04 31.07 [28.08, 34.38] 5.57 0.03 [0.03, 0.04]
Mfg_Year 31.16 [28.16, 34.48] 5.58 0.03 [0.03, 0.04]
# Keep the collinearity check so it can be plotted.
check_c <- check_collinearity(model)
plot(check_c)
# Refit the regression without Mfg_Year.
model1 <- lm(
  Price ~ Age_08_04 + KM + Weight + Guarantee_Period,
  data = car_resale
)
# Check the normality of model1's residuals and plot the result.
check_n <- check_normality(model1)
plot(check_n)
# Check model1 for non-constant error variance and plot the result.
check_h <- check_heteroscedasticity(model1)
plot(check_h)