In Class Ex-4

Author

Daisy

Published

February 26, 2023

pacman::p_load(DT,plotly,tidyverse,patchwork,ggiraph,ggstatsplot,readxl, performance, parameters, see)
exam<- read_csv("Exam_data.csv")
plot_ly(data = exam, 
        x = ~ENGLISH, 
        y = ~MATHS, 
        color = ~RACE, 
        colors = "Set1") 
203040506070809010020406080100
ChineseIndianMalayOthersENGLISHMATHS
pal <- c("pink", "purple", "blue", "green") #<<

plot_ly(data = exam, 
        x = ~ENGLISH, 
        y = ~MATHS, 
        color = ~RACE, 
        colors = pal) 
203040506070809010020406080100
ChineseIndianMalayOthersENGLISHMATHS
plot_ly(data = exam, 
        x = ~ENGLISH, 
        y = ~MATHS,
        text = ~paste("Student ID:", ID,     #<<
                      "<br>Class:", CLASS),  #<<
        color = ~RACE, 
        colors = "Set1")
203040506070809010020406080100
ChineseIndianMalayOthersENGLISHMATHS
p <- ggplot(data=exam, 
            aes(x = MATHS,
                y = ENGLISH)) +
  geom_point(dotsize = 1) +
  coord_cartesian(xlim=c(0,100),
                  ylim=c(0,100))
ggplotly(p) 
p <- ggplot(data=exam, 
       aes(x = MATHS)) +
  geom_dotplot_interactive(              
    aes(tooltip = CLASS, #<<
        data_id = CLASS),#<<              
    stackgroups = TRUE,                  
    binwidth = 1,                        
    method = "histodot") +               
  scale_y_continuous(NULL,               
                     breaks = NULL)
girafe(                                  
  ggobj = p,                             
  width_svg = 6,                         
  height_svg = 6*0.618,
  options = list(                        
    opts_hover(css = "fill: #202020;"),  
    opts_hover_inv(css = "opacity:0.2;") 
  )                                        
)                    
25 50 75 100 MATHS
exam$onclick <- sprintf("window.open(\"%s%s\")",
"https://www.moe.gov.sg/schoolfinder?journey=Primary%20school",
as.character(exam$ID))

p <- ggplot(data=exam, 
       aes(x = MATHS)) +
  geom_dotplot_interactive(              
    aes(onclick = onclick),              #<<
    stackgroups = TRUE,                  
    binwidth = 1,                        
    method = "histodot") +               
  scale_y_continuous(NULL,               
                     breaks = NULL)
girafe(                                  
  ggobj = p,                             
  width_svg = 6,                         
  height_svg = 6*0.618)                                        
25 50 75 100 MATHS
set.seed(1234)

gghistostats(
  data = exam,
  x = ENGLISH,
  type = "bayes",
  test.value = 60,
  xlab = "English scores"
)

ggbetweenstats(
  data = exam,
  x = GENDER, 
  y = MATHS,
  type = "np",
  messages = FALSE
)

car_resale <- read_xls("ToyotaCorolla.xls", 
                       "data")
car_resale
# A tibble: 1,436 × 38
      Id Model       Price Age_0…¹ Mfg_M…² Mfg_Y…³     KM Quart…⁴ Weight Guara…⁵
   <dbl> <chr>       <dbl>   <dbl>   <dbl>   <dbl>  <dbl>   <dbl>  <dbl>   <dbl>
 1    81 TOYOTA Cor… 18950      25       8    2002  20019     100   1180       3
 2     1 TOYOTA Cor… 13500      23      10    2002  46986     210   1165       3
 3     2 TOYOTA Cor… 13750      23      10    2002  72937     210   1165       3
 4     3  TOYOTA Co… 13950      24       9    2002  41711     210   1165       3
 5     4 TOYOTA Cor… 14950      26       7    2002  48000     210   1165       3
 6     5 TOYOTA Cor… 13750      30       3    2002  38500     210   1170       3
 7     6 TOYOTA Cor… 12950      32       1    2002  61000     210   1170       3
 8     7  TOYOTA Co… 16900      27       6    2002  94612     210   1245       3
 9     8 TOYOTA Cor… 18600      30       3    2002  75889     210   1245       3
10    44 TOYOTA Cor… 16950      27       6    2002 110404     234   1255       3
# … with 1,426 more rows, 28 more variables: HP_Bin <chr>, CC_bin <chr>,
#   Doors <dbl>, Gears <dbl>, Cylinders <dbl>, Fuel_Type <chr>, Color <chr>,
#   Met_Color <dbl>, Automatic <dbl>, Mfr_Guarantee <dbl>,
#   BOVAG_Guarantee <dbl>, ABS <dbl>, Airbag_1 <dbl>, Airbag_2 <dbl>,
#   Airco <dbl>, Automatic_airco <dbl>, Boardcomputer <dbl>, CD_Player <dbl>,
#   Central_Lock <dbl>, Powered_Windows <dbl>, Power_Steering <dbl>,
#   Radio <dbl>, Mistlamps <dbl>, Sport_Model <dbl>, Backseat_Divider <dbl>, …
model <- lm(Price ~ Age_08_04 + Mfg_Year + KM + 
              Weight + Guarantee_Period, data = car_resale)

summary(model)

Call:
lm(formula = Price ~ Age_08_04 + Mfg_Year + KM + Weight + Guarantee_Period, 
    data = car_resale)

Residuals:
     Min       1Q   Median       3Q      Max 
-10426.3   -737.3     -6.4    739.1   6591.4 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)      -2.637e+06  2.618e+05 -10.072   <2e-16 ***
Age_08_04        -1.409e+01  1.081e+01  -1.304   0.1924    
Mfg_Year          1.315e+03  1.307e+02  10.064   <2e-16 ***
KM               -2.323e-02  1.163e-03 -19.969   <2e-16 ***
Weight            1.903e+01  8.129e-01  23.405   <2e-16 ***
Guarantee_Period  2.770e+01  1.219e+01   2.272   0.0232 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1366 on 1430 degrees of freedom
Multiple R-squared:  0.8586,    Adjusted R-squared:  0.8581 
F-statistic:  1737 on 5 and 1430 DF,  p-value: < 2.2e-16
check_collinearity(model)
# Check for Multicollinearity

Low Correlation

             Term   VIF     VIF 95% CI Increased SE Tolerance Tolerance 95% CI
 Guarantee_Period  1.04   [1.01, 1.17]         1.02      0.97     [0.86, 0.99]
        Age_08_04 31.07 [28.08, 34.38]         5.57      0.03     [0.03, 0.04]
         Mfg_Year 31.16 [28.16, 34.48]         5.58      0.03     [0.03, 0.04]

High Correlation

   Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
     KM 1.46 [1.37, 1.57]         1.21      0.68     [0.64, 0.73]
 Weight 1.41 [1.32, 1.51]         1.19      0.71     [0.66, 0.76]
check_c <- check_collinearity(model)
plot(check_c)

model1 <- lm(Price ~ Age_08_04 + KM + 
              Weight + Guarantee_Period, data = car_resale)
check_n <- check_normality(model1)
plot(check_n)

check_h <- check_heteroscedasticity(model1)
plot(check_h)