Loading ggplot2

library(ggplot2)

Loading the dataset

# Keep text columns as factors: the summary below reports per-level counts for
# gender and religion, which requires factor columns. Since R 4.0.0 read.csv()
# defaults to stringsAsFactors = FALSE, so request it explicitly.
df <- read.csv("~/r1.csv", stringsAsFactors = TRUE)

Describing the dataset

# Preview the first six rows of the dataset.
head(df, n = 6L)
##   age educ gender    religion income    immig   loginc
## 1  43   15   Male No religion   1400 4.666666 7.244227
## 2  23   15 Female    catholic   1400 6.666666 7.244227
## 3  26   17   Male No religion   3600 6.000000 8.188689
## 4  48   12   Male No religion   1700 4.333334 7.438384
## 5  59   14   Male No religion   3000 5.333334 8.006368
## 6  38   10 Female No religion   1500 3.666667 7.313220
# Per-column summary of the dataset.
summary(object = df)
##       age             educ          gender    
##  Min.   :16.00   Min.   : 0.00   Female:7521  
##  1st Qu.:33.00   1st Qu.:12.00   Male  :7840  
##  Median :42.00   Median :13.00                
##  Mean   :42.26   Mean   :13.74                
##  3rd Qu.:51.00   3rd Qu.:16.00                
##  Max.   :86.00   Max.   :29.00                
##                                               
##                         religion        income           immig       
##  catholic                   :3681   Min.   :     1   Min.   : 0.000  
##  Islamic                    : 366   1st Qu.:   612   1st Qu.: 4.000  
##  Jewish                     : 451   Median :  1500   Median : 5.333  
##  No religion                :6569   Mean   :  9126   Mean   : 5.224  
##  Orthodox & other christians:1937   3rd Qu.:  3466   3rd Qu.: 6.667  
##  other                      :  94   Max.   :700000   Max.   :10.000  
##  Protestant                 :2263                                    
##      loginc      
##  Min.   : 0.000  
##  1st Qu.: 6.417  
##  Median : 7.313  
##  Mean   : 7.472  
##  3rd Qu.: 8.151  
##  Max.   :13.459  
## 

Model 1: Log of income explained by the level of education

# Simple linear regression of log income on years of education.
fm1 <- lm(formula = loginc ~ educ, data = df)
# Coefficient table and fit statistics for the model.
summary(object = fm1)
## 
## Call:
## lm(formula = loginc ~ educ, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.8368 -0.9959 -0.1628  0.6629  6.6888 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.941867   0.054933  108.17   <2e-16 ***
## educ        0.111342   0.003872   28.75   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.69 on 15359 degrees of freedom
## Multiple R-squared:  0.05109,    Adjusted R-squared:  0.05102 
## F-statistic: 826.9 on 1 and 15359 DF,  p-value: < 2.2e-16

Diagnostic plots

# Draw the default plots for the fitted linear model.
plot(x = fm1)