Install R

Install RStudio

Install LaTeX (or an equivalent TeX distribution)

[Optional]: Install Rtools

Installing external packages

Loading an installed package

Asking for help

or

A few basic examples with numeric variables

# Assign the value 1 to `a`; typing a name on its own prints its value.
a <- 1

a
## [1] 1
# `b` is computed from `a` (1 + 2).
b <- a + 2

b
## [1] 3
# NOTE(review): `c` is also the name of the function used further down to
# build vectors (c("nous", ...)); that call still works, but a different
# variable name would be less confusing for readers.
c <- a + b

c
## [1] 4
# Product of three terms.
c1 <- 2 * a * b

c1
## [1] 6
# Parentheses make the sum happen before the division (mean of a and b).
c2 <- ( a + b ) / 2

Character (string) variable

# A single character string.
d <- "bonjour"

d
## [1] "bonjour"
# c() combines several strings into one character vector.
e <- c("nous", "sommes", "le", "vendredi")

Factor variable

# Convert the character vector `e` into a factor (categorical variable).
f <- as.factor(e)

f
## [1] nous     sommes   le       vendredi
## Levels: le nous sommes vendredi
# levels() returns the distinct categories; as the output shows, they are
# listed in alphabetical order, not in order of appearance.
levels(f)
## [1] "le"       "nous"     "sommes"   "vendredi"

Get the current working directory

# Print the path of the current working directory.
getwd()
## [1] "C:/Users/Antoine/Documents"

Set the working directory

# NOTE(review): this absolute path looks specific to the author's machine;
# replace it with a folder that exists on your own computer.
setwd("C:/Users/Antoine/Documents/REIMS_2015")

List files in the working directory

# Called without arguments, so the listing is for the current working
# directory. No output was recorded for this call in the transcript.
dir()

List objects stored

# ls() returns the names of the objects created so far.
ls()
## [1] "a"  "b"  "c"  "c1" "c2" "d"  "e"  "f"

Remove an object

# Delete the object `a`; the second ls() confirms it is no longer listed.
rm(a)

ls()
## [1] "b"  "c"  "c1" "c2" "d"  "e"  "f"

Remove all objects

# Pass every name returned by ls() to rm(), which removes all listed objects.
# character(0) below is an empty character vector: nothing is left to list.
rm(list = ls())

ls()
## character(0)

An example with data stored in a Data Frame

We will use data provided by an external package called ggplot2

# Install ggplot2 only when it is missing, so that re-running these lines
# does not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Attach the package so its functions and datasets can be used by name.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.2
# Load the `diamonds` dataset shipped with ggplot2 into the workspace.
data(diamonds)

We store the diamonds data in a data frame called df

# Copy the dataset under the shorter name `df`, used in the rest of the
# tutorial.
df <- diamonds

We look at the first lines of the data frame

Notice the column names

# head() prints the first rows of the data frame (six here), under a header
# line that shows the column names.
head(df)
##   carat       cut color clarity depth table price    x    y    z
## 1  0.23     Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
## 2  0.21   Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
## 3  0.23      Good     E     VS1  56.9    65   327 4.05 4.07 2.31
## 4  0.29   Premium     I     VS2  62.4    58   334 4.20 4.23 2.63
## 5  0.31      Good     J     SI2  63.3    58   335 4.34 4.35 2.75
## 6  0.24 Very Good     J    VVS2  62.8    57   336 3.94 3.96 2.48

Looking at the tail of df

# tail() prints the last rows of the data frame (rows 53935 to 53940 in the
# output below).
tail(df)
##       carat       cut color clarity depth table price    x    y    z
## 53935  0.72   Premium     D     SI1  62.7    59  2757 5.69 5.73 3.58
## 53936  0.72     Ideal     D     SI1  60.8    57  2757 5.75 5.76 3.50
## 53937  0.72      Good     D     SI1  63.1    55  2757 5.69 5.75 3.61
## 53938  0.70 Very Good     D     SI1  62.8    60  2757 5.66 5.68 3.56
## 53939  0.86   Premium     H     SI2  61.0    58  2757 6.15 6.12 3.74
## 53940  0.75     Ideal     D     SI2  62.2    55  2757 5.83 5.87 3.64

Looking at column names

# colnames() returns the column names as a character vector (ten columns
# here).
colnames(df)
##  [1] "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"  
##  [8] "x"       "y"       "z"

Computation on vectors within a data frame

# Arithmetic on a column applies to every row at once; the result is stored
# as a new column `x2` of df.
df$x2 <- df$x * 2

head(df$x2)
## [1] 7.90 7.78 8.10 8.40 8.68 7.88
# Columns can also be combined element by element into a new column.
df$new <- df$x + df$y + df$z 

head(df$new)
## [1] 10.36 10.04 10.43 11.06 11.44 10.38
# Same computation, but kept as a standalone vector instead of a column of df.
new2 <- df$x + df$y + df$z 

head(new2)
## [1] 10.36 10.04 10.43 11.06 11.44 10.38

Making a frequency table

# Count how many rows fall into each category of `cut`.
table(df$cut)
## 
##      Fair      Good Very Good   Premium     Ideal 
##      1610      4906     12082     13791     21551

Storing results in a new object

# Keep the frequency table of `cut` in `t1` so it can be reused; printing
# `t1` shows the same counts as calling table() directly.
t1 <- table(df$cut)

t1
## 
##      Fair      Good Very Good   Premium     Ideal 
##      1610      4906     12082     13791     21551
# Same operation for the `color` column.
t2 <- table(df$color)

t2
## 
##     D     E     F     G     H     I     J 
##  6775  9797  9542 11292  8304  5422  2808

Introduction to plotting with ggplot2

Selecting the data to be plotted

# Map `carat` to the x axis and `price` to the y axis, and keep the
# resulting plot object in `p` so layers can be added to it below.
p <- ggplot(df, aes(x=carat, y=price))

Define the way to display the mapped data

# Add a point layer to `p`, giving a scatter plot of price against carat.
p + geom_point()

Faceting

# Same scatter plot, split into panels by the `color` column.
p + geom_point() + facet_wrap(~ color)

# Same scatter plot, split into panels by the `cut` column.
p + geom_point() + facet_wrap(~ cut)

Colors

# Colour the points by `cut`, in panels that are also split by `cut`.
p + geom_point(aes(color = cut)) + facet_wrap(~ cut)

# Colour the points by the `color` column, in panels split by `cut`.
p + geom_point(aes(color = color)) + facet_wrap(~ cut)

Introduction to dplyr

dplyr is a recently designed tool for working with data frames.

It was developed by the same author as ggplot2.

Together, they have become the new basic tools to use in R.

Library loading

# Attach dplyr. The messages below report that dplyr's filter() and lag()
# now mask the versions from the stats package, and that intersect(),
# setdiff(), setequal() and union() mask the versions from base.
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.2
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Filter given a condition (select observations)

# Keep only the rows whose `cut` is "Ideal"; all columns are retained,
# including the `x2` and `new` columns added earlier.
df2 <- filter(df, cut == "Ideal")

head(df2)
##   carat   cut color clarity depth table price    x    y    z   x2   new
## 1  0.23 Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43 7.90 10.36
## 2  0.23 Ideal     J     VS1  62.8    56   340 3.93 3.90 2.46 7.86 10.29
## 3  0.31 Ideal     J     SI2  62.2    54   344 4.35 4.37 2.71 8.70 11.43
## 4  0.30 Ideal     I     SI2  62.0    54   348 4.31 4.34 2.68 8.62 11.33
## 5  0.33 Ideal     I     SI2  61.8    55   403 4.49 4.51 2.78 8.98 11.78
## 6  0.33 Ideal     I     SI2  61.2    56   403 4.49 4.50 2.75 8.98 11.74
# Keep only the rows with a price strictly above 15000. The printed table
# wraps, so the `new` column appears in a second chunk below.
df3 <- filter(df, price > 15000)

head(df3)
##   carat       cut color clarity depth table price    x    y    z    x2
## 1  1.54   Premium     E     VS2  62.3    58 15002 7.31 7.39 4.58 14.62
## 2  1.19     Ideal     F    VVS1  61.5    55 15005 6.82 6.84 4.20 13.64
## 3  2.10   Premium     I     SI1  61.5    57 15007 8.25 8.21 5.06 16.50
## 4  1.69     Ideal     D     SI1  60.8    57 15011 7.69 7.71 4.68 15.38
## 5  1.50 Very Good     G    VVS2  62.9    56 15013 7.22 7.32 4.57 14.44
## 6  1.73 Very Good     G     VS1  62.8    57 15014 7.57 7.72 4.80 15.14
##     new
## 1 19.28
## 2 17.86
## 3 21.52
## 4 20.08
## 5 19.11
## 6 20.09

Select columns

# Keep two columns, named individually.
df4 <- select(df, cut, price)

head(df4)
##         cut price
## 1     Ideal   326
## 2   Premium   326
## 3      Good   327
## 4   Premium   334
## 5      Good   335
## 6 Very Good   336
# Keep six columns, listed one by one.
df5 <- select(df, cut, color, clarity, depth, table, price)

head(df5)
##         cut color clarity depth table price
## 1     Ideal     E     SI2  61.5    55   326
## 2   Premium     E     SI1  59.8    61   326
## 3      Good     E     VS1  56.9    65   327
## 4   Premium     I     VS2  62.4    58   334
## 5      Good     J     SI2  63.3    58   335
## 6 Very Good     J    VVS2  62.8    57   336
# `cut:price` selects the whole range of columns from `cut` through `price`;
# the output is the same as for df5.
df6 <- select(df, cut:price)

head(df6)
##         cut color clarity depth table price
## 1     Ideal     E     SI2  61.5    55   326
## 2   Premium     E     SI1  59.8    61   326
## 3      Good     E     VS1  56.9    65   327
## 4   Premium     I     VS2  62.4    58   334
## 5      Good     J     SI2  63.3    58   335
## 6 Very Good     J    VVS2  62.8    57   336