Nous allons utiliser le package tidyverse. Le terme tidyverse est une contraction de tidy (que l’on peut traduire par « bien rangé ») et de universe. Ce package est une collection de packages conçus pour la visualisation, la manipulation des tableaux de données, l’import/export de données, la manipulation de variables, la programmation, etc. Plusieurs packages constituent le cœur de tidyverse, notamment ggplot2, dplyr, tidyr, readr, purrr, tibble, stringr et forcats ; ils sont chargés en une seule fois par la commande suivante :

library(tidyverse)

Nous considérons les données du fichier salaires.txt.

# Load the salary data from a whitespace-delimited text file whose first line
# holds the column names.
Salaire <- read.table(file = "salaires.txt", header = TRUE)

# Column-by-column summary of the freshly loaded data frame.
summary(Salaire)
##      salary          salbegin        jobtime         prevexp      
##  Min.   : 15750   Min.   : 9000   Min.   :63.00   Min.   :  0.00  
##  1st Qu.: 24000   1st Qu.:12488   1st Qu.:72.00   1st Qu.: 19.25  
##  Median : 28875   Median :15000   Median :81.00   Median : 55.00  
##  Mean   : 34420   Mean   :17016   Mean   :81.11   Mean   : 95.86  
##  3rd Qu.: 36938   3rd Qu.:17490   3rd Qu.:90.00   3rd Qu.:138.75  
##  Max.   :135000   Max.   :79980   Max.   :98.00   Max.   :476.00  
##       educ          minority           sex        
##  Min.   : 8.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:12.00   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :12.00   Median :0.0000   Median :0.0000  
##  Mean   :13.49   Mean   :0.2194   Mean   :0.4557  
##  3rd Qu.:15.00   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :21.00   Max.   :1.0000   Max.   :1.0000
# Turn the two 0/1 indicator columns into labelled factors:
#   sex:      0 -> "H",   1 -> "F"
#   minority: 0 -> "Non", 1 -> "Oui"
Salaire <- mutate(
  Salaire,
  sex = factor(sex, levels = c(0, 1), labels = c("H", "F")),
  minority = factor(minority, levels = c(0, 1), labels = c("Non", "Oui"))
)

# Summarise again: the recoded columns are now reported as counts per level.
summary(Salaire)
##      salary          salbegin        jobtime         prevexp      
##  Min.   : 15750   Min.   : 9000   Min.   :63.00   Min.   :  0.00  
##  1st Qu.: 24000   1st Qu.:12488   1st Qu.:72.00   1st Qu.: 19.25  
##  Median : 28875   Median :15000   Median :81.00   Median : 55.00  
##  Mean   : 34420   Mean   :17016   Mean   :81.11   Mean   : 95.86  
##  3rd Qu.: 36938   3rd Qu.:17490   3rd Qu.:90.00   3rd Qu.:138.75  
##  Max.   :135000   Max.   :79980   Max.   :98.00   Max.   :476.00  
##       educ       minority  sex    
##  Min.   : 8.00   Non:370   H:258  
##  1st Qu.:12.00   Oui:104   F:216  
##  Median :12.00                    
##  Mean   :13.49                    
##  3rd Qu.:15.00                    
##  Max.   :21.00
# Interactive inspection of row/column subsets. Each call opens the data viewer
# on a temporary result; `Salaire` itself is never modified here.

# Rows with a salary of at least 80000.
View(Salaire %>% filter(salary >= 80000))

# Same salary threshold, restricted to rows whose `sex` level is "F"
# (conditions separated by commas in filter() must all hold).
View(Salaire %>% filter(salary >= 80000, sex=="F"))

# Same salary threshold, restricted to rows whose `sex` level is "H".
View(Salaire %>% filter(salary >= 80000, sex=="H"))

# Same rows as above, keeping only the `salary` and `salbegin` columns.
View(Salaire %>% filter(salary >= 80000, sex=="H") %>%
       select(salary,salbegin))

# Same rows as above, keeping every column except `salary` and `salbegin`.
View(Salaire %>% filter(salary >= 80000, sex=="H") %>%
       select(-salary,-salbegin))

# Rows with 30000 <= salary < 60000 (lower bound included, upper excluded).
View(Salaire %>% filter(salary >= 30000, salary < 60000))

# Interactive inspection of sorted / augmented copies of the data. Results are
# only displayed, not assigned, so `Salaire` is left unchanged.

# Rows sorted by increasing salary.
View(Salaire  %>% arrange(salary))

# Rows sorted by decreasing salary.
View(Salaire  %>% arrange(desc(salary)))
# Two derived columns added for display: the natural log of the salary
# (`logsalary`) and the affine transform 2 * salary + 1 (`W`).
View(Salaire %>% mutate(logsalary = log(salary), W= 2 * salary +1))
# Summary (min, quartiles, median, mean, max) of the salary column.
summary(Salaire[["salary"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15750   24000   28875   34420   36938  135000
# One-row summary of the salary distribution: mean plus the skewness and
# kurtosis as computed by the `moments` package.
tmp <- summarise(
  Salaire,
  moy = mean(salary),
  s = moments::skewness(salary),
  k = moments::kurtosis(salary)
)
tmp
##        moy        s       k
## 1 34419.57 2.117877 8.30863
# Mean salary within each level of `sex` (one output row per level).
tmp <- summarise(group_by(Salaire, sex), moy = mean(salary))
tmp
## # A tibble: 2 x 2
##   sex      moy
##   <fct>  <dbl>
## 1 H     41442.
## 2 F     26032.
# For a factor column, summary() reports the number of rows per level.
summary(Salaire[["sex"]])
##   H   F 
## 258 216
# Same per-level counts obtained with dplyr; the result stays grouped by `sex`.
count(group_by(Salaire, sex))
## # A tibble: 2 x 2
## # Groups:   sex [2]
##   sex       n
##   <fct> <int>
## 1 H       258
## 2 F       216
# Number of rows whose `sex` level is "F" and whose salary exceeds 35000.
count(filter(Salaire, sex == "F" & salary > 35000))
## # A tibble: 1 x 1
##       n
##   <int>
## 1    21
# Random sample of exactly 10 rows, drawn without replacement. The draw depends
# on the current RNG state (no seed is set in the visible code), so the rows
# differ from run to run.
Salaire_A <- sample_n(Salaire, size = 10)
Salaire_A
##     salary salbegin jobtime prevexp educ minority sex
## 225  21900    12750      82       0   15      Non   F
## 90   16200     9750      92       0    8      Non   F
## 49   34800    16500      94      93   15      Non   H
## 34   92000    39990      96     175   19      Non   H
## 184  38850    15000      84      53   15      Non   H
## 362  16950    10200      72     319    8      Non   F
## 160  66000    47490      86     150   16      Non   H
## 144  16650     9750      88     412    8      Non   F
## 455  43650    19500      65      19   16      Non   H
## 353  31200    15750      73     155   12      Oui   H
# Random sample of about 5% of the rows, drawn without replacement. As above,
# the selection depends on the current RNG state.
Salaire_B <- sample_frac(Salaire, size = 0.05)
Salaire_B
##     salary salbegin jobtime prevexp educ minority sex
## 320  22050    15000      76     385   12      Non   H
## 98   30000    15000      92     144    8      Oui   H
## 117  31500    18750      90     205   12      Non   F
## 63   55000    26250      93      32   17      Non   H
## 35   81250    30000      96      18   17      Non   H
## 195  26400    12750      84      36   12      Oui   F
## 417  37800    15000      67      36   15      Non   H
## 196  23100    12000      84     214   16      Oui   F
## 356  28350    15000      72      48   15      Non   H
## 375  27450    14700      70      41   12      Non   H
## 58   26400    13500      94       3   15      Non   F
## 50   60000    23730      94      59   16      Non   H
## 134  41550    24990      89     285   16      Non   F
## 234  50550    19500      81      44   15      Non   H
## 220  29850    15750      82      85   12      Non   H
## 448  16350    10200      66     163   12      Oui   F
## 453  24450    15750      65     338   15      Non   H
## 372  21300    15750      70     372   15      Non   H
## 237  22650    11250      81       0   12      Non   F
## 12   28350    12000      98      26    8      Oui   H
## 394  29100    12450      69      17    8      Non   F
## 465  33900    16500      64     106   12      Non   H
## 462  34410    19500      65      79   16      Non   F
## 252  25500    11400      81       9   12      Oui   H