dplyr.R

Plain text source: dplyr.R


# -*- Mode:R; Coding:us-ascii-unix; fill-column:160 -*-

################################################################################################################################################################
##
# @file      dplyr.R
# @author    Mitch Richling <https://www.mitchr.me>
# @Copyright Copyright 2015 by Mitch Richling.  All rights reserved.
# @brief     Demo of some of my most used dplyr features.@EOL
# @Keywords  package cran dplyr R
#
# Notes on the design of dplyr:
#
#  1) Operations in dplyr don't modify the arguments passed to them, but rather return new results -- i.e. they don't have side effects.  This means that you
#     can use functional-style programming with dplyr.
#
#  2) dplyr tends to be faster than the equivalent base R functionality
#
#  3) dplyr works on data.tables, data.frames, and SQL connections.  This is very cool because it provides a uniform interface to all of those objects.  For
#     example, you can write your code for data.frames and it will still work later if your data gets bigger and you switch from data.frames to data.tables.

################################################################################################################################################################
# First we load up the library

library(dplyr)

################################################################################################################################################################
# The discussion below is example focused.  We will be using the same dataset for all of the examples that follow:

mtcars
                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
################################################################################################################################################################
# First we need to have some data to work with.

daObjects <- read.table(header=T, text='
     hairs SEX  name
  brunette   M  rock
    blonde   M  paper
  brunette   F  scissors
    blonde   F  hammer
       red   M  gun
       red   F  knife
', as.is=TRUE)

daPeople <- read.table(header=T, text='
      hair sex  name
  brunette   M  bill
    blonde   M  ted
    blonde   M  fred
  brunette   F  wilma
    blonde   F  betzi
      pink   F  pinkey
', as.is=TRUE)

################################################################################################################################################################
# Most dplyr operations zap row names on data.frames.   -- so add a real column with the row.names

mtcarsR <- dplyr::add_rownames(mtcars)
mtcarsR
# A tibble: 32 x 12
               rowname   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
                 <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1            Mazda RX4  21.0     6 160.0   110  3.90 2.620 16.46     0     1     4     4
2        Mazda RX4 Wag  21.0     6 160.0   110  3.90 2.875 17.02     0     1     4     4
3           Datsun 710  22.8     4 108.0    93  3.85 2.320 18.61     1     1     4     1
4       Hornet 4 Drive  21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1
5    Hornet Sportabout  18.7     8 360.0   175  3.15 3.440 17.02     0     0     3     2
6              Valiant  18.1     6 225.0   105  2.76 3.460 20.22     1     0     3     1
7           Duster 360  14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4
8            Merc 240D  24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2
9             Merc 230  22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2
10            Merc 280  19.2     6 167.6   123  3.92 3.440 18.30     1     0     4     4
11           Merc 280C  17.8     6 167.6   123  3.92 3.440 18.90     1     0     4     4
12          Merc 450SE  16.4     8 275.8   180  3.07 4.070 17.40     0     0     3     3
13          Merc 450SL  17.3     8 275.8   180  3.07 3.730 17.60     0     0     3     3
14         Merc 450SLC  15.2     8 275.8   180  3.07 3.780 18.00     0     0     3     3
15  Cadillac Fleetwood  10.4     8 472.0   205  2.93 5.250 17.98     0     0     3     4
16 Lincoln Continental  10.4     8 460.0   215  3.00 5.424 17.82     0     0     3     4
17   Chrysler Imperial  14.7     8 440.0   230  3.23 5.345 17.42     0     0     3     4
18            Fiat 128  32.4     4  78.7    66  4.08 2.200 19.47     1     1     4     1
19         Honda Civic  30.4     4  75.7    52  4.93 1.615 18.52     1     1     4     2
20      Toyota Corolla  33.9     4  71.1    65  4.22 1.835 19.90     1     1     4     1
21       Toyota Corona  21.5     4 120.1    97  3.70 2.465 20.01     1     0     3     1
22    Dodge Challenger  15.5     8 318.0   150  2.76 3.520 16.87     0     0     3     2
23         AMC Javelin  15.2     8 304.0   150  3.15 3.435 17.30     0     0     3     2
24          Camaro Z28  13.3     8 350.0   245  3.73 3.840 15.41     0     0     3     4
25    Pontiac Firebird  19.2     8 400.0   175  3.08 3.845 17.05     0     0     3     2
26           Fiat X1-9  27.3     4  79.0    66  4.08 1.935 18.90     1     1     4     1
27       Porsche 914-2  26.0     4 120.3    91  4.43 2.140 16.70     0     1     5     2
28        Lotus Europa  30.4     4  95.1   113  3.77 1.513 16.90     1     1     5     2
29      Ford Pantera L  15.8     8 351.0   264  4.22 3.170 14.50     0     1     5     4
30        Ferrari Dino  19.7     6 145.0   175  3.62 2.770 15.50     0     1     5     6
31       Maserati Bora  15.0     8 301.0   335  3.54 3.570 14.60     0     1     5     8
32          Volvo 142E  21.4     4 121.0   109  4.11 2.780 18.60     1     1     4     2
################################################################################################################################################################
# Grab rows by Boolean expression. You can actually replace that & with a comma, but I'm usually explicit about this sort of thing

dplyr::filter(mtcarsR, cyl>4 & carb==4)
# A tibble: 10 x 12
               rowname   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
                 <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1            Mazda RX4  21.0     6 160.0   110  3.90 2.620 16.46     0     1     4     4
2        Mazda RX4 Wag  21.0     6 160.0   110  3.90 2.875 17.02     0     1     4     4
3           Duster 360  14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4
4             Merc 280  19.2     6 167.6   123  3.92 3.440 18.30     1     0     4     4
5            Merc 280C  17.8     6 167.6   123  3.92 3.440 18.90     1     0     4     4
6   Cadillac Fleetwood  10.4     8 472.0   205  2.93 5.250 17.98     0     0     3     4
7  Lincoln Continental  10.4     8 460.0   215  3.00 5.424 17.82     0     0     3     4
8    Chrysler Imperial  14.7     8 440.0   230  3.23 5.345 17.42     0     0     3     4
9           Camaro Z28  13.3     8 350.0   245  3.73 3.840 15.41     0     0     3     4
10      Ford Pantera L  15.8     8 351.0   264  4.22 3.170 14.50     0     1     5     4
################################################################################################################################################################
# Grab rows by index

dplyr::slice(mtcarsR, 15:20)
# A tibble: 6 x 12
              rowname   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
                <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1  Cadillac Fleetwood  10.4     8 472.0   205  2.93 5.250 17.98     0     0     3     4
2 Lincoln Continental  10.4     8 460.0   215  3.00 5.424 17.82     0     0     3     4
3   Chrysler Imperial  14.7     8 440.0   230  3.23 5.345 17.42     0     0     3     4
4            Fiat 128  32.4     4  78.7    66  4.08 2.200 19.47     1     1     4     1
5         Honda Civic  30.4     4  75.7    52  4.93 1.615 18.52     1     1     4     2
6      Toyota Corolla  33.9     4  71.1    65  4.22 1.835 19.90     1     1     4     1
################################################################################################################################################################
# Sort things.  Could have used '-gear' here as 'gear' is numeric, but 'desc' works on strings as well.

dplyr::arrange(mtcarsR, cyl, desc(gear))
# A tibble: 32 x 12
               rowname   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
                 <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1        Porsche 914-2  26.0     4 120.3    91  4.43 2.140 16.70     0     1     5     2
2         Lotus Europa  30.4     4  95.1   113  3.77 1.513 16.90     1     1     5     2
3           Datsun 710  22.8     4 108.0    93  3.85 2.320 18.61     1     1     4     1
4            Merc 240D  24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2
5             Merc 230  22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2
6             Fiat 128  32.4     4  78.7    66  4.08 2.200 19.47     1     1     4     1
7          Honda Civic  30.4     4  75.7    52  4.93 1.615 18.52     1     1     4     2
8       Toyota Corolla  33.9     4  71.1    65  4.22 1.835 19.90     1     1     4     1
9            Fiat X1-9  27.3     4  79.0    66  4.08 1.935 18.90     1     1     4     1
10          Volvo 142E  21.4     4 121.0   109  4.11 2.780 18.60     1     1     4     2
11       Toyota Corona  21.5     4 120.1    97  3.70 2.465 20.01     1     0     3     1
12        Ferrari Dino  19.7     6 145.0   175  3.62 2.770 15.50     0     1     5     6
13           Mazda RX4  21.0     6 160.0   110  3.90 2.620 16.46     0     1     4     4
14       Mazda RX4 Wag  21.0     6 160.0   110  3.90 2.875 17.02     0     1     4     4
15            Merc 280  19.2     6 167.6   123  3.92 3.440 18.30     1     0     4     4
16           Merc 280C  17.8     6 167.6   123  3.92 3.440 18.90     1     0     4     4
17      Hornet 4 Drive  21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1
18             Valiant  18.1     6 225.0   105  2.76 3.460 20.22     1     0     3     1
19      Ford Pantera L  15.8     8 351.0   264  4.22 3.170 14.50     0     1     5     4
20       Maserati Bora  15.0     8 301.0   335  3.54 3.570 14.60     0     1     5     8
21   Hornet Sportabout  18.7     8 360.0   175  3.15 3.440 17.02     0     0     3     2
22          Duster 360  14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4
23          Merc 450SE  16.4     8 275.8   180  3.07 4.070 17.40     0     0     3     3
24          Merc 450SL  17.3     8 275.8   180  3.07 3.730 17.60     0     0     3     3
25         Merc 450SLC  15.2     8 275.8   180  3.07 3.780 18.00     0     0     3     3
26  Cadillac Fleetwood  10.4     8 472.0   205  2.93 5.250 17.98     0     0     3     4
27 Lincoln Continental  10.4     8 460.0   215  3.00 5.424 17.82     0     0     3     4
28   Chrysler Imperial  14.7     8 440.0   230  3.23 5.345 17.42     0     0     3     4
29    Dodge Challenger  15.5     8 318.0   150  2.76 3.520 16.87     0     0     3     2
30         AMC Javelin  15.2     8 304.0   150  3.15 3.435 17.30     0     0     3     2
31          Camaro Z28  13.3     8 350.0   245  3.73 3.840 15.41     0     0     3     4
32    Pontiac Firebird  19.2     8 400.0   175  3.08 3.845 17.05     0     0     3     2
################################################################################################################################################################
# Select columns.  Love how you can use column names with the range operator.  Numbers work too, but that is boring.

dplyr::select(mtcarsR, cyl:drat)
# A tibble: 32 x 4
     cyl  disp    hp  drat
   <dbl> <dbl> <dbl> <dbl>
1      6 160.0   110  3.90
2      6 160.0   110  3.90
3      4 108.0    93  3.85
4      6 258.0   110  3.08
5      8 360.0   175  3.15
6      6 225.0   105  2.76
7      8 360.0   245  3.21
8      4 146.7    62  3.69
9      4 140.8    95  3.92
10     6 167.6   123  3.92
11     6 167.6   123  3.92
12     8 275.8   180  3.07
13     8 275.8   180  3.07
14     8 275.8   180  3.07
15     8 472.0   205  2.93
16     8 460.0   215  3.00
17     8 440.0   230  3.23
18     4  78.7    66  4.08
19     4  75.7    52  4.93
20     4  71.1    65  4.22
21     4 120.1    97  3.70
22     8 318.0   150  2.76
23     8 304.0   150  3.15
24     8 350.0   245  3.73
25     8 400.0   175  3.08
26     4  79.0    66  4.08
27     4 120.3    91  4.43
28     4  95.1   113  3.77
29     8 351.0   264  4.22
30     6 145.0   175  3.62
31     8 301.0   335  3.54
32     4 121.0   109  4.11
################################################################################################################################################################
# Pull out a column and rename it

dplyr::select(mtcarsR, displacement=disp)
# A tibble: 32 x 1
   displacement
          <dbl>
1         160.0
2         160.0
3         108.0
4         258.0
5         360.0
6         225.0
7         360.0
8         146.7
9         140.8
10        167.6
11        167.6
12        275.8
13        275.8
14        275.8
15        472.0
16        460.0
17        440.0
18         78.7
19         75.7
20         71.1
21        120.1
22        318.0
23        304.0
24        350.0
25        400.0
26         79.0
27        120.3
28         95.1
29        351.0
30        145.0
31        301.0
32        121.0
################################################################################################################################################################
# You can separate selection rules with commas (works like and)

dplyr::select(mtcarsR, displacement=disp, cyl)
# A tibble: 32 x 2
   displacement   cyl
          <dbl> <dbl>
1         160.0     6
2         160.0     6
3         108.0     4
4         258.0     6
5         360.0     8
6         225.0     6
7         360.0     8
8         146.7     4
9         140.8     4
10        167.6     6
11        167.6     6
12        275.8     8
13        275.8     8
14        275.8     8
15        472.0     8
16        460.0     8
17        440.0     8
18         78.7     4
19         75.7     4
20         71.1     4
21        120.1     4
22        318.0     8
23        304.0     8
24        350.0     8
25        400.0     8
26         79.0     4
27        120.3     4
28         95.1     4
29        351.0     8
30        145.0     6
31        301.0     8
32        121.0     4
################################################################################################################################################################
# If you wanted all the cols, but just wanted to rename a few you can do this.

dplyr::rename(mtcarsR, displacement=disp, weight=wt)
# A tibble: 32 x 12
               rowname   mpg   cyl displacement    hp  drat weight  qsec    vs    am  gear  carb
                 <chr> <dbl> <dbl>        <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1            Mazda RX4  21.0     6        160.0   110  3.90  2.620 16.46     0     1     4     4
2        Mazda RX4 Wag  21.0     6        160.0   110  3.90  2.875 17.02     0     1     4     4
3           Datsun 710  22.8     4        108.0    93  3.85  2.320 18.61     1     1     4     1
4       Hornet 4 Drive  21.4     6        258.0   110  3.08  3.215 19.44     1     0     3     1
5    Hornet Sportabout  18.7     8        360.0   175  3.15  3.440 17.02     0     0     3     2
6              Valiant  18.1     6        225.0   105  2.76  3.460 20.22     1     0     3     1
7           Duster 360  14.3     8        360.0   245  3.21  3.570 15.84     0     0     3     4
8            Merc 240D  24.4     4        146.7    62  3.69  3.190 20.00     1     0     4     2
9             Merc 230  22.8     4        140.8    95  3.92  3.150 22.90     1     0     4     2
10            Merc 280  19.2     6        167.6   123  3.92  3.440 18.30     1     0     4     4
11           Merc 280C  17.8     6        167.6   123  3.92  3.440 18.90     1     0     4     4
12          Merc 450SE  16.4     8        275.8   180  3.07  4.070 17.40     0     0     3     3
13          Merc 450SL  17.3     8        275.8   180  3.07  3.730 17.60     0     0     3     3
14         Merc 450SLC  15.2     8        275.8   180  3.07  3.780 18.00     0     0     3     3
15  Cadillac Fleetwood  10.4     8        472.0   205  2.93  5.250 17.98     0     0     3     4
16 Lincoln Continental  10.4     8        460.0   215  3.00  5.424 17.82     0     0     3     4
17   Chrysler Imperial  14.7     8        440.0   230  3.23  5.345 17.42     0     0     3     4
18            Fiat 128  32.4     4         78.7    66  4.08  2.200 19.47     1     1     4     1
19         Honda Civic  30.4     4         75.7    52  4.93  1.615 18.52     1     1     4     2
20      Toyota Corolla  33.9     4         71.1    65  4.22  1.835 19.90     1     1     4     1
21       Toyota Corona  21.5     4        120.1    97  3.70  2.465 20.01     1     0     3     1
22    Dodge Challenger  15.5     8        318.0   150  2.76  3.520 16.87     0     0     3     2
23         AMC Javelin  15.2     8        304.0   150  3.15  3.435 17.30     0     0     3     2
24          Camaro Z28  13.3     8        350.0   245  3.73  3.840 15.41     0     0     3     4
25    Pontiac Firebird  19.2     8        400.0   175  3.08  3.845 17.05     0     0     3     2
26           Fiat X1-9  27.3     4         79.0    66  4.08  1.935 18.90     1     1     4     1
27       Porsche 914-2  26.0     4        120.3    91  4.43  2.140 16.70     0     1     5     2
28        Lotus Europa  30.4     4         95.1   113  3.77  1.513 16.90     1     1     5     2
29      Ford Pantera L  15.8     8        351.0   264  4.22  3.170 14.50     0     1     5     4
30        Ferrari Dino  19.7     6        145.0   175  3.62  2.770 15.50     0     1     5     6
31       Maserati Bora  15.0     8        301.0   335  3.54  3.570 14.60     0     1     5     8
32          Volvo 142E  21.4     4        121.0   109  4.11  2.780 18.60     1     1     4     2
################################################################################################################################################################
# Pull out a row for each unique value of cyl & carb.  More useful if dplyr::select is used to limit input to the column(s)

dplyr::distinct(mtcarsR, cyl, carb)
# A tibble: 9 x 12
            rowname   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
              <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1         Mazda RX4  21.0     6 160.0   110  3.90 2.620 16.46     0     1     4     4
2        Datsun 710  22.8     4 108.0    93  3.85 2.320 18.61     1     1     4     1
3    Hornet 4 Drive  21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1
4 Hornet Sportabout  18.7     8 360.0   175  3.15 3.440 17.02     0     0     3     2
5        Duster 360  14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4
6         Merc 240D  24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2
7        Merc 450SE  16.4     8 275.8   180  3.07 4.070 17.40     0     0     3     3
8      Ferrari Dino  19.7     6 145.0   175  3.62 2.770 15.50     0     1     5     6
9     Maserati Bora  15.0     8 301.0   335  3.54 3.570 14.60     0     1     5     8
################################################################################################################################################################
# Add new column(s)

dplyr::mutate(mtcarsR, mpc = mpg/cyl, impc=1/mpc)
# A tibble: 32 x 14
               rowname   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb      mpc      impc
                 <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>     <dbl>
1            Mazda RX4  21.0     6 160.0   110  3.90 2.620 16.46     0     1     4     4 3.500000 0.2857143
2        Mazda RX4 Wag  21.0     6 160.0   110  3.90 2.875 17.02     0     1     4     4 3.500000 0.2857143
3           Datsun 710  22.8     4 108.0    93  3.85 2.320 18.61     1     1     4     1 5.700000 0.1754386
4       Hornet 4 Drive  21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1 3.566667 0.2803738
5    Hornet Sportabout  18.7     8 360.0   175  3.15 3.440 17.02     0     0     3     2 2.337500 0.4278075
6              Valiant  18.1     6 225.0   105  2.76 3.460 20.22     1     0     3     1 3.016667 0.3314917
7           Duster 360  14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4 1.787500 0.5594406
8            Merc 240D  24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2 6.100000 0.1639344
9             Merc 230  22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2 5.700000 0.1754386
10            Merc 280  19.2     6 167.6   123  3.92 3.440 18.30     1     0     4     4 3.200000 0.3125000
11           Merc 280C  17.8     6 167.6   123  3.92 3.440 18.90     1     0     4     4 2.966667 0.3370787
12          Merc 450SE  16.4     8 275.8   180  3.07 4.070 17.40     0     0     3     3 2.050000 0.4878049
13          Merc 450SL  17.3     8 275.8   180  3.07 3.730 17.60     0     0     3     3 2.162500 0.4624277
14         Merc 450SLC  15.2     8 275.8   180  3.07 3.780 18.00     0     0     3     3 1.900000 0.5263158
15  Cadillac Fleetwood  10.4     8 472.0   205  2.93 5.250 17.98     0     0     3     4 1.300000 0.7692308
16 Lincoln Continental  10.4     8 460.0   215  3.00 5.424 17.82     0     0     3     4 1.300000 0.7692308
17   Chrysler Imperial  14.7     8 440.0   230  3.23 5.345 17.42     0     0     3     4 1.837500 0.5442177
18            Fiat 128  32.4     4  78.7    66  4.08 2.200 19.47     1     1     4     1 8.100000 0.1234568
19         Honda Civic  30.4     4  75.7    52  4.93 1.615 18.52     1     1     4     2 7.600000 0.1315789
20      Toyota Corolla  33.9     4  71.1    65  4.22 1.835 19.90     1     1     4     1 8.475000 0.1179941
21       Toyota Corona  21.5     4 120.1    97  3.70 2.465 20.01     1     0     3     1 5.375000 0.1860465
22    Dodge Challenger  15.5     8 318.0   150  2.76 3.520 16.87     0     0     3     2 1.937500 0.5161290
23         AMC Javelin  15.2     8 304.0   150  3.15 3.435 17.30     0     0     3     2 1.900000 0.5263158
24          Camaro Z28  13.3     8 350.0   245  3.73 3.840 15.41     0     0     3     4 1.662500 0.6015038
25    Pontiac Firebird  19.2     8 400.0   175  3.08 3.845 17.05     0     0     3     2 2.400000 0.4166667
26           Fiat X1-9  27.3     4  79.0    66  4.08 1.935 18.90     1     1     4     1 6.825000 0.1465201
27       Porsche 914-2  26.0     4 120.3    91  4.43 2.140 16.70     0     1     5     2 6.500000 0.1538462
28        Lotus Europa  30.4     4  95.1   113  3.77 1.513 16.90     1     1     5     2 7.600000 0.1315789
29      Ford Pantera L  15.8     8 351.0   264  4.22 3.170 14.50     0     1     5     4 1.975000 0.5063291
30        Ferrari Dino  19.7     6 145.0   175  3.62 2.770 15.50     0     1     5     6 3.283333 0.3045685
31       Maserati Bora  15.0     8 301.0   335  3.54 3.570 14.60     0     1     5     8 1.875000 0.5333333
32          Volvo 142E  21.4     4 121.0   109  4.11 2.780 18.60     1     1     4     2 5.350000 0.1869159
################################################################################################################################################################
# New table with JUST the new computed columns. No back references in formulas to other new columns.

dplyr::transmute(mtcarsR, mpc = mpg/cyl, mpd=mpg/disp)
# A tibble: 32 x 2
        mpc        mpd
      <dbl>      <dbl>
1  3.500000 0.13125000
2  3.500000 0.13125000
3  5.700000 0.21111111
4  3.566667 0.08294574
5  2.337500 0.05194444
6  3.016667 0.08044444
7  1.787500 0.03972222
8  6.100000 0.16632584
9  5.700000 0.16193182
10 3.200000 0.11455847
11 2.966667 0.10620525
12 2.050000 0.05946338
13 2.162500 0.06272661
14 1.900000 0.05511240
15 1.300000 0.02203390
16 1.300000 0.02260870
17 1.837500 0.03340909
18 8.100000 0.41168996
19 7.600000 0.40158520
20 8.475000 0.47679325
21 5.375000 0.17901749
22 1.937500 0.04874214
23 1.900000 0.05000000
24 1.662500 0.03800000
25 2.400000 0.04800000
26 6.825000 0.34556962
27 6.500000 0.21612635
28 7.600000 0.31966351
29 1.975000 0.04501425
30 3.283333 0.13586207
31 1.875000 0.04983389
32 5.350000 0.17685950
################################################################################################################################################################
# Compute various aggregate functions on the columns

dplyr::summarise(mtcarsR, avgmpg=mean(mpg), sdmpc=sd(mpg/cyl))
# A tibble: 1 x 2
    avgmpg    sdmpc
     <dbl>    <dbl>
1 20.09062 2.255627
################################################################################################################################################################
# Combining the previous functions (especially 'summarize') with a 'group_by' object is where the true utility of dplyr shines.

by_cyl <- dplyr::group_by(mtcarsR, cyl)
dplyr::summarise(by_cyl, cnt=n(), meanmpg=mean(mpg, na.rm = TRUE))
# A tibble: 3 x 3
    cyl   cnt  meanmpg
  <dbl> <int>    <dbl>
1     4    11 26.66364
2     6     7 19.74286
3     8    14 15.10000
################################################################################################################################################################
# mutate and group_by are a magical combination!!!

dplyr::mutate(dplyr::group_by(mtcarsR, cyl), meanMPGbyCYL=mean(mpg))
Source: local data frame [32 x 13]
Groups: cyl [3]

               rowname   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb meanMPGbyCYL
                 (chr) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl)        (dbl)
1            Mazda RX4  21.0     6 160.0   110  3.90 2.620 16.46     0     1     4     4     19.74286
2        Mazda RX4 Wag  21.0     6 160.0   110  3.90 2.875 17.02     0     1     4     4     19.74286
3           Datsun 710  22.8     4 108.0    93  3.85 2.320 18.61     1     1     4     1     26.66364
4       Hornet 4 Drive  21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1     19.74286
5    Hornet Sportabout  18.7     8 360.0   175  3.15 3.440 17.02     0     0     3     2     15.10000
6              Valiant  18.1     6 225.0   105  2.76 3.460 20.22     1     0     3     1     19.74286
7           Duster 360  14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4     15.10000
8            Merc 240D  24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2     26.66364
9             Merc 230  22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2     26.66364
10            Merc 280  19.2     6 167.6   123  3.92 3.440 18.30     1     0     4     4     19.74286
11           Merc 280C  17.8     6 167.6   123  3.92 3.440 18.90     1     0     4     4     19.74286
12          Merc 450SE  16.4     8 275.8   180  3.07 4.070 17.40     0     0     3     3     15.10000
13          Merc 450SL  17.3     8 275.8   180  3.07 3.730 17.60     0     0     3     3     15.10000
14         Merc 450SLC  15.2     8 275.8   180  3.07 3.780 18.00     0     0     3     3     15.10000
15  Cadillac Fleetwood  10.4     8 472.0   205  2.93 5.250 17.98     0     0     3     4     15.10000
16 Lincoln Continental  10.4     8 460.0   215  3.00 5.424 17.82     0     0     3     4     15.10000
17   Chrysler Imperial  14.7     8 440.0   230  3.23 5.345 17.42     0     0     3     4     15.10000
18            Fiat 128  32.4     4  78.7    66  4.08 2.200 19.47     1     1     4     1     26.66364
19         Honda Civic  30.4     4  75.7    52  4.93 1.615 18.52     1     1     4     2     26.66364
20      Toyota Corolla  33.9     4  71.1    65  4.22 1.835 19.90     1     1     4     1     26.66364
21       Toyota Corona  21.5     4 120.1    97  3.70 2.465 20.01     1     0     3     1     26.66364
22    Dodge Challenger  15.5     8 318.0   150  2.76 3.520 16.87     0     0     3     2     15.10000
23         AMC Javelin  15.2     8 304.0   150  3.15 3.435 17.30     0     0     3     2     15.10000
24          Camaro Z28  13.3     8 350.0   245  3.73 3.840 15.41     0     0     3     4     15.10000
25    Pontiac Firebird  19.2     8 400.0   175  3.08 3.845 17.05     0     0     3     2     15.10000
26           Fiat X1-9  27.3     4  79.0    66  4.08 1.935 18.90     1     1     4     1     26.66364
27       Porsche 914-2  26.0     4 120.3    91  4.43 2.140 16.70     0     1     5     2     26.66364
28        Lotus Europa  30.4     4  95.1   113  3.77 1.513 16.90     1     1     5     2     26.66364
29      Ford Pantera L  15.8     8 351.0   264  4.22 3.170 14.50     0     1     5     4     15.10000
30        Ferrari Dino  19.7     6 145.0   175  3.62 2.770 15.50     0     1     5     6     19.74286
31       Maserati Bora  15.0     8 301.0   335  3.54 3.570 14.60     0     1     5     8     15.10000
32          Volvo 142E  21.4     4 121.0   109  4.11 2.780 18.60     1     1     4     2     26.66364
################################################################################################################################################################
# Chaining is a technique using the %>% operator that can be used to "chain" together dplyr calls into sequential steps.  The idea is very much like how pipes
# are used on the UNIX command line.

mtcarsR %>%
  dplyr::group_by(cyl, carb) %>%
  dplyr::select(mpg, hp, cyl, carb) %>%
  dplyr::summarise(meanMPG = mean(mpg, na.rm = TRUE),
                   meanCYL = mean(hp, na.rm = TRUE))  %>%
  filter(meanMPG > 20)
Source: local data frame [2 x 4]
Groups: cyl [1]

    cyl  carb meanMPG meanCYL
  (dbl) (dbl)   (dbl)   (dbl)
1     4     1   27.58    77.4
2     4     2   25.90    87.0
################################################################################################################################################################
# Some people don't like chaining, and you are not forced to use it if you don't wish.  The previous example could have been done like this:

tmp <- dplyr::group_by(mtcarsR, cyl, carb)
tmp <- dplyr::select(tmp, mpg, hp, cyl, carb)
tmp <- dplyr::summarise(tmp,
                        meanMPG = mean(mpg, na.rm = TRUE),
                        meanCYL = mean(hp, na.rm = TRUE))
tmp <- dplyr::filter(tmp, meanMPG > 20)
tmp
Source: local data frame [2 x 4]
Groups: cyl [1]

    cyl  carb meanMPG meanCYL
  (dbl) (dbl)   (dbl)   (dbl)
1     4     1   27.58    77.4
2     4     2   25.90    87.0
################################################################################################################################################################
# dplyr has nice join (merge in R) capability too

dplyr::full_join( daPeople, daObjects, by=c('hair'='hairs', 'sex'='SEX'))
      hair sex name.x   name.y
1 brunette   M   bill     rock
2   blonde   M    ted    paper
3   blonde   M   fred    paper
4 brunette   F  wilma scissors
5   blonde   F  betzi   hammer
6     pink   F pinkey     <NA>
7      red   M   <NA>      gun
8      red   F   <NA>    knife
dplyr::left_join( daPeople, daObjects, by=c('hair'='hairs', 'sex'='SEX'))
      hair sex name.x   name.y
1 brunette   M   bill     rock
2   blonde   M    ted    paper
3   blonde   M   fred    paper
4 brunette   F  wilma scissors
5   blonde   F  betzi   hammer
6     pink   F pinkey     <NA>
dplyr::right_join(daPeople, daObjects, by=c('hair'='hairs', 'sex'='SEX'))
      hair sex name.x   name.y
1 brunette   M   bill     rock
2   blonde   M    ted    paper
3   blonde   M   fred    paper
4 brunette   F  wilma scissors
5   blonde   F  betzi   hammer
6      red   M   <NA>      gun
7      red   F   <NA>    knife
dplyr::inner_join(daPeople, daObjects, by=c('hair'='hairs', 'sex'='SEX'))
      hair sex name.x   name.y
1 brunette   M   bill     rock
2   blonde   M    ted    paper
3   blonde   M   fred    paper
4 brunette   F  wilma scissors
5   blonde   F  betzi   hammer
dplyr::semi_join( daPeople, daObjects, by=c('hair'='hairs', 'sex'='SEX'))
      hair sex  name
1 brunette   M  bill
2   blonde   M   ted
3   blonde   M  fred
4 brunette   F wilma
5   blonde   F betzi
dplyr::anti_join( daPeople, daObjects, by=c('hair'='hairs', 'sex'='SEX'))
  hair sex   name
1 pink   F pinkey

The R session information (including the OS info, R version and all packages used):

    options(width=80)
    sessionInfo()
R version 3.3.0 (2016-05-03)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Debian GNU/Linux 8 (jessie)

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] graphics  grDevices datasets  utils     grid      stats     base     

other attached packages:
[1] RColorBrewer_1.1-2 reshape2_1.4.1     ggplot2_2.1.0      tidyr_0.5.1       
[5] dplyr_0.4.3        data.table_1.9.6   gridExtra_2.2.1    knitr_1.13        
[9] lattice_0.20-33   

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.5      magrittr_1.5     munsell_0.4.3    colorspace_1.2-6
 [5] R6_2.1.2         stringr_1.0.0    highr_0.6        plyr_1.8.3      
 [9] tools_3.3.0      parallel_3.3.0   gtable_0.2.0     DBI_0.4-1       
[13] lazyeval_0.2.0   assertthat_0.1   tibble_1.1       formatR_1.4     
[17] evaluate_0.9     stringi_1.0-1    methods_3.3.0    scales_0.4.0    
[21] chron_2.3-47    
    Sys.time()
[1] "2016-07-21 22:41:44 CDT"