timeSeriesSmooth.R

Plain text source: timeSeriesSmooth.R


# -*- Mode:R; Coding:us-ascii-unix; fill-column:160 -*-

################################################################################################################################################################
# @file      timeSeriesSmooth.R
# @author    Mitch Richling <https://www.mitchr.me>
# @Copyright Copyright 2015 by Mitch Richling.  All rights reserved.
# @brief     Smoothing time series plots.@EOL
# @Keywords  base r smooth filter lowess
#
# Smoothers (especially ones found in CRAN packages) frequently have one or more of the following characteristics:
#   - They may require both x & y data
#   - They might only work when the x data is numeric -- they simply fail when given dates for example.
#   - They might coerce x data into numeric values
#   - They return a composite data type or list containing $x and $y values in the returned object
#   - They may change the number of observations
#
# Here we demonstrate how to use such smoothers with time series data where the x data are dates.
#

################################################################################################################################################################
# We need some data, so let's fake some. Note that we use POSIXct date objects -- I find them easy to work with as each date is an atomic object.
numPts <- 150
daDat <- data.frame(xpdate=as.POSIXct('2012-01-01')+(1:numPts)*(60*60*24),
                    y=rnorm(numPts, mean=0, sd=10)+(1:numPts)*.2)

################################################################################################################################################################
# Now we transform our POSIXct time stamps into integers so that they will work with any smoother.  The smoother we demonstrate, lowess, would do this step for
# us, but not all smoothers are so nice.
daDat$xpint <- as.integer(daDat$xpdate)

head(daDat)
      xpdate           y      xpint
1 2012-01-02   0.6560061 1325484000
2 2012-01-03   1.1566978 1325570400
3 2012-01-04 -10.3631669 1325656800
4 2012-01-05  14.5889121 1325743200
5 2012-01-06  -6.0947830 1325829600
6 2012-01-07  13.1343474 1325916000
#################################################################################################################################################################
# Now we smooth our data (we give it the x integers we constructed from our time data as well as the y data)
smoothedData <- lowess(daDat$xpint, daDat$y, f=.3)
head(smoothedData)
$x
  [1] 1325484000 1325570400 1325656800 1325743200 1325829600 1325916000 1326002400 1326088800 1326175200 1326261600 1326348000 1326434400 1326520800 1326607200
 [15] 1326693600 1326780000 1326866400 1326952800 1327039200 1327125600 1327212000 1327298400 1327384800 1327471200 1327557600 1327644000 1327730400 1327816800
 [29] 1327903200 1327989600 1328076000 1328162400 1328248800 1328335200 1328421600 1328508000 1328594400 1328680800 1328767200 1328853600 1328940000 1329026400
 [43] 1329112800 1329199200 1329285600 1329372000 1329458400 1329544800 1329631200 1329717600 1329804000 1329890400 1329976800 1330063200 1330149600 1330236000
 [57] 1330322400 1330408800 1330495200 1330581600 1330668000 1330754400 1330840800 1330927200 1331013600 1331100000 1331186400 1331272800 1331359200 1331445600
 [71] 1331532000 1331618400 1331704800 1331791200 1331877600 1331964000 1332050400 1332136800 1332223200 1332309600 1332396000 1332482400 1332568800 1332655200
 [85] 1332741600 1332828000 1332914400 1333000800 1333087200 1333173600 1333260000 1333346400 1333432800 1333519200 1333605600 1333692000 1333778400 1333864800
 [99] 1333951200 1334037600 1334124000 1334210400 1334296800 1334383200 1334469600 1334556000 1334642400 1334728800 1334815200 1334901600 1334988000 1335074400
[113] 1335160800 1335247200 1335333600 1335420000 1335506400 1335592800 1335679200 1335765600 1335852000 1335938400 1336024800 1336111200 1336197600 1336284000
[127] 1336370400 1336456800 1336543200 1336629600 1336716000 1336802400 1336888800 1336975200 1337061600 1337148000 1337234400 1337320800 1337407200 1337493600
[141] 1337580000 1337666400 1337752800 1337839200 1337925600 1338012000 1338098400 1338184800 1338271200 1338357600

$y
  [1]  2.029738  2.076850  2.127046  2.180861  2.239144  2.302577  2.371953  2.447768  2.530082  2.618428  2.711846  2.808819  2.907739  3.006742  3.103714
 [16]  3.196488  3.282920  3.360849  3.427724  3.480350  3.514879  3.530066  3.540063  3.666517  3.826284  4.027020  4.270560  4.551674  4.858422  5.173354
 [31]  5.477782  5.756956  6.003553  6.216544  6.400317  6.560867  6.708927  6.856935  7.015243  7.186928  7.372626  7.572089  7.781911  8.004423  8.239116
 [46]  8.488012  8.754220  9.034002  9.314802  9.586510  9.844125 10.089265 10.328525 10.565953 10.803412 11.047267 11.302019 11.569201 11.848262 12.141291
 [61] 12.442734 12.744356 13.042446 13.331707 13.604699 13.857709 14.093097 14.314210 14.518673 14.704880 14.878920 15.043340 15.199362 15.347867 15.485357
 [76] 15.609549 15.724217 15.827834 15.919080 15.993107 16.045924 16.076128 16.089338 16.092556 16.094649 16.106013 16.128010 16.163193 16.214255 16.288682
 [91] 16.396489 16.542224 16.722566 16.927389 17.147809 17.376577 17.609224 17.842801 18.068947 18.283406 18.489192 18.693718 18.908822 19.150868 19.428119
[106] 19.743340 20.098256 20.491945 20.915224 21.356514 21.802856 22.243332 22.667125 23.061172 23.420553 23.742870 24.026569 24.274108 24.490406 24.687093
[121] 24.883966 25.098987 25.341617 25.615677 25.920853 26.254298 26.613515 26.990630 27.398852 27.781337 28.140957 28.483812 28.814833 29.137565 29.454803
[136] 29.768936 30.081806 30.395039 30.709839 31.027122 31.347478 31.670992 31.997365 32.326093 32.656918 32.989623 33.323998 33.659578 33.995703 34.332029
################################################################################################################################################################
# Now we put everything in a data.frame so we can graph it with ggplot.  Notice that we convert the integers we got from lowess at the same time.
allDat <- rbind(data.frame(smoother=rep('actual', numPts),
                           x=daDat$xpdate,
                           y=daDat$y),
                data.frame(smoother=rep('lowess', numPts),
                           x=as.POSIXct(smoothedData$x, origin='1970-01-01 00:00:00 UTC'),
                           y=smoothedData$y))

################################################################################################################################################################
# Plot them all
ggplot(allDat, aes(x=x, y=y, col=smoother)) +
  geom_line() +
  labs(title='Smoothing Time Series With A Generic Smoother')
plot of chunk auto-report

The R session information (including the OS info, R version and all packages used):

    options(width=80)
    sessionInfo()
R version 3.3.0 (2016-05-03)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Debian GNU/Linux 8 (jessie)

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] graphics  grDevices datasets  utils     grid      stats     base     

other attached packages:
[1] RColorBrewer_1.1-2 reshape2_1.4.1     ggplot2_2.1.0      dplyr_0.4.3       
[5] data.table_1.9.6   gridExtra_2.2.1    knitr_1.13         lattice_0.20-33   

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.5      magrittr_1.5     munsell_0.4.3    colorspace_1.2-6
 [5] R6_2.1.2         stringr_1.0.0    highr_0.6        plyr_1.8.3      
 [9] tools_3.3.0      parallel_3.3.0   gtable_0.2.0     DBI_0.4-1       
[13] assertthat_0.1   digest_0.6.9     formatR_1.4      evaluate_0.9    
[17] labeling_0.3     stringi_1.0-1    methods_3.3.0    scales_0.4.0    
[21] chron_2.3-47    
    Sys.time()
[1] "2016-07-09 20:07:43 CDT"