R Installation Basics

2016/01/02

Install mostly necessary packages

A quick and clean script that installs whichever of the necessary packages are still missing:

# Read the wanted package names (whitespace-separated tokens) from the web.
PACKAGES <- scan(
  url("https://research.janimiettinen.fi/data/r_packages.txt"),
  what = "character"
)
# PACKAGES <- scan("./data/r_paketit.txt", what="character")

# Position of each wanted package among the packages available in the
# library trees; NA marks a package that is not installed yet.
# `all.available` is written out in full instead of relying on partial
# argument matching (`all = TRUE`).
inst <- match(PACKAGES, .packages(all.available = TRUE))
need <- which(is.na(inst))

# Install only what is missing; do nothing when everything is present.
if (length(need) > 0) install.packages(PACKAGES[need])

Or run the installation from the terminal command line:

# Alternative download that sends "firefox" as the user agent:
# wget -U firefox "https://research.janimiettinen.fi/data/install_R_packages.R"
wget "https://research.janimiettinen.fi/data/install_R_packages.R"
# Run the file that wget just saved (the name must match the downloaded file).
sudo Rscript install_R_packages.R

Rprofile

Download a slightly modified .Rprofile that is used when your RStudio session starts.

# Fetch the customised .Rprofile and save it as ~/.Rprofile in the home
# directory.
download.file(
  destfile = "~/.Rprofile",
  url = "https://research.janimiettinen.fi/data/.Rprofile"
)

List of best packages

## Import
# Packages for getting data into R from files, the web, and databases.

library("feather")       # fast, lightweight file format shared by R and Python
library("readr")         # reader for tabular data
library("readxl")        # reader for Microsoft Excel spreadsheets
library("openxlsx")      # reader for Microsoft Excel spreadsheets
library("googlesheets")  # reader for Google spreadsheets
library("haven")         # reader for SAS, SPSS, and Stata files
library("httr")          # reads data from web APIs
library("rvest")         # scrapes data from web pages
library("xml2")          # reader for HTML and XML data
library("webreadr")      # reader for common web log formats
library("DBI")           # universal interface to database management systems
library("RMySQL")        # DBI driver for MySQL
library("RPostgres")     # DBI driver for Postgres
library("RSQLite")       # DBI driver for SQLite
library("bigrquery")     # DBI driver for Google BigQuery
# reads data from and interfaces with Postgres, Greenplum, and HAWQ
library("PivotalR")
library("dplyr")         # includes an interface to common databases
library("data.table")    # fread() for fast table reading
library("git2r")         # tools for accessing git repositories

## Tidy
# Packages for bringing data into a tidy layout.

library("tidyr")     # tidies the layout of tabular data
library("dplyr")     # joins multiple tables into one tidy data set
library("purrr")     # applies R functions to data structures; handy when tidying
library("broom")     # turns statistical models into tidy data frames
library("zoo")       # data structures for time series data
library("PivotalR")  # R wrappers for in-database SQL operations (join, group by)


## Visualize
# These packages help you visualize your data.
# Package names are case-sensitive in library(); `highcharter` and
# `metricsgraphics` are written in lower case to match the package names.

library(ggplot2) # a versatile system for making plots
library(ggthemes) # plot style themes
library(ggmap) # maps with Google Maps, Open Street Maps, etc.
library(ggiraph) # interactive ggplots
library(ggstance) # horizontal versions of common plots
library(GGally) # scatterplot matrices
library(ggalt) # additional coordinate systems, geoms, etc.
library(ggforce) # additional geoms, etc.
library(ggrepel) # prevent plot labels from overlapping
library(ggraph) # graphs, networks, trees and more
library(ggpmisc) # photo-biology related extensions
library(geomnet) # network visualization
library(ggExtra) # marginal histograms for a plot
library(gganimate) # animations
library(plotROC) # interactive ROC plots
library(ggspectra) # tools for plotting light spectra
library(ggnetwork) # geoms to plot networks
library(ggtech) # style themes for plots
library(ggradar) # radar charts
library(ggTimeSeries) # time series visualizations
library(ggtree) # tree visualizations
library(ggseas) # seasonal adjustment tools
library(lattice) # Trellis graphics
library(rgl) # interactive 3D plots
library(ggvis) # versatile system for interactive graphs
library(htmlwidgets) # framework for creating JavaScript widgets with R
library(leaflet) # Interactive maps
library(dygraphs) # Interactive time series plots
library(plotly) # Interactive plots
library(rbokeh) # Interactive Bokeh plots
library(highcharter) # Interactive Highcharts plots
library(visNetwork) # Interactive network graphs
library(networkD3) # Interactive d3 network graphs
library(d3heatmap) # Interactive d3 heatmaps
library(DT) # Interactive tables
library(threejs) # Interactive 3d plots and globes
library(rglwidget) # Interactive 3d plot
library(DiagrammeR) # Interactive diagrams
library(metricsgraphics) # Interactive MetricsGraphics plots
library(rCharts) # many interactive JavaScript visualizations
library(coefplot) # visualizes model statistics
library(quantmod) # candlestick financial charts
library(colorspace) # HSL based color palettes
library(viridis) # Matplotlib viridis color palette for R
library(munsell) # Munsell color palettes for R.
library(RColorBrewer) # color palettes for plots. No manual or website.
library(dichromat) # color-blind friendly palettes. No manual or website.
library(igraph) # Network Analysis and Visualization
library(latticeExtra) # Extensions for lattice graphics
library(sp) # tools for spatial data


## Transform
# Packages for turning your data into new types of data.

library("dplyr")       # grammar of data transformation
library("magrittr")    # concise syntax for calling sequences of functions
library("tibble")      # efficient display structure for tabular data
library("stringr")     # string and regular-expression tools
library("lubridate")   # date and time tools
library("xts")         # tools for time-series-based data
library("data.table")  # fast data manipulation
# pre-processing of variables for predictive modeling
library("vtreat")
library("stringi")     # fast string processing facilities
library("Matrix")      # LAPACK methods for dense and sparse matrix operations


## Model/Infer
# Packages for building models and making inferences; many of them cover
# both topics.

library("car")           # functions from An R Companion to Applied Regression
library("Hmisc")         # miscellaneous data analysis functions
library("multcomp")      # Simultaneous Inference in General Parametric Models
# parametric bootstrap test for linear mixed effects models
library("pbkrtest")
library("mvtnorm")       # Multivariate Normal and t Distributions
library("MatrixModels")  # Modelling with Sparse And Dense Matrices
library("SparseM")       # sparse matrix linear algebra
library("lme4")          # Linear Mixed-Effects Models using Eigen C++ library
library("broom")         # turns statistical models into tidy data frames
library("caret")         # Classification And REgression Training tools
# generalized linear models via penalized maximum likelihood
library("glmnet")
# tools for teaching mathematics, statistics, computation and modeling
library("mosaic")
library("gbm")           # gradient boosted regression models
library("xgboost")       # Extreme Gradient Boosting
library("randomForest")  # Random Forests for Classification and Regression
library("ranger")        # fast implementation of Random Forests
library("h2o")           # parallel distributed machine learning algorithms
library("ROCR")          # plots that visualize classifier performance
# tools for visualizing, smoothing and comparing ROC curves
library("pROC")
# R wrappers for MADlib's parallel distributed machine learning algorithms
library("PivotalR")


## Communicate
# Packages for presenting the results of data science to your audiences.

# easy-to-use format for reproducible reports and dynamic documents in R
library("rmarkdown")
library("knitr")          # embeds R code within pdf and html reports
library("flexdashboard")  # easy-to-create dashboards based on rmarkdown
library("bookdown")       # books and long documents built on R Markdown
library("rticles")        # ready-to-use R Markdown templates
library("tufte")          # Tufte handout R Markdown template
library("DT")             # interactive data tables
library("pixiedust")      # customized tables
library("xtable")         # customized tables
library("highr")          # syntax highlighting for R source code
library("formatR")        # tidy_source() to format R source code
library("yaml")           # converts R data to YAML and back
library("pander")         # renders R objects into Pandoc markdown


## Automate
# These packages help you create data science products that automate your analyses.

library(shiny) # tools to make interactive web apps with R
library(shinydashboard) # interactive dashboards with R
library(shinythemes) # style themes for Shiny apps
library(shinyAce) # Ace text editor for Shiny apps
library(shinyjs) # adds common JavaScript operations to Shiny apps
library(miniUI) # UI elements for Shiny gadgets, interactive apps integrated into the R commandline workflow
# The next three entries are services/servers rather than loadable packages
# (two of the names contain spaces and cannot even be parsed inside library()),
# so they are listed as comments only.
# shinyapps.io # hosting service for Shiny apps
# Shiny Server Open Source # OS server to host Shiny apps
# Shiny Server Pro # server to host Shiny apps enhanced with features for business enterprises
library(rsconnect) # deploys Shiny apps to shinyapps.io
library(plumber) # converts R code to a web API
library(rmarkdown) # easy-to-use format for reproducible reports and dynamic documents in R
library(rstudioapi) # safely access RStudio IDE's API


## Program
# Packages (and tools) that make programming in the R language easier.

# Listed for reference only, not loaded with library():
#   RStudio Desktop IDE -- IDE application for R
#   RStudio Server Open Source -- server based IDE for R
#   RStudio Server Professional -- server based IDE for R enhanced with
#     features for business enterprises
library("devtools")       # tools that make developing R packages easier
# project specific libraries that handle package versioning and enhance
# reproducibility
library("packrat")
# tools to create and use alternative R package repositories
library("drat")
library("testthat")       # easy-to-use unit testing system for packages
library("roxygen2")       # easy-to-use method for documenting packages
library("purrr")          # applies R functions to data structures
library("profvis")        # visualizes code profiling data from R
library("Rcpp")           # C++ API for R
# fast, simple object class that uses reference semantics
library("R6")
library("htmltools")      # tools for HTML generation and output
# interface to the NLopt non-linear optimization library
library("nloptr")
library("minqa")          # optimization algorithms
library("rngtools")       # utilities for working with Random Number Generators
library("NMF")            # Nonnegative Matrix Factorization
library("crayon")         # adds color to terminal output
library("RJSONIO")        # converts R objects to JSON notation
library("jsonlite")       # fast JSON parser and generator for R
# interface to the 'Armadillo' Templated Linear Algebra Library
library("RcppArmadillo")


## Data
# These packages contain data sets to use as training data or toy examples.

library(babynames) # Names given to US babies 1880-2014
library(neiss) # sample of all accidents reported to US emergency rooms 2009-2014
library(yrbss) # Youth Risk Behaviour Surveillance System data from 1991 to 2013
library(nycflights13) # all out-bound flights from NYC in 2013
library(hflights) # flights departing Houston in 2011
library(USAboundaries) # Historical and Contemporary Boundaries of the United States of America
library(rworldmap) # country border data
library(usdanutrients) # USDA nutrient database
library(fueleconomy) # EPA fuel economy data
library(nasaweather) # geographic and atmospheric measures on a very coarse 24 by 24 grid covering Central America
# Hyphenated entries are kept as comments: a hyphen is not allowed in an R
# package name, and library(a-b) is parsed as a subtraction, so these lines
# could never load anything.
# NOTE(review): presumably these are data repositories rather than installable
# packages -- confirm the source before use.
# mexico-mortality # deaths in Mexico
# data-movies # and
library(ggplot2movies) # data from the Internet Movie Database (IMDB)
# pop-flows # Population flows around the USA in 2008
# data-housing-crisis # Clean data related to the 2008 US housing crisis
# gun-sales # Statistical analysis of monthly background checks of gun purchases from NY times
library(stationaRy) # hourly meteorological data from one of thousands of global stations
library(gapminder) # Excerpt from the Gapminder data
library(janeaustenr) # Jane Austen's Complete Novels