Data: duplicates and order

2016/07/02

Order data frame and remove duplicates

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Data frame with id and value: ids 1-6 appear twice, tagged "first" on the
# first pass and "second" on the second pass.
# Built with tibble(); data_frame() is deprecated as of tibble 1.1.0.
dtemp <- tibble(
  id = rep(c(1, 2, 3, 4, 5, 6), times = 2),
  value = rep(c("first", "second"), each = 6)
)

# Order by id. order() leaves ties in their original order, so within each id
# the "first" row stays ahead of the "second" row.
dtemp <- dtemp[order(dtemp$id), ]

# Check what the ordered data looks like
dtemp
## # A tibble: 12 x 2
##       id value 
##    <dbl> <chr> 
##  1     1 first 
##  2     1 second
##  3     2 first 
##  4     2 second
##  5     3 first 
##  6     3 second
##  7     4 first 
##  8     4 second
##  9     5 first 
## 10     5 second
## 11     6 first 
## 12     6 second
# Remove duplicates, keeping the LAST row of each id: with fromLast = TRUE the
# scan runs from the end, so every earlier occurrence is flagged as duplicated.
dtemp[!duplicated(dtemp[["id"]], fromLast = TRUE), ]
## # A tibble: 6 x 2
##      id value 
##   <dbl> <chr> 
## 1     1 second
## 2     2 second
## 3     3 second
## 4     4 second
## 5     5 second
## 6     6 second
# Remove duplicates, keeping the FIRST row of each id: with fromLast = FALSE
# (the default) every later occurrence is flagged as duplicated.
dtemp[!duplicated(dtemp[["id"]], fromLast = FALSE), ]
## # A tibble: 6 x 2
##      id value
##   <dbl> <chr>
## 1     1 first
## 2     2 first
## 3     3 first
## 4     4 first
## 5     5 first
## 6     6 first