How can I extract two duplicate values with the same max date based on a different variable?

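We can convert ‘DATE’ to the Date class, group by ‘FID’, and keep the row with the maximum ‘DATE’ in each group. When several rows in a group share that maximum date, the row with the highest ‘CID’ is the one that is kept.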

library(dplyr)
library(lubridate)
# For every FID keep exactly one row: the row with the latest DATE, and when
# several rows share that latest date, the one with the highest CID (the
# first such row in input order if CID is tied as well).
#
# Notes on the approach:
# * The previous version computed the max date through an `if (any(CT == 1))`
#   whose two branches were identical, so CT never influenced the result;
#   the dead conditional and the helper column are dropped.
# * Grouped filter() leaves the surviving rows in their original positions,
#   so no sort is needed and the result no longer has to be re-ordered with
#   a hard-coded slice(c(2, 4, 5, 1, 3)) that was only valid for this exact
#   five-group example.
withrepeats %>%
  # "11-3-2020" style strings (month-day-year) -> Date, so max() is chronological
  mutate(DATE = mdy(DATE)) %>%
  group_by(FID) %>%
  # rows that carry the latest date of their FID (ties are all kept here)
  filter(DATE == max(DATE, na.rm = TRUE)) %>%
  # tie-break: which.max() returns the first position of the largest CID
  filter(row_number() == which.max(CID)) %>%
  ungroup()

-output

# A tibble: 5 x 6
#    FID ID      CID    CT DROPS DATE      
#  <int> <chr> <int> <int> <int> <date>    
#1   123 CV        3     1     1 2020-11-03
#2   456 LO        2     1     1 2020-10-05
#3   678 IP        1     1     2 2020-11-02
#4   111 AK        1     2     2 2020-11-02
#5   222 PL        4     2     2 2020-11-01

data

# Example input: one row per (FID, CID) record, 12 rows in total.
# FID and ID identify the subject, CID/CT/DROPS are integer attributes, and
# DATE is kept as a month-day-year character string (parsed later).
withrepeats <- data.frame(
  FID = rep(c(123L, 456L, 678L, 111L, 222L), times = c(3L, 2L, 3L, 2L, 2L)),
  ID = rep(c("CV", "LO", "IP", "AK", "PL"), times = c(3L, 2L, 3L, 2L, 2L)),
  CID = c(1L, 2L, 3L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 4L, 3L),
  CT = c(2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L),
  DROPS = c(3L, 2L, 1L, 2L, 1L, 3L, 2L, 1L, 2L, 1L, 2L, 2L),
  DATE = c(
    "11-3-2020", "11-3-2020", "11-3-2020",
    "10-4-2020", "10-5-2020",
    "11-1-2020", "11-2-2020", "10-29-2020",
    "11-2-2020", "11-1-2020",
    "11-1-2020", "11-1-2020"
  ),
  stringsAsFactors = FALSE
)

CLICK HERE to find solutions to more related problems.

Leave a Comment

Your email address will not be published.

Scroll to Top