更新时间:2023-12-01 12:19:58
使用 data.table 实现起来相对简单:
library(data.table)
# Rank hospitals within each state by outcome rate.
#
# Load just the three columns the ranking needs; cells holding the literal
# text "Not Available" are read as NA.
# NOTE(review): assumes the CSV header contains exactly these column names
# ("Hospital.Name", "State", "rate") -- confirm against the actual file.
outcome_data <- fread(
  "outcome-of-care-measures.csv",
  select = c("Hospital.Name", "State", "rate"),
  na.strings = "Not Available"
)

# Discard every row that has an NA in any of the selected columns.
outcome_data <- na.omit(outcome_data)

# Key (and thereby sort) the table by state, then rate, then hospital name,
# so that ties on rate within a state are ordered by hospital name.
setkeyv(outcome_data, c("State", "rate", "Hospital.Name"))

# Number the rows 1..n inside each state, where n is the group size (.N).
# The rows are already in key order, so this running index is the hospital's
# rank within its state. `:=` adds the column by reference.
outcome_data[, rank := seq_len(.N), by = "State"]