WordPress and Gist combined are powerful tools for data scientists and programmers who want to showcase their projects, with code blocks rendered in a code-editor style. It is the most efficient approach I have found so far. Here is one of the functions I just wrote in R as part of an assignment; it computes the correlation between two types of air pollutants across 332 areas in the US, for areas whose number of complete observations is above a given threshold.

R code here:

## R Programming Assignment 3 - Part III
## TASKS:
## (1) Plot the 30-day mortality rates for heart attack
## (2) Finding the best hospital in a state
## (3) Ranking hospitals by outcome in a state
## -> (4) Ranking hospitals in all states

## (4) Ranking hospitals in all states

## rankall: rank hospitals in every state for one outcome.
##
## Reads "outcome-of-care-measures.csv" from `data_dir` and, for each state,
## picks the hospital at the requested rank of 30-day mortality rate for
## `outcome_name`. Hospitals are ranked by ascending rate; ties are broken by
## hospital name. Hospitals without a usable rate are left out of the ranking.
##
## Arguments:
##   outcome_name  one of "heart attack", "heart failure", "pneumonia".
##   num           "best", "worst", or a single number giving the rank
##                 (integer or double; a fractional rank is truncated).
##   data_dir      directory holding the CSV file. Defaults to the location
##                 that used to be hard-coded through setwd(); the working
##                 directory is no longer changed.
##
## Returns a data frame with character columns `Name` and `State`, one row
## per state found in the file, sorted by state, with the state codes as row
## names. `Name` is NA when a state has no hospital at the requested rank
## (rank out of range, or no hospital in that state has a usable rate).
##
## Stops with "invalid outcome" for an unknown outcome and with "invalid num"
## when `num` is neither "best"/"worst" nor a single non-missing number.
rankall <- function(outcome_name, num = "best",
                    data_dir = "/Users/lucky1eva/Downloads/rprog-data-ProgAssignment3-data/") {
  # Maps each accepted outcome name to its column in the CSV file
  rate_columns <- c(
    "heart attack" = "Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack",
    "heart failure" = "Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure",
    "pneumonia" = "Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia"
  )

  # Check validity
  if (length(outcome_name) != 1L || !outcome_name %in% names(rate_columns)) {
    stop("invalid outcome")
  }

  # is.numeric() accepts integer ranks such as 2L, which a comparison of
  # class(num) against "numeric" would reject
  rank_is_number <- is.numeric(num) && length(num) == 1L && !is.na(num)
  rank_is_keyword <- is.character(num) && length(num) == 1L &&
    num %in% c("best", "worst")
  if (!rank_is_number && !rank_is_keyword) {
    stop("invalid num")
  }

  # Read every column as text so the rate conversion below is explicit;
  # "Not Available" is treated as a missing rate
  raw <- read.csv(
    file.path(data_dir, "outcome-of-care-measures.csv"),
    colClasses = "character",
    na.strings = c("NA", "Not Available")
  )

  rate_column <- rate_columns[[outcome_name]]
  missing_columns <- setdiff(
    c("Hospital.Name", "State", rate_column),
    names(raw)
  )
  if (length(missing_columns) > 0L) {
    stop(
      "missing column(s) in outcome file: ",
      paste(missing_columns, collapse = ", ")
    )
  }

  hospital <- raw[["Hospital.Name"]]
  state <- raw[["State"]]
  rate <- as.numeric(raw[[rate_column]])

  # Every state present in the file gets a row, even without usable rates
  states <- sort(unique(state))

  # Rank only hospitals that have a rate: by state, then rate, then name
  has_rate <- !is.na(rate) & !is.na(state)
  ranking <- order(state[has_rate], rate[has_rate], hospital[has_rate])
  ranked_names <- split(
    hospital[has_rate][ranking],
    factor(state[has_rate][ranking], levels = states)
  )

  # Select the hospital at the requested rank within one state, or NA when
  # that rank does not exist there
  pick_name <- function(candidates) {
    available <- length(candidates)
    position <- if (rank_is_number) {
      num
    } else if (num == "best") {
      1L
    } else {
      available
    }
    if (available == 0L || position < 1 || position > available) {
      NA_character_
    } else {
      candidates[[as.integer(position)]]
    }
  }

  picked <- vapply(ranked_names, pick_name, character(1), USE.NAMES = FALSE)

  all_listed <- data.frame(
    Name = picked,
    State = states,
    stringsAsFactors = FALSE
  )
  # Assigned after construction: a length-one `row.names` argument to
  # data.frame() would be read as the name of a column, not as a row name
  rownames(all_listed) <- states
  all_listed
}