An effort to visualize the trend in race-related complaints reported to the Anti-Discrimination Board of NSW, Australia. These are incidents that were serious enough to be reported, and thus form only a tiny sample compared to the whole population of unreported ones.

As no CSV is available, we have to download the HTML data manually and stitch things together. Here is the code (main function: fetch_discrimination_data) to download the HTML data using rvest:
# Return the trailing `n` characters of each string in `x`.
#
# x: character vector (or something substr() can coerce to one).
# n: number of characters to keep, counted from the right-hand end.
substrRight <- function(x, n) {
  total_chars <- nchar(x)
  first_kept <- total_chars - n + 1
  substr(x, first_kept, total_chars)
}
# Build an XPath expression that locates the table belonging to an <h2>
# heading whose text contains `h2_text`.
#
# The result is a union of two alternatives, both anchored on that heading:
#   1. the nearest <table> that is a following sibling of the heading;
#   2. the first <table> child of a following-sibling <div class="clearfix">.
#
# h2_text: text fragment the heading must contain (matching is done by
#          XPath contains(), so it is case-sensitive).
# Returns the XPath expression as a character string.
pattern_selector <- function(h2_text) {
  heading_xpath <- sprintf("//h2[contains(., '%s')]", h2_text)
  union_template <-
    "%s/following-sibling::table[1] | (%s/following-sibling::div[@class='clearfix']/table)[1]"
  # The heading anchor is substituted into both halves of the union.
  sprintf(union_template, heading_xpath, heading_xpath)
}
####### MAIN ######
#' Fetch one reporting year of "Race" complaint counts from the ADB NSW site.
#'
#' Downloads the statistics page for the period `year`/`year + 1`, locates the
#' table under the "Complaints received by ground and area" heading (either
#' capitalisation) and returns columns 2 to 6 of the row labelled "Race".
#'
#' Requires rvest to be attached (`read_html()`, `html_nodes()`,
#' `html_table()` and `%>%`).
#'
#' @param year Numeric scalar; first calendar year of the reporting period
#'   (e.g. 2012 for the 2012-13 page).
#' @return A data.frame with the columns `employment`, `goods_and_services`,
#'   `accomodation`, `education`, `clubs` and `year`. The misspelling
#'   `accomodation` is kept so existing callers keep working.
fetch_discrimination_data <- function(year) {
  stopifnot(is.numeric(year), length(year) == 1L, !is.na(year))

  from_year <- toString(year)
  to_year <- toString(year + 1)
  base_url <- "http://www.antidiscrimination.justice.nsw.gov.au/Pages/adb1_resources/adb1_statistics/"

  # The page naming scheme changed over time: the file prefix switches from
  # 2011 onwards and the separator between the two-digit years from 2012
  # onwards. `year` is a scalar, so plain if/else is used instead of ifelse().
  page_prefix <- if (year < 2011) "adb1_statistics" else "stats"
  year_separator <- if (year < 2012) "_" else "-"
  path_url <- sprintf(
    "%s%s%s%s.aspx",
    page_prefix,
    substrRight(from_year, 2),
    year_separator,
    substrRight(to_year, 2)
  )
  discrimination_url <- paste0(base_url, path_url)

  # read_html() is the supported replacement for rvest's deprecated html().
  discrimination_markup <- read_html(discrimination_url)

  # The heading is capitalised differently on different pages, so search for
  # both spellings and take the union of the matches.
  selector <- paste(
    c(
      pattern_selector("Complaints received by ground and area"),
      pattern_selector("Complaints received by Ground and Area")
    ),
    collapse = " | "
  )
  discrimination_table <- discrimination_markup %>%
    html_nodes(xpath = selector) %>%
    html_table()
  if (length(discrimination_table) == 0L) {
    stop(
      "No 'Complaints received by ground and area' table found at ",
      discrimination_url,
      call. = FALSE
    )
  }
  discrimination_frame <- discrimination_table[[1]]

  # Locate the "Race" row by its label in the first column. A plain
  # `== "Race"` comparison was reported not to work for year = 2013
  # (presumably stray or non-breaking whitespace in the cell -- TODO confirm),
  # so whitespace is normalised and case ignored before comparing.
  ground_labels <- discrimination_frame[[1]]
  tidy_labels <- trimws(
    gsub("[[:space:]\u00a0]+", " ", as.character(ground_labels))
  )
  race_rows <- which(tolower(tidy_labels) == "race")
  if (length(race_rows) != 1L) {
    # Legacy fallback: the previous implementation picked the row whose label
    # is the 13th level when the labels are sorted as a factor. It is kept
    # only for pages where the label match above is not unambiguous.
    race_rows <- which(as.integer(factor(ground_labels)) == 13L)
  }
  if (length(race_rows) == 0L) {
    stop(
      "Could not locate the 'Race' row in the table at ",
      discrimination_url,
      call. = FALSE
    )
  }

  # Keep the five value columns that follow the label column.
  discrimination_frame <- discrimination_frame[race_rows, 2:6, drop = FALSE]
  names(discrimination_frame) <- c(
    "employment", "goods_and_services", "accomodation", "education", "clubs"
  )
  data.frame(discrimination_frame, year = year)
}
The following code uses rbind to build the time series data:
# Build the complaint time series: fetch every reporting year from 1999 to
# 2013 and stack the yearly rows into a single data frame, `frame`.
#
# The yearly results are collected in a preallocated list and bound once at
# the end. The previous version tested `is.na(frame)` on the accumulated
# data frame, which is a length > 1 `if` condition (an error in R >= 4.2),
# and grew `frame` with rbind() on every iteration.
years <- 1999:2013
yearly_frames <- vector("list", length(years))
for (i in seq_along(years)) {
  year <- years[[i]]
  print(sprintf("Year - %s", year))
  yearly_frames[[i]] <- fetch_discrimination_data(year)
}
frame <- do.call(rbind, yearly_frames)
# Show the structure of the assembled series once, instead of on every pass.
str(frame)