# Build per-constituency vote tables for UK general elections from several
# sources: the 2001 results CSV, Martin Baxter's flat files (2001, 1997, 1992
# notional, 2001 notional) and scraped BBC 2005 data.
#
# Objects produced: baxter2001, baxter1997, baxter1992nb, bbc2005, baxter2001nb.
# All input paths are relative to the current working directory.

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Carry the last non-missing value forward over runs of NA.
#
# d: an atomic vector.
# Returns a vector of the same length and type in which every NA has been
# replaced by the nearest preceding non-NA value. NAs at the very start of the
# vector have nothing to copy from and are left as NA.
filldown <- function(d) {
  present <- !is.na(d)
  # Ordinal of the most recent non-NA value at each position (0 = none yet).
  last_seen <- cumsum(present)
  fillable <- last_seen > 0
  d[fillable] <- d[present][last_seen[fillable]]
  d
}

# Parse one of the Baxter "conlist" flat files.
#
# path:        file to read; every record occupies 10 consecutive lines.
# vote_fields: names of the columns holding vote counts.
# Returns a data frame with columns Constituency, Region, Votes.Conservative,
# Votes.Labour, Votes.LibDem and Votes.Other. Region is returned as the
# character string found after the "Region:" prefix.
read_baxter_flatfile <- function(path, vote_fields) {
  fields <- c("Constituency", "Candidate", "Region", "Electorate",
              "Votes.Conservative", "Votes.Labour", "Votes.LibDem",
              "Votes.Nationalist", "Votes.Minority", "OtherVotes")

  # Drop the first nchar(prefix) characters of every element.
  strip_prefix <- function(x, prefix) {
    substr(x, start = nchar(prefix) + 1, stop = nchar(x))
  }

  raw <- scan(path, what = character(0), sep = "\n")
  tab <- data.frame(matrix(raw, byrow = TRUE, ncol = length(fields)))
  names(tab) <- fields
  # data.frame() may have produced factors; work on plain strings.
  tab[] <- lapply(tab, as.character)

  tab$Constituency <- strip_prefix(tab$Constituency, "Name:")
  tab$Candidate <- strip_prefix(tab$Candidate, "MP:")
  tab$Region <- strip_prefix(tab$Region, "Region:")

  # Every vote column is stripped of a prefix the length of "CON:"
  # (assumes all party tags in the file are the same width -- TODO confirm).
  tab[, vote_fields] <- lapply(
    tab[, vote_fields],
    function(x) as.numeric(strip_prefix(x, "CON:"))
  )

  # "Other" is the larger of the nationalist and minority-party votes.
  tab$Votes.Other <- pmax(tab$Votes.Nationalist, tab$Votes.Minority)

  tab[, c("Constituency", "Region", "Votes.Conservative", "Votes.Labour",
          "Votes.LibDem", "Votes.Other")]
}

# Append the N.Ireland rows and recode Baxter's region codes.
#
# tab:           result of read_baxter_flatfile().
# ni_rows:       data frame of N.Ireland constituencies containing at least
#                the columns of `tab`, with Region already set to the Baxter
#                code for N.Ireland.
# region_map:    named character vector; position i is Baxter region code i,
#                the name is Baxter's region label and the value is the
#                coarser region it maps to.
# region_levels: factor levels to use for the resulting Region column.
# Returns `tab` with the extra rows and Region as a factor of coarse regions.
add_ni_and_regions <- function(tab, ni_rows, region_map, region_levels) {
  tab <- rbind(tab, ni_rows[, names(tab)])
  tab$Region <- factor(tab$Region,
                       levels = seq_along(region_map),
                       labels = names(region_map))
  # Indexing by a factor uses its integer codes, i.e. the Baxter region number.
  tab$Region <- factor(region_map[tab$Region], levels = region_levels)
  tab
}

# ---------------------------------------------------------------------------
# 2001 results
# ---------------------------------------------------------------------------

res <- read.csv("2001.csv", as.is = TRUE, header = TRUE, na.strings = "")
res <- res[!is.na(res$Candidate), ]

reg <- read.csv("regions.csv", as.is = TRUE, header = TRUE, na.strings = "")
reg$Region <- filldown(reg$Region)

# One row per constituency: only rows that carry a constituency name.
const <- res[!is.na(res$Constituency),
             c("Constituency", "Electorate", "Turnout", "Status", "Declaration")]
# Drop the last character of Turnout (presumably a "%" sign -- TODO confirm)
# and convert the percentage into a number of voters.
const$Turnout <- as.numeric(substr(const$Turnout, 1, nchar(const$Turnout) - 1)) / 100
const$Turnout <- round(const$Turnout * const$Electorate)

# One row per candidate; blank cells inherit the value from the row above.
cand <- res[, c("Constituency", "Candidate", "Party", "Votes")]
cand[] <- lapply(cand, filldown)
cand$Party[cand$Party == "Labour/Co-operative"] <- "Labour"
cand$Party[cand$Party == "Liberal Democrat"] <- "LibDem"

# Look regions up by lower-cased constituency name.
row.names(reg) <- tolower(reg$Constituency)
const$Region <- reg[tolower(const$Constituency), "Region"]
const$Region <- factor(
  const$Region,
  levels = c("Southeast", "Southwest", "East Anglia", "East Midlands",
             "West Midlands", "Yorkshire and Humberside", "North",
             "Northwest", "London", "Scotland", "N.Ireland", "Wales")
)
row.names(const) <- const$Constituency

## Restrict to big3
parties.int <- c("Labour", "Conservative", "LibDem")
candparties <- cand$Party
candparties[!(candparties %in% parties.int)] <- "Other"
# Largest vote per party group within each constituency.
cand.int <- aggregate(cand$Votes,
                      by = list(Party = candparties,
                                Constituency = cand$Constituency),
                      FUN = max)
names(cand.int)[3] <- "Votes"
cand.int$Constituency <- as.character(cand.int$Constituency)
const.int <- reshape(cand.int, direction = "wide",
                     idvar = "Constituency", timevar = "Party")
# A party that did not stand gets 0 votes rather than NA.
const.int[] <- lapply(const.int, function(d) ifelse(is.na(d), 0, d))
const.int$Region <- const[const.int$Constituency, "Region"]

# ---------------------------------------------------------------------------
# Martin Baxter's tables
# ---------------------------------------------------------------------------

# Position in this vector = Baxter region code (1..20); value = coarse region.
baxterregions <- c("East Scotland" = "Scotland", "West Scotland" = "Scotland",
                   "N.Ireland" = "N.Ireland", "North" = "North",
                   "Lancashire" = "Northwest",
                   "Greater Manchester" = "Northwest",
                   "Yorkshire" = "Yorkshire and Humberside",
                   "Humberside" = "Yorkshire and Humberside",
                   "West Midlands" = "West Midlands",
                   "East Midlands" = "East Midlands",
                   "Severn" = "West Midlands", "Wales" = "Wales",
                   "East Anglia" = "East Anglia", "Essex" = "Southeast",
                   "West" = "Southwest", "North London" = "London",
                   "South London" = "London", "South West" = "Southwest",
                   "South" = "Southeast", "South East" = "Southeast")

votecols <- c("Votes.Conservative", "Votes.Labour", "Votes.LibDem",
              "Votes.Nationalist", "Votes.Minority", "OtherVotes")

# N.Ireland rows come from the 2001 results table and are appended to each
# Baxter table below. which() drops rows whose Region is NA, which a bare
# logical index would turn into all-NA rows.
ni <- const.int[which(const.int$Region == "N.Ireland"), ]
ni$Region <- 3  # position of "N.Ireland" in baxterregions

# 2001, actual boundaries
baxter2001 <- add_ni_and_regions(
  read_baxter_flatfile("conlist_flatfile01.txt", votecols),
  ni, baxterregions, levels(const.int$Region)
)

# 1997
baxter1997 <- add_ni_and_regions(
  read_baxter_flatfile("conlist_flatfile97.txt", votecols),
  ni, baxterregions, levels(const.int$Region)
)

# 1992, notional boundaries
baxter1992nb <- add_ni_and_regions(
  read_baxter_flatfile("conlist_flatfile92.txt", votecols),
  ni, baxterregions, levels(const.int$Region)
)

# ---------------------------------------------------------------------------
## Read BBC data scraped by DJW
# ---------------------------------------------------------------------------

regions <- c("South West", "South East", "London", "Eastern", "East Midlands",
             "West Midlands", "North West", "Yorks & Humber", "North East",
             "Scotland", "Wales", "Northern Ireland")
partytrans <- c("Liberal Democrat" = "LibDem")

# NOTE: from here on const, cand and const.int refer to the 2005 data.
const <- read.csv("data/2005const.csv", header = TRUE, as.is = TRUE)
turnout <- read.csv("data/2005turnout.csv", header = TRUE, as.is = TRUE)
cand <- read.csv("data/2005cand.csv", header = TRUE, as.is = TRUE)

# Some minor renaming/refactoring
const$Region <- factor(const$Region, levels = regions)
partynames <- partytrans[cand$Party]
cand$Party <- ifelse(!is.na(partynames), partynames, cand$Party)

# Reshape to have a wide table, one row per constituency, columns for
# interesting parties
parties.int <- c("Labour", "Conservative", "LibDem")
candparties <- cand$Party
candparties[!(candparties %in% parties.int)] <- "Other"
cand.int <- aggregate(cand$Votes,
                      by = list(Party = candparties,
                                Constituency = cand$Constituency),
                      FUN = max)
names(cand.int)[3] <- "Votes"
cand.int$Constituency <- as.character(cand.int$Constituency)
const.int <- reshape(cand.int, direction = "wide",
                     idvar = "Constituency", timevar = "Party")
const.int[] <- lapply(const.int, function(d) ifelse(is.na(d), 0, d))

# Add in columns for region and turnout
const.int <- merge(const.int, turnout, all = TRUE)
const.int <- merge(const.int, const, all = TRUE)

# Save it
bbc2005 <- const.int
bbc2005$Constituency[bbc2005$Constituency == "Glasgow North East (Speaker)"] <- "Glasgow North East"
row.names(bbc2005) <- bbc2005$Constituency

# ---------------------------------------------------------------------------
# 2001, notional boundaries
# ---------------------------------------------------------------------------

# Unlike the other Baxter tables, no N.Ireland rows are appended and Region is
# left as the raw string from the file: that step was commented out in the
# original script (const.int holds 2005 data at this point). It would read:
#   df <- add_ni_and_regions(df, ni, baxterregions, levels(const.int$Region))
df <- read_baxter_flatfile("conlist_flatfile01nb.txt", votecols)
baxter2001nb <- df
row.names(baxter2001nb) <- baxter2001nb$Constituency