COMING SOON



Political Mining


Goal here is to apply different data mining tools (GLM, Random Forest/Boosted trees, bayesian analysis in STAN, etc.) to political data.


\[Pr(y_{i}) = {logit}^{-1}(\alpha_{i}+\beta_{i}{X}_{i})\]

a2_n <- a2 %>% group_by(year, Red_Blue) %>% nest()

# Define glm function
g1 <- function(df) {glm(VoteR ~ Inc_c + Religion + Gender + Education + Race, data=df, family = binomial(link="logit"))}
g2 <- function(df) {bayesglm(VoteR ~ Inc_c + Religion + Gender + Education + Race, data=df, 
                             family = binomial(link="logit"), prior.scale = 2.5, prior.df = 1)}
g3 <- function(df) {lm(VoteR ~ Inc_c + Religion + Gender + Education + Race, data=df)}

# Run glm using map function
m1 <- a2_n %>% mutate(model=data %>% map(g1))
m2 <- a2_n %>% mutate(model=data %>% map(g2))

# Using Broom to extract coefficient
r1 <- m1 %>% mutate(tidy = model %>% map(tidy))
ru1 <- unnest(r1, tidy) %>% filter(term == "Inc_c")

r2 <-m2 %>% mutate(tidy = model %>% map(tidy))
ru2 <- unnest(r2, tidy) %>% filter(term == "Inc_c")

# Plotting Income Coefficients by Year/Red_Blue
voteR1 <- plot_ly(ru1, x = ~year, y = ~estimate) %>%
  add_markers(error_y = ~list(value = std.error), color = ~factor(Red_Blue), colors = c("Blue", "Red")) %>%
  layout(margin = list(l = 50), title = "GLM", xaxis = list(title = FALSE))

voteR2 <- plot_ly(ru2, x = ~year, y = ~estimate) %>%
  add_markers(error_y = ~list(value = std.error), color = ~factor(Red_Blue), colors = c("Blue", "Red"), 
              showlegend = FALSE) %>%
  layout(margin = list(l = 50), title = "Bayesian GLM", xaxis = list(title = FALSE))

# Model: lm
# Define VoteR as numeric
a2$VoteR <- as.numeric(as.character(a2$VoteR))

# Group by using nest function
a2_n <- a2 %>% group_by(year, Red_Blue) %>% nest()

# Run glm using map function
m3 <- a2_n %>% mutate(model=data %>% map(g3))

# Using Broom to extract coefficient
r3 <-m3 %>% mutate(tidy = model %>% map(tidy))
ru3 <- unnest(r3, tidy) %>% filter(term == "Inc_c")

voteR3 <- plot_ly(ru3, x = ~year, y = ~estimate) %>%
  add_markers(error_y = ~list(value = std.error), color = ~factor(Red_Blue), colors = c("Blue", "Red"), 
              showlegend = FALSE) %>%
  layout(margin = list(l = 50), title = "LM", xaxis = list(title = FALSE))

tags$div(
  style = "display: flex; flex-wrap: wrap",
  tags$div(voteR1, style = "width: 100%; padding: 1em; border: solid;"),
  tags$div(voteR2, style = "width: 50%; padding: 1em; border: solid;"), tags$div(voteR3, style = "width: 50%; padding: 1em; border: solid;")
) %>% browsable()




Census Region


# Model: Group by Year/Red_Blue (Plot Income Coefficient) - GLM, Bayesglm
# Group by using nest function
a3_n <- a2 %>% group_by(year, geo_4) %>% nest()
a4_n <- a2 %>% group_by(year, Census_Region) %>% nest()

# Run glm using map function
m4 <- a3_n %>% mutate(model=data %>% map(g1))
m5 <- a4_n %>% mutate(model=data %>% map(g1))

# Using Broom to extract coefficient
r4 <- m4 %>% mutate(tidy = model %>% map(tidy))
ru4 <- unnest(r4, tidy) %>% filter(term == "Inc_c")

r5 <-m5 %>% mutate(tidy = model %>% map(tidy))
ru5 <- unnest(r5, tidy) %>% filter(term == "Inc_c")

# Plotting Income Coefficients by Year/Red_Blue
voteR4 <- plot_ly(ru4, x = ~year, y = ~estimate) %>%
  add_markers(error_y = ~list(value = std.error), color = ~factor(geo_4), colors = c("Blue", "Red", "Grey")) %>%
  layout(margin = list(l = 50), xaxis = list(title = FALSE), title = "Last 4 Elections")

voteR5 <- plot_ly(ru5, x = ~year, y = ~estimate) %>%
  add_markers(error_y = ~list(value = std.error), color = ~factor(Census_Region), colors = viridis(4)) %>%
  layout(margin = list(l = 50), xaxis = list(title = FALSE), title = "Census Region")

tags$div(
  style = "display: flex; flex-wrap: wrap",
  tags$div(voteR4, style = "width: 100%; padding: 1em; border: solid;"), tags$div(voteR5, style = "width: 100%; padding: 1em; border: solid;")
) %>% browsable()




Red & Blue


# Model 1: Group by Year/Red_Blue (Plot Education Coefficient) - GLM, Bayesglm
# Group by using nest function
a2_n <- a2 %>% group_by(year, Red_Blue_Battle) %>% nest()

# Define glm function
g1 <- function(df) {glm(voted ~ age + marital + sex +  Race + edu_c, data=df, family = binomial(link="logit"))}
g2 <- function(df) {bayesglm(voted ~ age + marital + sex + Race + edu_c, data=df, 
                             family = binomial(link="logit"), prior.scale = 2.5, prior.df = 1)}
g3 <- function(df) {lm(voted ~ age + marital + sex + Race + edu_c, data=df)}

# Run glm using map function
m1 <- a2_n %>% mutate(model=data %>% map(g1))
m2 <- a2_n %>% mutate(model=data %>% map(g2))

# Using Broom to extract coefficient
r1 <- m1 %>% mutate(tidy = model %>% map(tidy))
ru1 <- unnest(r1, tidy) %>% filter(term == "edu_c")

r2 <-m2 %>% mutate(tidy = model %>% map(tidy))
ru2 <- unnest(r2, tidy) %>% filter(term == "edu_c")

# Plotting Income Coefficients by Year/Red_Blue
voteR1 <- plot_ly(ru1, x = ~year, y = ~estimate) %>%
  add_markers(error_y = ~list(value = std.error), color = ~factor(Red_Blue_Battle), colors = c("grey","Blue", "Red")) %>%
  layout(margin = list(l = 50), title = "GLM")

voteR2 <- plot_ly(ru2, x = ~year, y = ~estimate) %>%
  add_markers(error_y = ~list(value = std.error), color = ~factor(Red_Blue_Battle), colors = c("grey","Blue", "Red"), 
              showlegend = FALSE) %>%
  layout(margin = list(l = 50), title = "Bayesian GLM")

# Model: lm
# Define voted as numeric
a2$voted <- as.numeric(as.character(a2$voted))

# Group by using nest function
a2_n <- a2 %>% group_by(year, Red_Blue_Battle) %>% nest()

# Run glm using map function
m3 <- a2_n %>% mutate(model=data %>% map(g3))

# Using Broom to extract coefficient
r3 <-m3 %>% mutate(tidy = model %>% map(tidy))
ru3 <- unnest(r3, tidy) %>% filter(term == "edu_c")

voteR3 <- plot_ly(ru3, x = ~year, y = ~estimate) %>%
  add_markers(error_y = ~list(value = std.error), color = ~factor(Red_Blue_Battle), colors = c("grey","Blue", "Red"), 
              showlegend = FALSE) %>%
  layout(margin = list(l = 50), title = "LM")

tags$div(
  style = "display: flex; flex-wrap: wrap",
  tags$div(voteR1, style = "width: 100%; padding: 1em; border: solid;"),
  tags$div(voteR2, style = "width: 50%; padding: 1em; border: solid;"), tags$div(voteR3, style = "width: 50%; padding: 1em; border: solid;")
) %>% browsable()




Race


# Model 2: Group by Year/Race (Plot Education Coefficient) - GLM, Bayesglm
# Group by using nest function
a2_n <- a2 %>% group_by(year, Race) %>% nest()

# Define glm function
g1 <- function(df) {glm(voted ~ age + marital + sex +  edu_c, data=df, family = binomial(link="logit"))}
g2 <- function(df) {bayesglm(voted ~ age + marital + sex +  edu_c, data=df, 
                             family = binomial(link="logit"), prior.scale = 2.5, prior.df = 1)}
g3 <- function(df) {lm(voted ~ age + marital + sex +  edu_c, data=df)}

# Run glm using map function
m1 <- a2_n %>% mutate(model=data %>% map(g1))
m2 <- a2_n %>% mutate(model=data %>% map(g2))

# Using Broom to extract coefficient
r1 <- m1 %>% mutate(tidy = model %>% map(tidy))
ru1 <- unnest(r1, tidy) %>% filter(term == "edu_c")

r2 <-m2 %>% mutate(tidy = model %>% map(tidy))
ru2 <- unnest(r2, tidy) %>% filter(term == "edu_c")

# Plotting Income Coefficients by Year/Red_Blue
voteD1 <- plot_ly(ru1, x = ~year, y = ~estimate) %>%
  add_markers(error_y = ~list(value = std.error), color = ~factor(Race), colors = viridis(4)) %>%
  layout(margin = list(l = 50), title = "GLM")

voteD2 <- plot_ly(ru2, x = ~year, y = ~estimate) %>%
  add_markers(error_y = ~list(value = std.error), color = ~factor(Race), colors = viridis(4), 
              showlegend = FALSE) %>%
  layout(margin = list(l = 50), title = "Bayesian GLM")

# Model: lm
# Define voted as numeric
a2$voted <- as.numeric(as.character(a2$voted))

# Group by using nest function
a2_n <- a2 %>% group_by(year, Race) %>% nest()

# Run glm using map function
m3 <- a2_n %>% mutate(model=data %>% map(g3))

# Using Broom to extract coefficient
r3 <-m3 %>% mutate(tidy = model %>% map(tidy))
ru3 <- unnest(r3, tidy) %>% filter(term == "edu_c")

voteD3 <- plot_ly(ru3, x = ~year, y = ~estimate) %>%
  add_markers(error_y = ~list(value = std.error), color = ~factor(Race), colors = viridis(4), 
              showlegend = FALSE) %>%
  layout(margin = list(l = 50), title = "LM")

tags$div(
  style = "display: flex; flex-wrap: wrap",
  tags$div(voteD1, style = "width: 100%; padding: 1em; border: solid;"),
  tags$div(voteD2, style = "width: 50%; padding: 1em; border: solid;"), tags$div(voteD3, style = "width: 50%; padding: 1em; border: solid;")
) %>% browsable()