library(ggplot2)
library(grid)
library(gridExtra)
graphres <- function(df, text="") {
  p1 <- ggplot(df, aes(x=X, y=Y))   + geom_point(alpha=0.3, color="blue") + ggtitle("Y vs X")
  p2 <- ggplot(df, aes(x=X, y=res)) + geom_point(alpha=0.3, color="blue") + ggtitle("Residuals vs X")
  p3  <- ggplot(df, aes(x=res)) + geom_histogram(alpha=0.3, color="blue", fill="lightblue") + ggtitle("Histogram, residuals")
  p4 <- ggplot(df, aes(x=Y, y=res)) + geom_point(alpha=0.3, color="blue") + ggtitle("Residuals vs Y")
  arrangeGrob(p1, p2, p3, p4, top=textGrob(text,gp=gpar(fontsize=20,font=3)))
  }

df <- data.frame(X = rnorm(400,4,1))
df$Y = 2.5 + 0.5*df$X + rnorm(400,0,1)
mod1 <- lm(data=df, Y ~ X)
df$yhat <- predict(mod1)
df$res = df$Y-df$yhat

grobj <- graphres(df, "Simple Residuals")
ggsave("residualsR0.png", grobj, units="cm", width=20, height=11)

df2 <- data.frame(X = rnorm(400,4,1))
df2$Y = exp(1)*exp(0.5*df2$X)*exp(rnorm(400,0,1))
mod2 <- lm(data=df2, Y ~ X)
df2$yhat <- predict(mod2)
df2$res = df2$Y-df2$yhat

grobj <- graphres(df2, "Heteroscedasticity: Multiplicative relationship")
ggsave("residualsR1.png", grobj, units="cm", width=20, height=11)

df3 <- data.frame(X = append(rnorm(200,4,2), rnorm(200,8,2)))
## df3$X = df3$X[df3$X>0]
df3$Y = 1 + 2.5*df3$X + df3$X*rnorm(length(df3$X), 0,1)
mod2 <- lm(data=df3, Y ~ X)
df3$yhat <- predict(mod2)
df3$res = df3$Y-df3$yhat

grobj <- graphres(df2, "Heteroscedasticity: correlation between X and sigma")
ggsave("residualsR2.png", grobj, units="cm", width=20, height=11)

df4 <- data.frame(X = rnorm(400,4,2))
df4$Y = 1 + 1.5*df4$X + 3*exp(rnorm(400,0,0.5))
mod2 <- lm(data=df4, Y ~ X)
df4$yhat <- predict(mod2)
df4$res = df4$Y-df4$yhat
lo = mean(df4$Y) - 3*sd(df4$Y)
hi = mean(df4$Y) + 3*sd(df4$Y)
df4 = df4[df4$Y > lo && df4$Y < hi,]

grobj <- graphres(df4, "Assymetry")
ggsave("residualsR3.png", grobj, units="cm", width=20, height=11)

df5 <- data.frame(X = append(rnorm(200,4,2), rnorm(200,8,2)))
df5$Y = 5 + 1.5*df5$X - 0.1*df5$X*df5$X + rnorm(400,0,1)
mod2 <- lm(data=df5, Y ~ X)
df5$yhat <- predict(mod2)
df5$res = df5$Y-df5$yhat
lo = mean(df5$Y) - 3*sd(df5$Y)
hi = mean(df5$Y) + 3*sd(df5$Y)
df5 = df5[df5$Y > lo && df5$Y < hi,]

grobj <- graphres(df5, "Non-linear relationship")
ggsave("residualsR4.png", grobj, units="cm", width=20, height=11)

df6 <- data.frame(X = append(rnorm(200,4,2), rnorm(200,8,2)))
df6$Y = 2.5 + 0.5*df6$X + rnorm(400,0,1)
df6$X[1] = 25
df6$Y[1] = 38
modwol <- lm(data=df6, Y ~ X)
df6$yhat <- predict(modwol)
df6$res = df6$Y-df6$yhat

grobj <- graphres(df6, "Outliers")
ggsave("residualsR5.png", grobj, units="cm", width=20, height=11)

modnol <- lm(data=df6[2:400,], Y ~ X)
df6$yhat2 <- append(NA,predict(modnol))
df6$res2 = df6$Y-df6$yhat2

ggplot(data=df6, aes(x=X, y=Y)) + geom_point() + geom_point(data=df6[1,], aes(x=X, y=Y), color="red") +
  geom_function(data=df6, color="red", fun=function (x) modwol$coefficients["(Intercept)"] + modwol$coefficients["X"]*x) + 
  geom_function(data=df6[2:400,], fun=function (x) modnol$coefficients["(Intercept)"] + modnol$coefficients["X"]*x)
ggsave("residualsR6.png", units="cm", width=20, height=11)

brgnp = data.frame(bir = c(29.0, 19.5, 14.1, 21.2, 13.7, 17.8, 14.5, 12.4, 28.7, 13.0, 11.0, 27.8,
                           43.6, 20.4, 10.7, 28.0, 26.6, 43.3, 41.8, 30.4, 12.6, 33.4, 11.2, 13.2, 15.2),
                   gnp = c( 1.6,  4.0,  16.6,  2.6,  20.8,  1.3,  1.6,  24.2,  .5,  24.1,  19.8,
                           .3,  .7,  13.6,  27.3,  2.5,  3.1,  .2,  .4,  .7,  8.6,  2.6,  13.4,  17.4,  22.6))
ggplot(brgnp, aes(x=gnp, y=bir)) + geom_point() + geom_smooth(method = "loess", se = FALSE) +
  xlab("GNP per capita") + ylab("Birth rate") + ggtitle("Birth rate and GNP per capita for selected countries")
ggsave("birgnplowessR.png", units="cm", width=20, height=11)

summary(modbir <- lm(data=brgnp, bir ~ gnp))
brgnp$yhatlin <- predict(modbir)

ggplot(data=brgnp, aes(x=gnp, y=bir, yhat=yhatlin)) + geom_point(aes(x=gnp, y=bir)) + geom_point(aes(x=gnp, y=yhatlin, color="red")) + geom_function(data=brgnp, fun=function (x) modbir$coefficients["(Intercept)"] + modbir$coefficients["gnp"]*x)
ggsave("birgnplinR.png", units="cm", width=20, height=11)

brgnp$gsq <- brgnp$gnp^2
summary(modq <- lm(data=brgnp, bir ~ gnp + gsq))

brgnp$yhatquad <- predict(modq)
ggplot(data=brgnp, aes(x=gnp, y=bir, yhat=yhatquad)) + geom_point(aes(x=gnp, y=bir)) + geom_point(aes(x=gnp, y=yhatquad, color="red")) + geom_function(data=brgnp, fun=function (x) modq$coefficients["(Intercept)"] + modq$coefficients["gnp"]*x + modq$coefficients["gsq"]*x^2)
ggsave("birgnpquadR.png", units="cm", width=20, height=11)

brgnp$sqgnp <- sqrt(brgnp$gnp)
summary(modsqrt <- lm(data=brgnp, bir ~ sqgnp))
brgnp$yhatsqrt <- predict(modsqrt)

brgnp$yhatsqrt <- predict(modsqrt)
ggplot(data=brgnp, aes(x=gnp, y=bir, yhat=yhatsqrt)) + geom_point(aes(x=gnp, y=bir)) + geom_point(aes(x=gnp, y=yhatsqrt, color="red")) + geom_function(data=brgnp, fun=function (x) modsqrt$coefficients["(Intercept)"] + modsqrt$coefficients["sqgnp"]*sqrt(x))
ggsave("birgnpsqrtR.png", units="cm", width=20, height=11)

brgnp$lgnp <- log(brgnp$gnp)
summary(modlog <- lm(data=brgnp, bir ~ lgnp))
brgnp$yhatlog <- predict(modlog)

ggplot(data=brgnp, aes(x=gnp, y=bir, yhat=yhatlog)) + geom_point(aes(x=gnp, y=bir)) + geom_point(aes(x=gnp, y=yhatlog, color="red")) + geom_function(data=brgnp, fun=function (x) modlog$coefficients["(Intercept)"] + modlog$coefficients["lgnp"]*log(x))
ggsave("birgnplogR.png", units="cm", width=20, height=11)

ggplot(data=brgnp, aes(x=gnp, y=bir, yhat=yhatlog)) + geom_point(aes(x=gnp, y=bir)) + geom_point(aes(x=gnp, y=yhatlog, color="red")) + geom_function(data=brgnp, fun=function (x) modlog$coefficients["(Intercept)"] + modlog$coefficients["lgnp"]*log(x)) + scale_x_continuous(trans='log10')
ggsave("birgnplogxbR.png", units="cm", width=20, height=11)

ggplot(data=brgnp, aes(x=gnp, y=bir, yhat=yhatlog)) + geom_point(aes(x=gnp, y=lgnp)) + geom_point(aes(x=gnp, y=sqgnp))
ggsave("birgnpsqlgR.png", units="cm", width=20, height=11)
