-
Notifications
You must be signed in to change notification settings - Fork 2k
googlegroupsformatter
bhive01 edited this page Dec 11, 2011
·
14 revisions
require(mclust)
require(ggplot2)
require(ellipse)
#if function, input is original dataset
orig.data<-iris[1:4]
data.mclust<-Mclust(orig.data)
BIC.data<-as.data.frame(data.mclust$BIC)
BIC.data$NumComp<-rownames(BIC.data)
melted.BIC<-melt(BIC.data, var.ids= "NumComp")
ggplot(melted.BIC, aes(x=as.numeric(NumComp), y=value, colour=variable, group=variable))+
scale_x_continuous("Number of Components")+
scale_y_continuous("BIC")+
scale_colour_hue("")+
geom_point()+
geom_line()+
theme_bw()
#plotting correlation matrix of original data colored by model classification
#stripped from plotmatrix()
mapping=aes(colour=as.factor(data.mclust$classification), shape=as.factor(data.mclust$classification))
grid <- expand.grid(x = 1:ncol(orig.data), y = 1:ncol(orig.data))
grid <- subset(grid, x != y)
all <- do.call("rbind", lapply(1:nrow(grid), function(i) {
xcol <- grid[i, "x"]
ycol <- grid[i, "y"]
data.frame(xvar = names(orig.data)[ycol], yvar = names(orig.data)[xcol],
x = orig.data[, xcol], y = orig.data[, ycol], orig.data)
}))
all$xvar <- factor(all$xvar, levels = names(orig.data))
all$yvar <- factor(all$yvar, levels = names(orig.data))
densities <- do.call("rbind", lapply(1:ncol(orig.data), function(i) {
data.frame(xvar = names(orig.data)[i], yvar = names(orig.data)[i],
x = orig.data[, i])
}))
mapping <- defaults(mapping, aes_string(x = "x", y = "y"))
class(mapping) <- "uneval"
ggplot(all, mapping) +
facet_grid(xvar ~ yvar, scales = "free") +
geom_point(na.rm = TRUE) +
stat_density(aes(x = x, y = ..scaled.. * diff(range(x)) + min(x)), data = densities, position = "identity", colour = "grey20", geom = "line") +
opts(legend.postion= "none")
#cant get rid of the damn legend
#it would be a bunch faster if it only plotted 1 v 2 and not also 2 v 1... cest la vie
#scatter plot of width by length
#color and shape by classification
#center of cluster by data.mclust$parameters$mean
get.ellipses <- function(coords, mclust.fit){
centers <- mclust.fit$parameters$mean[coords, ]
vars <- mclust.fit$parameters$variance$sigma[coords, coords, ]
ldply(1:ncol(centers), function(cluster){
data.frame(ellipse(vars[,,cluster], centre = centers[, cluster],
level = 0.5), classification = cluster)
})
}
iris.el <- get.ellipses(c("Sepal.Length", "Sepal.Width"), data.mclust)
orig.data$classification <- data.mclust$classification
ggplot(orig.data, aes(Sepal.Length, Sepal.Width, colour = factor(classification))) +
geom_point(aes(shape = classification))+
geom_path(data = iris.el,
aes(group = classification, linetype = classification))
x4 <- c(330, 835, 317, 396, 486, 391)
y4 <- c(4098.11, 4099.14, 4098.16, 4098.35, 4099.30, 4096.73)
y4b <- c(0.47, 0.29, 0.46, 0.83, 0.72, 0.87)
y4min<-y4-y4b
y4max<-y4+y4b
dat.tgg817a <- data.frame(x4, y4, y4b,y4max, y4min)
t4 <- sum(y4/y4b)/(sum(1/y4b))
x5 <- c(721, 257, 204, 271)
y5 <- c(4096.33, 4095.51, 4095.29, 4095.19)
y5b <- c(0.10, 0.15, 0.18, 0.22)
y5min<-y5-y5b
y5max<-y5+y5b
dat.tgg817b <- data.frame(x5, y5, y5b, y5max, y5min)
t5 <- sum(y5/y5b)/(sum(1/y5b))
ggplot()+
geom_point(data=dat.tgg817a, aes(x=x4, y=y4))+
geom_errorbar(data=dat.tgg817a, aes(x=x4, y=y4, ymin= y4min, ymax= y4max), width = 3, colour='NavyBlue')+
geom_hline(aes(yintercept=t4), colour ='NavyBlue', size=2)+
geom_point(data=dat.tgg817b, aes(x=x5, y=y5), colour='DarkRed')+
geom_hline(aes(yintercept=t5), colour ='DarkRed', size=2)+
geom_errorbar(data=dat.tgg817b, aes(x=x5, y=y5, ymin= y5min, ymax= y5max), width = 3, colour='DarkRed')+
coord_cartesian(ylim=c(4094,4101), wise=TRUE)
require(plyr)
require(ggplot2)
example<-structure(list(x = c(11.3, 8, 6, 4, 2, 1, 11.3, 8, 6, 4, 2, 1,
11.3, 8, 6, 4, 2, 1, 11.3, 8, 6, 4, 2, 1, 11.3, 8, 6, 4, 2, 1,
11.3, 8, 6, 4, 2, 1, 11.3, 8, 6, 4, 2, 1, 11.3, 8, 6, 4, 2, 1,
11.3, 8, 6, 4, 2, 1, 11.3, 8, 6, 4, 2, 1, 11.3, 8, 6, 4, 2, 1,
11.3, 8, 6, 4, 2, 1), y = c(6.8, 6.4, 6.3, 5.9, 5.8, 5.5, 6.2,
6, 5.9, 5.8, 5.7, 5.6, 3.2, 3.7, 4.1, 4.5, 4.9, 5.6, 5.4, 5.1,
5.2, 5, 5.6, 5.6, 6.5, 6.4, 6.3, 5.8, 5.6, 5.5, 6, 6, 5.9, 5.8,
5.7, 5.6, 4, 4.1, 4.3, 4.5, 4.8, 5.4, 5.4, 5.1, 5.2, 5, 5.6,
5.6, 6.4, 6.2, 6.1, 5.8, 5.6, 5.5, 6.2, 6, 5.9, 5.8, 5.7, 5.6,
3.2, 3.7, 4.1, 4.5, 4.9, 5.6, 5.4, 5.1, 5.2, 5, 5.6, 5.6), excipient = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A", "B", "C", "none"
), class = "factor"), pH = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L), .Label = c("5.5", "5.5S", "6", "6.5"), class = "factor")), .Names = c("x",
"y", "excipient", "pH"), class = "data.frame", row.names = c(NA,
-72L))
model.example<-dlply(example, .(excipient, pH), function(X) lm(y~x, data=X))
model.coef<-ldply(model.example, function(model) {
c(
b = coef(model)[1]
, m = coef(model)[2]
, r2 = summary(model)$r.squared
)
})
#renames columns, not sure why ddply isn't outputting correctly
colnames(model.coef)[3:4]<-c("b", "m")
#Build your expressions
model.coef<-ddply(model.coef, .(excipient, pH), transform, eq =
as.character(as.expression(substitute(italic(y) == m %.% italic(x) + b*","~~italic(r)^2~"="~r2,
list(
b = format(b, digits = 3),
m = format(m, digits = 4),
r2 = format(r2, digits = 3)
)
)))
, .progress="text")
#plot
ggplot(data=example, aes(x=x, y=y)) +
geom_point(size = 3) +
stat_smooth(method="lm", se=TRUE) +
facet_grid(excipient ~ pH) +
theme_bw()+
geom_text(data=model.coef, aes(x=Inf, y=-Inf, label=eq),
colour="black", parse = TRUE, hjust=1, vjust=0, size=2)