且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

R:ggplot对聚类摘要进行微调

更新时间:2022-12-30 18:40:50

One major change: rather than matching the heights of the two charts, I extract the plot panel from gp2 and insert it into column 2 of gp1. The extracted panel has no surrounding margins, which partly takes care of your point 3.

With respect to point 2: expand the limits of the axis to make room for the labels (see point 2 in the code below). The parameters for points 2 and 3 were set by trial and error; adjusting one of them means the other needs to be adjusted as well.

With respect to point 1: expand the axis using the additive component of `expand` to add half a unit to each end of the axis (see point 1 in the code below).

Minor edit: updating to ggplot2 2.2.0 and R 3.3.2
axis.ticks.margin is deprecated

# Transpose the built-in USArrests data so that its rows (the states) become
# the columns of X, which is what plot_color_clust() clusters.
X <- t(USArrests)

# Draw a dendrogram of the columns of X next to a strip of coloured tiles that
# marks the N clusters obtained by cutting the tree.
#
# Arguments:
#   X     Quantitative data handed to ClustOfVar::hclustvar(X.quanti = X);
#         its columns are the items that get clustered.
#   N     Number of clusters to cut the tree into. Required: the former
#         default `N = N` referred to itself and could never be evaluated.
#   cols  Vector of at least N colours used for both the leaf labels and the
#         tiles. Defaults to rainbow(N).
#
# Returns (invisibly) the combined gtable, after drawing it on a new grid page.
#
# Side effects: attaches ggplot2, gtable, grid, ggdendro and plyr, and starts
# a new grid page on the current graphics device.
plot_color_clust <- function(X, N,
 #  cols=c("red","blue", "orange", "darkgreen","green","yellow","grey","black","white")
   cols = rainbow(N)   # Easier to pick colours
  ) {

  # Validate before loading any package so that bad input fails fast.
  if (!is.numeric(N) || length(N) != 1L || is.na(N) || N < 1) {
    stop("N must be a single positive number.")
  }
  if (N > length(cols)) stop("N too big. Not enough colors in cols.")
  if (N > ncol(X)) stop("N too big. Not enough columns in data.")

  library(ggplot2)
  library(gtable)
  library(grid)
  library(ggdendro)
  library(plyr)

  # Hierarchical clustering and the data needed to draw the tree.
  fit <- ClustOfVar::hclustvar(X.quanti = X)
  ddata_x <- dendro_data(as.dendrogram(fit))
  clusters <- cutree(fit, k = N)   # named vector: item -> cluster number

  # Tag every leaf label with the cluster it belongs to (vectorised lookup).
  lab <- ggdendro::label(ddata_x)
  lab$group <- paste0(
    "clust",
    clusters[match(as.character(lab$label), names(clusters))]
  )

  # Dendrogram, flipped so the leaves run down the page.
  p1 <- ggplot(segment(ddata_x)) +
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
    geom_text(data = lab, aes(label = label, x = x, y = -.05, colour = group),  # y = -.05 adds a little space between label and tree
              size = 4, hjust = 1) +
    scale_x_continuous(expand = c(0, .5)) +   # 1. Add half a unit to each end of the vertical axis
    expand_limits(y = -0.4) +   # 2. Make room for labels
    theme_classic() +
    scale_colour_manual(values = cols) +
    coord_flip() +
    theme(legend.position = "none", axis.line = element_blank(),
          axis.text = element_blank(), axis.title = element_blank(),
          axis.ticks = element_blank(),
          axis.ticks.length = unit(0, "cm"))

  # One tile per item, ordered like the dendrogram leaves.
  df2 <- data.frame(cluster = clusters,
      states = factor(fit$labels, levels = fit$labels[fit$order]))
  # Mean leaf position of each cluster: where its number is printed.
  df3 <- ddply(df2, .(cluster), summarise, pos = mean(as.numeric(states)))

  p2 <- ggplot(df2, aes(states, y = 1,
                    fill = factor(as.character(cluster)))) +   # 'as.character' - so that colours match with 10 or more clusters
    geom_tile() +
    scale_y_continuous(expand = c(0, 0)) +
    scale_x_discrete(expand = c(0, 0)) +
    coord_flip() +
    # size is kept inside aes() as before, so it is a mapped constant rather
    # than a literal text size; moving it out would change the rendering.
    geom_text(data = df3, aes(x = pos, label = cluster, size = 12)) +
    scale_fill_manual(values = cols)

  gp1 <- ggplotGrob(p1)  # Get ggplot grobs
  gp2 <- ggplotGrob(p2)

  # 3. Locate the panel cells by name rather than by fixed row/column index:
  # the position of the panel in the gtable is not the same in every ggplot2
  # release, so hard-coded indices can pick the wrong cell.
  tile_cell <- gp2$layout[gp2$layout$name == "panel", ]
  tree_cell <- gp1$layout[gp1$layout$name == "panel", ]

  tiles <- gp2[tile_cell$t, tile_cell$l]   # 3. Grab plot panel only from tiles plot (thus, no margins)
  gp1 <- gtable_add_grob(gp1, tiles, t = tree_cell$t, l = 2, name = "tiles")  # 3. Insert it into dendrogram plot
  gp1$widths[2] <- unit(1, "cm")  # 3. Set width of column containing tiles

  grid.newpage()
  grid.draw(gp1)

  invisible(gp1)
}

# Example: cut the tree built from X into six clusters and draw the figure.
plot_color_clust(X, N = 6)