r - convert source target value dataframe into a correlation matrix

I have a data.frame containing different combinations of a group and count values where both of them exist. I need to plot or create a matrix similar to correlation matrix. I have come up with a simple example here

dat <- data.frame(source = c('A','A','A','B','B','C'),
              target = c('B','C','D','C','D','D'),
              count = c(4,5,6,3,3,5))

> dat
  source target count
1      A      B     4
2      A      C     5
3      A      D     6
4      B      C     3
5      B      D     3
6      C      D     5

How do I get a matrix like this?? and plot this matrix

dat <- data.frame(source = c('A','A','A','B','B','C'),
              target = c('B','C','D','C','D','D'),
              count = c(4,5,6,3,3,5))

> dat
  source target count
1      A      B     4
2      A      C     5
3      A      D     6
4      B      C     3
5      B      D     3
6      C      D     5

How do I get a matrix like this?? and plot this matrix

Share edited Mar 4 at 12:23 user20650 26k5 gold badges59 silver badges94 bronze badges asked Mar 4 at 11:43 Koundy 5,5753 gold badges27 silver badges39 bronze badges

2 stackoverflow/questions/31095422/… ; stackoverflow/questions/49736791/… – user20650 Commented Mar 4 at 12:11
I don't quite get how you got your drawn matrix. In the link @user20650 posted, the generation of the matrix makes sense. In your code example A/B should be 3, your matrix shows 4. Is that a bad example or do you have special rules which were not communicated? – D.J Commented Mar 4 at 12:18
1 @D.J ; the dat they show doesn't match the code used to generate it, hence the confusion – user20650 Commented Mar 4 at 12:21
That was my assumption but you (or rather I) can't be sure until it is clarified (as OP just did). – D.J Commented Mar 4 at 12:24

Add a comment |

8 Answers 8

Sorted by: Reset to default 4

This uses dplyr, tapply and pheatmap:

library(dplyr)

by <- c("source", "target")
m <- dat %>%
  mutate(across(any_of(by), \(x) factor(x, sort(unique(unlist(.[by])))))) %>%
  { tapply(.$count, .[by], c, default = 0) } %>%
  { . + t(.) + diag(ncol(.)) }
m

giving

      target
source A B C D
     A 1 4 5 6
     B 4 1 3 3
     C 5 3 1 5
     D 6 3 5 1


library(pheatmap)
pheatmap(m, display_numbers = TRUE, cluster_rows = FALSE, cluster_cols = FALSE)

(continued after image)

We can also try the Bioconductor ComplexHeatmap package.

library(circlize)
library(ComplexHeatmap)
library(RColorBrewer)

col_fun <-  colorRamp2(c(0, 3, 6), brewer.pal(n = 3, name = "RdYlBu"))
Heatmap(m, name = "m", col = col_fun,
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  column_names_rot = 0,
  row_names_gp = gpar(fontsize = 20),
  column_names_gp = gpar(fontsize = 20),
  cell_fun = function(j, i, x, y, width, height, fill) {
    grid.text(sprintf("%.0f", m[i, j]), x, y, gp = gpar(fontsize = 30))})

(continued after image)

or without color

Heatmap(m, name = "m", 
  rect_gp = gpar(type = "none"),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  column_names_rot = 0,
  row_names_gp = gpar(fontsize = 20),
  column_names_gp = gpar(fontsize = 20),
  show_heatmap_legend = FALSE,
  cell_fun = function(j, i, x, y, width, height, fill) {
    grid.rect(x = x, y = y, width = width, height = height, gp = gpar(fill = NA))
    grid.text(sprintf("%.0f", m[i, j]), x, y, gp = gpar(fontsize = 30))})

(continued after image)

Another possibility is a balloonplot using the gplots package.

library(gplots)
balloonplot(as.table(t(m)), show.margins = FALSE, cum.margins = FALSE, main = "m")

(continued after image)

The ggpubr package also has a balloon plot function:

library(ggpubr)

ggballoonplot(m, show.label = TRUE, rotate.x.text = 0)

You can try the following options

Option 1: xtabs with base R

> `diag<-`(xtabs(count ~ ., rbind(dat, setNames(dat, names(dat)[c(2, 1, 3)]))), 1)
      target
source A B C D
     A 1 4 5 6
     B 4 1 3 3
     C 5 3 1 5
     D 6 3 5 1

Option 2: as_adjacency_matrix with igraph package

library(igraph)
dat %>%
    graph_from_data_frame(directed = FALSE) %>%
    as_adjacency_matrix(attr = "count", sparse = FALSE) %>%
    `diag<-`(1)

which gives

"Extend" your data to include (1) the inverse of every combination and (2) identities, then plot using ggplot2::geom_tile() and geom_text():

library(ggplot2)
library(forcats)

vals <- union(dat$source, dat$target)
dat_ext <- data.frame(
  source = c(dat$source, dat$target, vals),
  target = c(dat$target, dat$source, vals),
  count = c(dat$count, dat$count, rep(1, length(vals)))
)

ggplot(dat_ext, aes(source, fct_rev(target))) +
  geom_tile(color = "black") +
  geom_text(aes(label = count)) +
  scale_x_discrete(position = "top") +
  theme_void() +
  theme(axis.text = element_text(face = "bold"))

dat <- data.frame(
  source = c('A', 'A', 'A', 'B', 'B', 'C'),
  target = c('B', 'C', 'D', 'C', 'D', 'D'),
  count = c(4, 5, 6, 3, 3, 5)
)

names <- unique(c(dat$source, dat$target))

dat$source <- factor(dat$source, levels = names)
dat$target <- factor(dat$target, levels = names)

mat <- xtabs(count ~ source + target, data = dat)
mat[lower.tri(mat)] <- t(mat)[lower.tri(mat)]
mat
      target
source A B C D
     A 0 4 5 6
     B 4 0 3 3
     C 5 3 0 5
     D 6 3 5 0

We can create an edge matrix and use it for indexing.

> edges <- lapply(dat[1:2], factor, levels=(lv <- unique(unlist(dat[1:2])))) |> 
  +   sapply(as.integer)
> res <- diag(1, max(edges)) |> `dimnames<-`(list(lv, lv))
> for (i in list(1:2, 2:1)) res[edges[, i]] <- dat$count
> res
  A B C D
A 1 4 5 6
B 4 1 3 3
C 5 3 1 5
D 6 3 5 1

I guess this is similar to what igraph is doing under the hood.

To get back to the original data frame we can use which(..., arr.ind=TRUE)

> idx <- which(upper.tri(res), arr.ind=TRUE)
> data.frame(apply(idx, 2, \(i) rownames(res)[i]), res[idx]) |> 
+   sort_by(~list(row, col))
  row col res.idx.
1   A   B        4
2   A   C        5
4   A   D        6
3   B   C        3
5   B   D        3
6   C   D        5

Doing {igraph}:

# X = 
dat |>
  igraph::graph_from_data_frame(directed = FALSE) |>
  igraph::as_adjacency_matrix(type = 'both', sparse = FALSE, attr = 'count') |>
  `diag<-`(1L)

Edit. Base R colour heatmap from X with text labels.

local({
  rn = unique(rownames(X)); cn = unique(colnames(X))
  nr = length(rn); nc = length(cn)
  s = seq(nr)
  M = t(X[rev(s), ])
  image(x = s, y = s, z = M, xlab = '', ylab = '', axes = FALSE, 
        col = hcl.colors(1e3, 'Reds', rev = TRUE, alpha = .8))
  with(expand.grid(x=s, y=s), text(x, y, labels = M, col = 'black'))
  axis(2, at = s, labels = rev(rn), las = 2, col = NA) # left 
  axis(3, at = s, labels = cn, col = NA) # top
})

Primitive base-R Solution:

# Original data
dat <- data.frame(
  source = c('A', 'A', 'A', 'B', 'B', 'C'),
  target = c('B', 'C', 'D', 'C', 'D', 'D'),
  count = c(4, 5, 6, 3, 3, 5)
)


# Get unique character list
nodes <- unique(unlist(dat[1:2]))  

# Create an empty matrix 
mat <- matrix(nrow = length(nodes), ncol = length(nodes))

# Fill the diagonal with 1
diag(mat) <- 1

# Index mapping
for (k in seq_len(nrow(dat))) {
  i <- which(nodes == dat[[1]][k])  # Find row index
  j <- which(nodes == dat[[2]][k])  # Find column index
  mat[i, j] <- dat$count[k]         # Assign value
}

# Fill lower triangle
mat[lower.tri(mat)] <- t(mat)[lower.tri(mat)]
   

mat

     [,1] [,2] [,3] [,4]
[1,]    1    4    5    6
[2,]    4    1    3    3
[3,]    5    3    1    5
[4,]    6    3    5    1

in Base R:

l <- factor(unlist(dat[1:2]))
`diag<-`(as.matrix(structure(dat$count, Size = nlevels(l), 
                   Labels = levels(l), class = "dist")), 1)
  A B C D
A 1 4 5 6
B 4 1 3 3
C 5 3 1 5
D 6 3 5 1

发布者：admin，转转请注明出处：http://www.yc00.com/questions/1745045917a4608095.html

r - convert source target value dataframe into a correlation matrix - Stack Overflow

8 Answers 8

发表回复

评论列表（0条）

联系我们

400-800-8888

r - convert source target value dataframe into a correlation matrix - Stack Overflow

8 Answers 8

相关推荐