r - Alluvial plots using ggplot2 - highlight certain pairings - Stack Overflow

I have plotted an alluvial plot using ggplot2; however, I cannot seem to figure out how to colour only the most frequent pairing while keeping everything else grey.

I have plotted an alluvial plot using ggplot2; however, I cannot seem to figure out how to colour only the most frequent pair, "CAGGFNYQLIW" from the variable "CTaa_alpha" paired with "CASSVAGPNTEAFF" from the variable "CTaa_beta", while keeping everything else grey.

My code below:

# Example data: a 50-row tibble with one row per CDR3 alpha/beta pairing.
#   CTaa_alpha - alpha-chain sequence (first axis of the plot below)
#   CTaa_beta  - beta-chain sequence (second axis of the plot below)
#   n          - integer weight of the pairing, used as `y` in the plot;
#                rows are listed from the largest to the smallest n
a<- structure(list(CTaa_alpha = c("CAGGFNYQLIW", "CVVNRDDKIIF", "CAVRGDSNYQLIW", 
"CVVNTRSNDYKLSF", "CAVQAAANAGKSTF", "CVVLNTGGFKTIF", "CAYVNNNDMRF", 
"CATDANTGFQKLVF", "CAVRAPDQTGANNLFF", "CALREYGNKLVF", "CATDRDDKIIF", 
"CAYRGGSNYKLTF", "CAMRELTSNTGKLIF", "CALISYNTDKLIF", "CALTPYGNNRLAF", 
"CAAVPNAGNMLTF", "CAWEYGNKLVF", "CVVSVDYGQNFVF", "CAFYGQNFVF", 
"CAPGMETSYDKVIF", "CAVTRNSDGQKLLF", "CAGASGGGSYIPTF", "CAVRDTHNTDKLIF", 
"CAVNIHSGYALNF", "CAGVDTNAGKSTF", "CAPRDSNYQLIW", "CVVNAPSGNTPLVF", 
"CALSELPYSSASKIIF", "CALGDGGATNKLIF", "CAVALSGYALNF", "CLVGDVTAGNKLTF", 
"CAGPFSGGYNKLIF", "CATAPNYGGATNKLIF", "CAGITGGGNKLTF", "CAVTGAAYNTDKLIF", 
"CALPPQKLVF", "CAVGDGQNFVF", "CILRIYQGGSEKLVF", "CAMREITGNTGKLIF", 
"CAVSSSSGSARQLTF", "CASRYNFNKFYF", "CATREAGNMLTF", "CAVRENQAGTALIF", 
"CAVTSPGANNLFF", "CAVSTPTGANSKLTF", "CAVSKSARQLTF", "CAVLSNDYKLSF", 
"CAVRDGDYKLSF", "CAARGVYGNKLVF", "CALSEAPYGGATNKLIF"), CTaa_beta = c("CASSVAGPNTEAFF", 
"CASSVGNRGGTDTQYF", "CASSLRQGPSYEQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF", 
"CSVVPGGQGGYEQYF", "CASSSGGLDEQYF", "CATSIGGPPYEQYF", "CASSAGLAGGYEQYF", 
"CASSSPGTTNEKLFF", "CASSLLAGGNNEQFF", "CASSLLQGPSSPLHF", "CASSLGGSSYEQYF", 
"CASSLRDGHYGYTF", "CASSLRDSHYEQYF", "CASSQWMYSPNGYTF", "CSASFGDGGEGETQYF", 
"CASSEGHRGGTDTQYF", "CASSLSGSPAYGYTF", "CASSGTGTGASGNEQFF", "CAWSRPLGYTF", 
"CASSLVGAGANVLTF", "CASSRQAEAFF", "CASSLLAGGNNEQFF", "CASSSHYRGGTDTQYF", 
"CASSEVGGSMETQYF", "CASSTDISSYNEQFF", "CASGLVQQGGTEAFF", "CASSLLPGLAGAGNEQFF", 
"CASTPAVRDGNYEQYF", "CASGPGLQQTYGYTF", "CASSPDRTGEANNEQFF", "CASSLAKAGTGGEKLFF", 
"CASGGTGPYNEQFF", "CSVEDPSSGSYEQYF", "CASSQYRGTEAFF", "CASSPGSSGSETQYF", 
"CASSYSEVTEAFF", "CSARAGGWGTDTQYF", "CSATAYRTGAYEQYF", "CASRPERGHTDTQYF", 
"CASSFEGGGTEAFF", "CASSQYRGTEAFF", "CASSTQGQSYTEAFF", "CASSVGLYSTDTQYF", 
"CASSQDPTDQPQHF", "CASSSTEKDTQYF", "CSAFTGNTEAFF", "CASSYTGRPEQYF", 
"CASSPGQGLLSGELFF"), n = c(268L, 145L, 142L, 109L, 95L, 84L, 
60L, 60L, 56L, 55L, 53L, 52L, 51L, 49L, 48L, 48L, 45L, 42L, 36L, 
34L, 33L, 32L, 32L, 32L, 31L, 31L, 28L, 28L, 27L, 27L, 27L, 26L, 
26L, 26L, 25L, 25L, 23L, 22L, 22L, 20L, 20L, 20L, 20L, 19L, 19L, 
19L, 18L, 18L, 17L, 17L)), row.names = c(NA, -50L), class = c("tbl_df", 
"tbl", "data.frame"))


# Alluvial plot of the alpha -> beta pairings in `a`; flow height is `n`.
ggplot(
  a,
  mapping = aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # NOTE: "green" sits inside aes(), so it is mapped as a single-level data
  # value and coloured by the fill scale below, not drawn in green.
  geom_alluvium(mapping = aes(fill = "green")) +
  geom_stratum() +
  # Label the strata with the `stratum` variable computed by the stratum stat.
  geom_text(
    mapping = aes(label = after_stat(stratum)),
    stat = "stratum"
  ) +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none")

The code above gives me the following plot:

As you can see, it is a bit "messy", and I would like to have the option of highlighting specific pairings (whether it is row 1, which is the most frequent pairing, or row 10, which is the 10th most frequent pairing). Any insights would be welcome!

I have plotted an alluvial plot using ggplot2; however, I cannot seem to figure out how to colour only the most frequent pair, "CAGGFNYQLIW" from the variable "CTaa_alpha" paired with "CASSVAGPNTEAFF" from the variable "CTaa_beta", while keeping everything else grey.

My code below:

# Example data: a 50-row tibble with one row per CDR3 alpha/beta pairing.
#   CTaa_alpha - alpha-chain sequence (first axis of the plot below)
#   CTaa_beta  - beta-chain sequence (second axis of the plot below)
#   n          - integer weight of the pairing, used as `y` in the plot;
#                rows are listed from the largest to the smallest n
a<- structure(list(CTaa_alpha = c("CAGGFNYQLIW", "CVVNRDDKIIF", "CAVRGDSNYQLIW", 
"CVVNTRSNDYKLSF", "CAVQAAANAGKSTF", "CVVLNTGGFKTIF", "CAYVNNNDMRF", 
"CATDANTGFQKLVF", "CAVRAPDQTGANNLFF", "CALREYGNKLVF", "CATDRDDKIIF", 
"CAYRGGSNYKLTF", "CAMRELTSNTGKLIF", "CALISYNTDKLIF", "CALTPYGNNRLAF", 
"CAAVPNAGNMLTF", "CAWEYGNKLVF", "CVVSVDYGQNFVF", "CAFYGQNFVF", 
"CAPGMETSYDKVIF", "CAVTRNSDGQKLLF", "CAGASGGGSYIPTF", "CAVRDTHNTDKLIF", 
"CAVNIHSGYALNF", "CAGVDTNAGKSTF", "CAPRDSNYQLIW", "CVVNAPSGNTPLVF", 
"CALSELPYSSASKIIF", "CALGDGGATNKLIF", "CAVALSGYALNF", "CLVGDVTAGNKLTF", 
"CAGPFSGGYNKLIF", "CATAPNYGGATNKLIF", "CAGITGGGNKLTF", "CAVTGAAYNTDKLIF", 
"CALPPQKLVF", "CAVGDGQNFVF", "CILRIYQGGSEKLVF", "CAMREITGNTGKLIF", 
"CAVSSSSGSARQLTF", "CASRYNFNKFYF", "CATREAGNMLTF", "CAVRENQAGTALIF", 
"CAVTSPGANNLFF", "CAVSTPTGANSKLTF", "CAVSKSARQLTF", "CAVLSNDYKLSF", 
"CAVRDGDYKLSF", "CAARGVYGNKLVF", "CALSEAPYGGATNKLIF"), CTaa_beta = c("CASSVAGPNTEAFF", 
"CASSVGNRGGTDTQYF", "CASSLRQGPSYEQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF", 
"CSVVPGGQGGYEQYF", "CASSSGGLDEQYF", "CATSIGGPPYEQYF", "CASSAGLAGGYEQYF", 
"CASSSPGTTNEKLFF", "CASSLLAGGNNEQFF", "CASSLLQGPSSPLHF", "CASSLGGSSYEQYF", 
"CASSLRDGHYGYTF", "CASSLRDSHYEQYF", "CASSQWMYSPNGYTF", "CSASFGDGGEGETQYF", 
"CASSEGHRGGTDTQYF", "CASSLSGSPAYGYTF", "CASSGTGTGASGNEQFF", "CAWSRPLGYTF", 
"CASSLVGAGANVLTF", "CASSRQAEAFF", "CASSLLAGGNNEQFF", "CASSSHYRGGTDTQYF", 
"CASSEVGGSMETQYF", "CASSTDISSYNEQFF", "CASGLVQQGGTEAFF", "CASSLLPGLAGAGNEQFF", 
"CASTPAVRDGNYEQYF", "CASGPGLQQTYGYTF", "CASSPDRTGEANNEQFF", "CASSLAKAGTGGEKLFF", 
"CASGGTGPYNEQFF", "CSVEDPSSGSYEQYF", "CASSQYRGTEAFF", "CASSPGSSGSETQYF", 
"CASSYSEVTEAFF", "CSARAGGWGTDTQYF", "CSATAYRTGAYEQYF", "CASRPERGHTDTQYF", 
"CASSFEGGGTEAFF", "CASSQYRGTEAFF", "CASSTQGQSYTEAFF", "CASSVGLYSTDTQYF", 
"CASSQDPTDQPQHF", "CASSSTEKDTQYF", "CSAFTGNTEAFF", "CASSYTGRPEQYF", 
"CASSPGQGLLSGELFF"), n = c(268L, 145L, 142L, 109L, 95L, 84L, 
60L, 60L, 56L, 55L, 53L, 52L, 51L, 49L, 48L, 48L, 45L, 42L, 36L, 
34L, 33L, 32L, 32L, 32L, 31L, 31L, 28L, 28L, 27L, 27L, 27L, 26L, 
26L, 26L, 25L, 25L, 23L, 22L, 22L, 20L, 20L, 20L, 20L, 19L, 19L, 
19L, 18L, 18L, 17L, 17L)), row.names = c(NA, -50L), class = c("tbl_df", 
"tbl", "data.frame"))


# Alluvial plot of the alpha -> beta pairings in `a`; flow height is `n`.
ggplot(
  a,
  mapping = aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # NOTE: "green" sits inside aes(), so it is mapped as a single-level data
  # value and coloured by the fill scale below, not drawn in green.
  geom_alluvium(mapping = aes(fill = "green")) +
  geom_stratum() +
  # Label the strata with the `stratum` variable computed by the stratum stat.
  geom_text(
    mapping = aes(label = after_stat(stratum)),
    stat = "stratum"
  ) +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none")

The code above gives me the following plot:

As you can see, it is a bit "messy", and I would like to have the option of highlighting specific pairings (whether it is row 1, which is the most frequent pairing, or row 10, which is the 10th most frequent pairing). Any insights would be welcome!

Share Improve this question edited Nov 18, 2024 at 21:31 stefan 128k6 gold badges38 silver badges77 bronze badges asked Nov 18, 2024 at 21:20 Zoya QaiyumZoya Qaiyum 998 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 5

To highlight some of the categories, you can map a condition to the `fill` aesthetic and then set your desired colors using `scale_fill_manual`; e.g., to highlight the top 3 categories you can do:

library(ggplot2)
library(ggalluvial)

# Highlight Top 3
# `.highlight` holds the CTaa_alpha values of the three rows with the largest
# `n`; change the argument of head() to highlight a different number of rows.
.highlight <- a[order(a$n, decreasing = TRUE), ] |>
  head(3) |>
  subset(select = CTaa_alpha, drop = TRUE)

ggplot(
  data = a,
  aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # The logical condition is the fill variable: TRUE for flows whose
  # CTaa_alpha is in `.highlight`, FALSE for all others. Matching is on
  # CTaa_alpha only, so every row sharing a highlighted alpha sequence is
  # coloured as well.
  geom_alluvium(aes(fill = CTaa_alpha %in% .highlight)) +
  geom_stratum() +
  geom_text(
    stat = "stratum",
    aes(label = after_stat(stratum))
  ) +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  scale_fill_manual(
    # Named values pin each colour to its logical level. Unnamed values are
    # assigned in level order, so if only one level were present (e.g. every
    # row highlighted) TRUE would be drawn in grey.
    values = c("FALSE" = "grey65", "TRUE" = "steelblue")
  ) +
  theme_classic() +
  theme(legend.position = "none")

发布者:admin,转载请注明出处:http://www.yc00.com/questions/1745593742a4634982.html

相关推荐

发表回复

评论列表(0条)

  • 暂无评论

联系我们

400-800-8888

在线咨询: QQ交谈

邮件:admin@example.com

工作时间:周一至周五,9:30-18:30,节假日休息

关注微信