# Description

# Figures

# Packages used throughout this script. Attaching them here lets the file run
# in a fresh R session; library() does nothing for a package already attached.
library(data.table) # fread()
library(dplyr)      # left_join(), filter(), mutate(), select(), %>%
library(ggplot2)    # ggplot() and the geom_* / stat_* / scale_* layers
library(ggrepel)    # geom_text_repel()

# Restore saved objects from disk. The code below expects this file to provide
# `kgca_filtered_mr_gtype3_expr` (TODO confirm it contains nothing else that
# could overwrite objects in the session).
load("~/GIT/tcgaMut/ppts/kgca_filtered_mr_gtype3_expr.Rda")

# Per-gene covariate table; it is joined below on its `gene` column.
covar <- fread("/home/tc/DATA/dataset/pan20/covar")

# Attach the covariates to every row of the gene table (left join, so rows
# without a covariate match are kept).
kgca_filtered_mr_gtype3_expr <- left_join(
  kgca_filtered_mr_gtype3_expr,
  covar,
  by = c("Hugo_Symbol" = "gene")
)

# Genes highlighted and labelled in the figures below.
Pyrimidine_pathway <- c("CAD", "UMPS", "DHODH", "CPS1")

# List of 22 gene symbols. It is not referenced again in the visible part of
# this script.
sig22 <- c(
  "TP53",
  "PIK3CA",
  "PTEN",
  "RB1",
  "KRAS",
  "NRAS",
  "BRAF",
  "CDKN2A",
  "FBXW7",
  "ARID1A",
  "MLL2", # not found (NOTE(review): possibly listed as KMT2D now - confirm)
  "STAG2",
  "ATM",
  "CASP8",
  "CTCF",
  "ERBB3",
  "HLA-A",
  "HRAS",
  "IDH1",
  "NF1",
  "NFE2L2",
  "PIK3R1"
)

# Two-colour palette (black, blue) handed to scale_colour_manual() for the
# Genetype colour mapping in the figures.
oppallate <- c("#000000", "#0072B2")
# Figure: per-gene mutation rate (`mrca`, log10 y-axis) against log2 expression.
# Layers, bottom to top:
#   1. grey smoother without a confidence band;
#   2. grey binned "median_hilow" summaries (20 bins), drawn once as error bars
#      and once as point ranges;
#   3. open circles for rows typed oncogene / TSG, coloured by Genetype;
#   4. solid red points for rows typed "Pyrimidine_biosyn";
#   5. red repelled labels for the genes listed in Pyrimidine_pathway.
# "median_hilow" is looked up by ggplot2 and requires the Hmisc package.
# Alternatives tried earlier and left disabled: geom_point(alpha = 0.01),
# stat_smooth(), geom_hex(), scale_x_log10().
gg <- ggplot(
  data = kgca_filtered_mr_gtype3_expr,
  mapping = aes(x = log2(expr), y = mrca)
) +
  geom_smooth(se = FALSE, colour = "grey") +
  stat_summary_bin(
    fun.data = "median_hilow",
    geom = "errorbar",
    bins = 20,
    width = 0.2,
    colour = "grey"
  ) +
  stat_summary_bin(
    fun.data = "median_hilow",
    geom = "pointrange",
    bins = 20,
    width = 0.2,
    colour = "grey"
  ) +
  geom_point(
    mapping = aes(colour = Genetype),
    data = filter(
      kgca_filtered_mr_gtype3_expr,
      Genetype %in% c("oncogene", "TSG")
    ),
    shape = 21
  ) +
  geom_point(
    data = filter(
      kgca_filtered_mr_gtype3_expr,
      Genetype %in% "Pyrimidine_biosyn"
    ),
    colour = "red"
  ) +
  geom_text_repel(
    mapping = aes(label = Hugo_Symbol),
    data = filter(
      kgca_filtered_mr_gtype3_expr,
      Hugo_Symbol %in% Pyrimidine_pathway
    ),
    colour = "red"
  ) +
  scale_colour_manual(values = oppallate) +
  scale_y_log10() +
  labs(
    x = "Gene expression level (log2)",
    y = "Gene mutation rate (#Mutations / bp)"
  ) +
  theme_classic()
print(gg)

#fread("~/Downloads/Census_allSat May 20 05-12-33 2017.csv") ->cgc
#cgc %>% filter(grepl("Mis|N|F|S",`Mutation Types`)) ->cgc.filtered
#cgc %>% filter(grepl("Mis|N|F|S",`Mutation Types`)) ->cgc.filtered
#cgc.filtered %>% mutate(Hugo_Symbol=`Gene Symbol`,Genetype=`Role in Cancer`) %>% select(Hugo_Symbol,Genetype) ->cgc.filtered.min
#load("~/GIT/tcgaMut/data/CG.uni_cgc_filtered.rda")
# Annotate the MutSigCV gene table with roles from the cancer gene census file.

# Raw census export.
cgc <- fread("~/Downloads/Census_allSat May 20 05-12-33 2017.csv")

# Keep census rows whose `Mutation Types` field matches Mis, N, F or S
# (presumably the missense / nonsense / frameshift / splice codes - TODO
# confirm against the census documentation).
cgc.filtered <- filter(cgc, grepl("Mis|N|F|S", `Mutation Types`))

# Two-column lookup table: gene symbol -> role in cancer.
cgc.filtered.min <- select(
  cgc.filtered,
  Hugo_Symbol = `Gene Symbol`,
  Genetype = `Role in Cancer`
)

# MutSigCV significance table, one row per gene.
pan20.sigg <- fread("~/DATA/dataset/pan20/pan20_out.sig_genes.txt")

# Join the roles onto the significance table, then derive:
#   q2       - q with values below 2.2e-16 raised to 2.2e-16, so the log-scaled
#              axes used later never see a smaller value;
#   Genetype - genes without a census match are labelled "Non-cancer".
pan20.sigg.cgc <- pan20.sigg %>%
  left_join(cgc.filtered.min, by = c("gene" = "Hugo_Symbol")) %>%
  mutate(
    q2 = ifelse(q < 2.2e-16, 2.2e-16, q),
    Genetype = ifelse(is.na(Genetype), "Non-cancer", Genetype)
  )
# pan20.sigg.cgc <-
#   left_join(pan20.sigg.cgc,covar,by="gene")
  
  
# Figure: MutSigCV q-value against log2 expression.
# Base data: genes typed oncogene, TSG or Non-cancer. Layers, bottom to top:
#   1. grey binned "median_hilow" summaries (10 bins) of the Non-cancer genes,
#      drawn once as error bars and once as point ranges;
#   2. open circles for oncogene / TSG genes, coloured by Genetype;
#   3. red points (size 2) for the genes listed in Pyrimidine_pathway;
#   4. blue repelled labels for those same genes.
# "median_hilow" is looked up by ggplot2 and requires the Hmisc package.
# Alternatives tried earlier and left disabled: geom_smooth(), a plain
# geom_point() of every gene, a faint (alpha = 0.05) point layer of the
# Non-cancer genes, and a blue binned summary of the oncogene / TSG genes.
gg <- ggplot(
  data = pan20.sigg.cgc %>%
    filter(Genetype %in% c("oncogene", "TSG", "Non-cancer")),
  mapping = aes(x = log2(expr), y = q)
) +
  stat_summary_bin(
    data = pan20.sigg.cgc %>% filter(Genetype %in% c("Non-cancer")),
    fun.data = "median_hilow",
    geom = "errorbar",
    bins = 10,
    width = 0.2,
    colour = "grey"
  ) +
  stat_summary_bin(
    data = pan20.sigg.cgc %>% filter(Genetype %in% c("Non-cancer")),
    fun.data = "median_hilow",
    geom = "pointrange",
    bins = 10,
    width = 0.2,
    colour = "grey"
  ) +
  # No trailing comma after the last argument: an empty trailing argument is
  # forwarded through `...` and can be rejected ("argument is empty") by
  # ggplot2 releases that do not tolerate empty dots.
  geom_point(
    data = pan20.sigg.cgc %>% filter(Genetype %in% c("oncogene", "TSG")),
    aes(colour = Genetype),
    shape = 21
  ) +
  geom_point(
    data = pan20.sigg.cgc %>% filter(gene %in% Pyrimidine_pathway),
    colour = "red",
    size = 2
  ) +
  geom_text_repel(
    data = pan20.sigg.cgc %>% filter(gene %in% Pyrimidine_pathway),
    aes(label = gene),
    colour = "blue"
  ) +
  # Limits are given high-to-low on purpose: this reverses the axis so the
  # smallest q-values are drawn at the top.
  ylim(1, 0) +
  scale_colour_manual(values = oppallate) +
  ylab("MutsigCV Q-value") +
  theme_classic()
print(gg)

  # Figure: jittered q2 values (log10 y-axis) per Genetype for genes typed
  # oncogene, TSG or Non-cancer. The genes listed in Pyrimidine_pathway are
  # overplotted as yellow points with blue repelled labels.
  gg <- ggplot(
    data = filter(
      pan20.sigg.cgc,
      Genetype %in% c("oncogene", "TSG", "Non-cancer")
    ),
    mapping = aes(x = Genetype, y = q2)
  ) +
    geom_jitter() +
    geom_point(
      data = filter(pan20.sigg.cgc, gene %in% Pyrimidine_pathway),
      colour = "yellow"
    ) +
    geom_text_repel(
      mapping = aes(label = gene),
      data = filter(pan20.sigg.cgc, gene %in% Pyrimidine_pathway),
      colour = "blue"
    ) +
    scale_y_log10() +
    labs(y = "MutsigCV Q-value")
  print(gg)
# pan20.sigg.violin <-
#  pan20.sigg.cgc %>% mutate(Genetype= ifelse(gene %in% Pyrimidine_pathway,"Pyrimidine_biosyn",Genetype))
  # Figure: violin plot of q2 (log10 y-axis) per Genetype for genes typed
  # oncogene, TSG or Non-cancer. The genes listed in Pyrimidine_pathway are
  # overplotted as red points with blue repelled labels.
  # A jittered, Genetype-coloured layer of the oncogene / TSG genes was tried
  # earlier and is left disabled.
  gg <- ggplot(
    data = filter(
      pan20.sigg.cgc,
      Genetype %in% c("oncogene", "TSG", "Non-cancer")
    ),
    mapping = aes(x = Genetype, y = q2)
  ) +
    geom_violin() +
    geom_point(
      data = filter(pan20.sigg.cgc, gene %in% Pyrimidine_pathway),
      colour = "red"
    ) +
    geom_text_repel(
      mapping = aes(label = gene),
      data = filter(pan20.sigg.cgc, gene %in% Pyrimidine_pathway),
      colour = "blue"
    ) +
    scale_y_log10() +
    labs(y = "MutsigCV Q-value")
  print(gg)

# pan20.sigg.violin <-
#  pan20.sigg.cgc %>% mutate(Genetype= ifelse(gene %in% Pyrimidine_pathway,"Pyrimidine_biosyn",Genetype))
  # Figure: box plot of q2 (log10 y-axis) per Genetype for genes typed
  # oncogene, TSG or Non-cancer. The genes listed in Pyrimidine_pathway are
  # overplotted as red points with blue repelled labels.
  # A jittered, Genetype-coloured layer of the oncogene / TSG genes was tried
  # earlier and is left disabled.
  gg <- ggplot(
    data = filter(
      pan20.sigg.cgc,
      Genetype %in% c("oncogene", "TSG", "Non-cancer")
    ),
    mapping = aes(x = Genetype, y = q2)
  ) +
    geom_boxplot() +
    geom_point(
      data = filter(pan20.sigg.cgc, gene %in% Pyrimidine_pathway),
      colour = "red"
    ) +
    geom_text_repel(
      mapping = aes(label = gene),
      data = filter(pan20.sigg.cgc, gene %in% Pyrimidine_pathway),
      colour = "blue"
    ) +
    scale_y_log10() +
    labs(y = "MutsigCV Q-value")
  print(gg)

