# Figures
# Packages this script calls into: fread() (data.table), left_join()/filter()/
# mutate()/select() and %>% (dplyr), the plotting layers (ggplot2) and
# geom_text_repel() (ggrepel). Attaching them here lets the script run in a
# fresh R session instead of relying on whatever is already attached.
library(data.table)
library(dplyr)
library(ggplot2)
library(ggrepel)

# Restore the saved workspace objects; the code below relies on this file
# providing `kgca_filtered_mr_gtype3_expr`.
load("~/GIT/tcgaMut/ppts/kgca_filtered_mr_gtype3_expr.Rda")

# Per-gene covariate table; it is matched on its `gene` column below.
# NOTE(review): this path is absolute while the other inputs use `~` --
# confirm both resolve to the same home directory.
covar <- fread("/home/tc/DATA/dataset/pan20/covar")

# Add the covariate columns to every row of the mutation-rate table, matching
# `Hugo_Symbol` on the left to `gene` on the right. Rows without a covariate
# entry are kept (left join) with NA in the added columns.
kgca_filtered_mr_gtype3_expr <- left_join(
  kgca_filtered_mr_gtype3_expr,
  covar,
  by = c("Hugo_Symbol" = "gene")
)
# Genes highlighted and labelled in every figure of this script.
Pyrimidine_pathway <- c(
  "CAD",
  "UMPS",
  "DHODH",
  "CPS1"
)

# A 22-gene list; it is not referenced anywhere in the visible part of this
# script.
sig22 <- c(
  "TP53", "PIK3CA", "PTEN", "RB1", "KRAS", "NRAS", "BRAF", "CDKN2A",
  "FBXW7", "ARID1A",
  # "MLL2" was marked "not found" by the original author.
  # NOTE(review): presumably because the data use the newer symbol KMT2D --
  # confirm before renaming.
  "MLL2",
  "STAG2", "ATM", "CASP8", "CTCF", "ERBB3", "HLA-A", "HRAS", "IDH1",
  "NF1", "NFE2L2", "PIK3R1"
)

# Two-colour palette (black, blue) passed to scale_colour_manual() for the
# gene-type colour legend.
oppallate <- c(
  "#000000",
  "#0072B2"
)
# Figure: per-gene mutation rate (`mrca`, log10 y axis) against log2
# expression (`expr`).
#
# Layers, bottom to top:
#   1. grey smoother over all genes (no confidence band);
#   2. grey binned summaries (20 bins along x) drawn as error bars and as
#      point ranges; "median_hilow" is ggplot2's wrapper around Hmisc, so the
#      Hmisc package must be installed;
#   3. open circles for genes typed "oncogene" or "TSG", coloured by type;
#   4. red points for genes typed "Pyrimidine_biosyn";
#   5. red repelled labels for the genes listed in `Pyrimidine_pathway`.
# Alternative layers tried earlier (raw points, hex bins, log10 x scale) are
# left out.
#
# The subsets are computed inside local() so that only `gg` is created in the
# workspace.
gg <- local({
  mr_tbl <- kgca_filtered_mr_gtype3_expr

  cancer_typed <- filter(mr_tbl, Genetype %in% c("oncogene", "TSG"))
  pyrimidine_typed <- filter(mr_tbl, Genetype %in% "Pyrimidine_biosyn")
  pyrimidine_named <- filter(mr_tbl, Hugo_Symbol %in% Pyrimidine_pathway)

  ggplot(mr_tbl, aes(x = log2(expr), y = mrca)) +
    geom_smooth(se = FALSE, colour = "grey") +
    stat_summary_bin(
      fun.data = "median_hilow",
      geom = "errorbar",
      bins = 20,
      width = 0.2,
      colour = "grey"
    ) +
    stat_summary_bin(
      fun.data = "median_hilow",
      geom = "pointrange",
      bins = 20,
      width = 0.2,
      colour = "grey"
    ) +
    geom_point(data = cancer_typed, aes(colour = Genetype), shape = 21) +
    geom_point(data = pyrimidine_typed, colour = "red") +
    geom_text_repel(
      data = pyrimidine_named,
      aes(label = Hugo_Symbol),
      colour = "red"
    ) +
    scale_colour_manual(values = oppallate) +
    scale_y_log10() +
    labs(
      x = "Gene expression level (log2)",
      y = "Gene mutation rate (#Mutations / bp)"
    ) +
    theme_classic()
})
print(gg)
#fread("~/Downloads/Census_allSat May 20 05-12-33 2017.csv") ->cgc
#cgc %>% filter(grepl("Mis|N|F|S",`Mutation Types`)) ->cgc.filtered
#cgc %>% filter(grepl("Mis|N|F|S",`Mutation Types`)) ->cgc.filtered
#cgc.filtered %>% mutate(Hugo_Symbol=`Gene Symbol`,Genetype=`Role in Cancer`) %>% select(Hugo_Symbol,Genetype) ->cgc.filtered.min
#load("~/GIT/tcgaMut/data/CG.uni_cgc_filtered.rda")
# Cancer gene list export (presumably the COSMIC Cancer Gene Census, going by
# the file name -- confirm).
cgc <- fread("~/Downloads/Census_allSat May 20 05-12-33 2017.csv")

# Keep only rows whose `Mutation Types` text contains "Mis", "N", "F" or "S".
# NOTE(review): these look like the codes for missense, nonsense, frameshift
# and splice-site mutations -- confirm against the census documentation.
cgc.filtered <- filter(cgc, grepl("Mis|N|F|S", `Mutation Types`))

# Two-column lookup: gene symbol -> role in cancer, renamed to match the
# column names used by the rest of this script.
cgc.filtered.min <- select(
  cgc.filtered,
  Hugo_Symbol = `Gene Symbol`,
  Genetype = `Role in Cancer`
)

# Per-gene significance table (labelled "MutsigCV Q-value" in the figures
# below); it is keyed by `gene` and carries the `q` column used here.
pan20.sigg <- fread("~/DATA/dataset/pan20/pan20_out.sig_genes.txt")

# Annotate every gene with its census role, then derive:
#   q2       - `q` raised to at least 2.2e-16 so it can go on a log axis;
#   Genetype - genes absent from the filtered census become "Non-cancer".
pan20.sigg.cgc <- pan20.sigg %>%
  left_join(cgc.filtered.min, by = c("gene" = "Hugo_Symbol")) %>%
  mutate(
    q2 = ifelse(q < 2.2e-16, 2.2e-16, q),
    Genetype = replace(Genetype, is.na(Genetype), "Non-cancer")
  )
# pan20.sigg.cgc <-
# left_join(pan20.sigg.cgc,covar,by="gene")
# Figure: significance Q-value (`q`) against log2 expression (`expr`) for the
# annotated gene table `pan20.sigg.cgc`.
#
# Layers, bottom to top:
#   1. grey binned summaries (10 bins along x) of the "Non-cancer" genes,
#      drawn as error bars and as point ranges; "median_hilow" is ggplot2's
#      wrapper around Hmisc, so the Hmisc package must be installed;
#   2. open circles for "oncogene" / "TSG" genes, coloured by type;
#   3. larger red points for the `Pyrimidine_pathway` genes;
#   4. blue repelled labels for those same genes.
#
# Each subset is filtered once inside local() so that only `gg` is created in
# the workspace.
gg <- local({
  plotted_types <- filter(
    pan20.sigg.cgc,
    Genetype %in% c("oncogene", "TSG", "Non-cancer")
  )
  background_genes <- filter(pan20.sigg.cgc, Genetype %in% "Non-cancer")
  cancer_genes <- filter(pan20.sigg.cgc, Genetype %in% c("oncogene", "TSG"))
  pyrimidine_genes <- filter(pan20.sigg.cgc, gene %in% Pyrimidine_pathway)

  ggplot(data = plotted_types, aes(x = log2(expr), y = q)) +
    stat_summary_bin(
      data = background_genes,
      fun.data = "median_hilow",
      geom = "errorbar",
      bins = 10,
      width = 0.2,
      colour = "grey"
    ) +
    stat_summary_bin(
      data = background_genes,
      fun.data = "median_hilow",
      geom = "pointrange",
      bins = 10,
      width = 0.2,
      colour = "grey"
    ) +
    geom_point(
      data = cancer_genes,
      aes(colour = Genetype),
      shape = 21
    ) +
    geom_point(
      data = pyrimidine_genes,
      colour = "red",
      size = 2
    ) +
    geom_text_repel(
      data = pyrimidine_genes,
      aes(label = gene),
      colour = "blue"
    ) +
    # Limits given high-to-low: the y axis is reversed, so small Q-values
    # (most significant) are drawn at the top.
    ylim(1, 0) +
    scale_colour_manual(values = oppallate) +
    ylab("MutsigCV Q-value") +
    theme_classic()
})
print(gg)
# Figure: clamped Q-value (`q2`, log10 y axis) per gene type, one jittered
# point per gene. The `Pyrimidine_pathway` genes are overplotted in yellow at
# their exact (un-jittered) position and labelled in blue.
#
# Subsets are computed inside local() so that only `gg` is created in the
# workspace.
gg <- local({
  typed_genes <- filter(
    pan20.sigg.cgc,
    Genetype %in% c("oncogene", "TSG", "Non-cancer")
  )
  pyrimidine_genes <- filter(pan20.sigg.cgc, gene %in% Pyrimidine_pathway)

  ggplot(typed_genes, aes(x = Genetype, y = q2)) +
    scale_y_log10() +
    geom_jitter() +
    geom_point(data = pyrimidine_genes, colour = "yellow") +
    geom_text_repel(
      data = pyrimidine_genes,
      aes(label = gene),
      colour = "blue"
    ) +
    ylab("MutsigCV Q-value")
})
print(gg)
# pan20.sigg.violin <-
# pan20.sigg.cgc %>% mutate(Genetype= ifelse(gene %in% Pyrimidine_pathway,"Pyrimidine_biosyn",Genetype))
# Figure: distribution of the clamped Q-value (`q2`, log10 y axis) per gene
# type, drawn as violins. The `Pyrimidine_pathway` genes are overplotted as
# red points and labelled in blue.
#
# Subsets are computed inside local() so that only `gg` is created in the
# workspace.
gg <- local({
  typed_genes <- filter(
    pan20.sigg.cgc,
    Genetype %in% c("oncogene", "TSG", "Non-cancer")
  )
  pyrimidine_genes <- filter(pan20.sigg.cgc, gene %in% Pyrimidine_pathway)

  ggplot(typed_genes, aes(x = Genetype, y = q2)) +
    scale_y_log10() +
    geom_violin() +
    geom_point(data = pyrimidine_genes, colour = "red") +
    geom_text_repel(
      data = pyrimidine_genes,
      aes(label = gene),
      colour = "blue"
    ) +
    ylab("MutsigCV Q-value")
})
print(gg)
# pan20.sigg.violin <-
# pan20.sigg.cgc %>% mutate(Genetype= ifelse(gene %in% Pyrimidine_pathway,"Pyrimidine_biosyn",Genetype))
# Figure: distribution of the clamped Q-value (`q2`, log10 y axis) per gene
# type, drawn as box plots. The `Pyrimidine_pathway` genes are overplotted as
# red points and labelled in blue.
#
# Subsets are computed inside local() so that only `gg` is created in the
# workspace.
gg <- local({
  typed_genes <- filter(
    pan20.sigg.cgc,
    Genetype %in% c("oncogene", "TSG", "Non-cancer")
  )
  pyrimidine_genes <- filter(pan20.sigg.cgc, gene %in% Pyrimidine_pathway)

  ggplot(typed_genes, aes(x = Genetype, y = q2)) +
    scale_y_log10() +
    geom_boxplot() +
    geom_point(data = pyrimidine_genes, colour = "red") +
    geom_text_repel(
      data = pyrimidine_genes,
      aes(label = gene),
      colour = "blue"
    ) +
    ylab("MutsigCV Q-value")
})
print(gg)
