King's College London - MRC SGDP Centre Summer School - Introduction to R



Bioconductor - a use case - Illumina Genome Analyzer sRNA-seq

Solutions


ShortRead

QUESTION 1:

# Tabulate the lengths of the trimmed reads and draw them as a bar chart.
# The value returned by barplot() is kept in `x`.
readLengthCounts <- table(width(solexaTrim))
x <- barplot(
  readLengthCounts,
  main = "Read length distribution",
  xlab = "read length",
  ylab = "Frequency",
  col = c("blue", "green"),
  las = 1,
  cex.axis = 0.5,
  cex = 0.5
)

# Two red vertical lines mark the size cutoffs; the positions are given in
# the x coordinates of the bar plot.
abline(v = c(20.5, 27.7), col = "red")

# One legend for the bar colours, one for the cutoff lines.
legend(
  "topleft",
  legend = c("reads", "more reads"),
  col = c("blue", "green"),
  fill = c("blue", "green")
)
legend("topright", legend = "Size cutoffs", col = "red", lty = 1)

# Keep the reads whose sequence is 18 to 25 nucleotides long (inclusive) and
# report them as a fraction of the reads in `solexa`.
readWidths <- width(sread(solexaTrim))
miRNAsel <- solexaTrim[readWidths >= 18 & readWidths <= 25]
length(miRNAsel) / length(solexa)

QUESTION 2:

?srFilter

# Build a polynFilter with threshold 25 over A, C, T, G and "other".
multiNucFilter <- polynFilter(25, nuc = c("A", "C", "T", "G", "other"))

# Apply the filter to the read sequences once and reuse the result.
passesFilter <- multiNucFilter(sread(solexa))
filteredReads <- solexa[passesFilter]

# Show the reads that pass, then the fraction of all reads they represent.
filteredReads
length(filteredReads) / length(solexa)

QUESTION 3:

?srdistance

# Enter the miRNA as RNA and convert it to a DNAStringSet in one step.
miR92a <- DNAStringSet(RNAString("UAUUGCACUUGUCCCGGCCUG"))

# Distance between miR92a and miR25 (miR25 is defined outside this file).
srdistance(miR92a, miR25)

# Distances between every trimmed read and miR92a; the first list element is
# plotted as a histogram with one bin per unit of distance from 0 to 37.
readDistances <- srdistance(sread(solexaTrim), miR92a)[[1]]
hist(
  readDistances,
  breaks = 0:37,
  xlab = "Edit Distance",
  main = "Edit distance between miRNA and sample"
)

biomaRt

QUESTION 1:

# Connect to the Ensembl mart using the mouse gene dataset.
database <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")

# Of the candidate genes, return the MGI symbols of those that also match
# the GO term GO:0000086. Filter values are supplied as a list, one entry
# per filter, in the same order as `filters`.
candidateGenes <- c("Tpd52l1", "Birc5", "Rab6", "Tmem204", "Wnt10b")
getBM(
  attributes = "mgi_symbol",
  filters = c("mgi_symbol", "go"),
  values = list(candidateGenes, "GO:0000086"),
  mart = database
)

QUESTION 2:

# Retrieve the gene flank sequence for Birc5 (looked up by MGI symbol),
# requesting 1000 bases upstream.
getSequence(
  id = "Birc5",
  type = "mgi_symbol",
  seqType = "gene_flank",
  upstream = 1000,
  mart = database
)

QUESTION 3:

# First attempt: request gene-level and exon-level attributes in one query.
# This call fails; the error it produces is reproduced directly below.
getBM(
  attributes = c("ensembl_gene_id", "mgi_symbol", "chromosome_name",
                 "exon_chrom_start", "exon_chrom_end"),
  filters = "mgi_symbol",
  values = c("Tpd52l1", "Birc5", "Rab6", "Tmem204", "Wnt10b"),
  mart = database
)
Error in getBM(attributes = c("ensembl_gene_id", "mgi_symbol", "chromosome_name",  : 
  Query ERROR: caught BioMart::Exception::Usage: Attributes from multiple attribute pages are not allowed
# Split the request into two queries and join the results afterwards.
genesOfInterest <- c("Tpd52l1", "Birc5", "Rab6", "Tmem204", "Wnt10b")

# Query 1: gene identifier and MGI symbol.
one <- getBM(
  attributes = c("ensembl_gene_id", "mgi_symbol"),
  filters = "mgi_symbol",
  values = genesOfInterest,
  mart = database
)

# Query 2: gene identifier, chromosome and exon coordinates.
two <- getBM(
  attributes = c("ensembl_gene_id", "chromosome_name",
                 "exon_chrom_start", "exon_chrom_end"),
  filters = "mgi_symbol",
  values = genesOfInterest,
  mart = database
)

# Combine the two tables on the shared Ensembl gene identifier.
merge(one, two, by = "ensembl_gene_id")

QUESTION 4:

# Connect to the Ensembl mart, then select the human gene dataset on that
# same connection. The dataset is attached to `database2` itself, so this
# answer does not depend on the mouse mart created for an earlier question.
database2 <- useMart("ensembl")
database2 <- useDataset("hsapiens_gene_ensembl", mart = database2)

# List the Ensembl gene IDs located in the region 20,000,000-20,100,000 of
# chromosome 22; the region is written as "chromosome:start:end".
getBM(
  attributes = "ensembl_gene_id",
  filters = "chromosomal_region",
  values = "22:20000000:20100000",
  mart = database2
)