Set the working environment

library(floundeR)
#> floundeR v0.0.4
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(kableExtra)
#> 
#> Attaching package: 'kableExtra'
#> The following object is masked from 'package:dplyr':
#> 
#>     group_rows

Identify a couple of FAST5 files and check their validity

# using FAST5 included in package
# (system.file() returns "" when the requested file cannot be found)
singleFast5 <- system.file("extdata", "single.fast5", package="floundeR")
multiFast5 <- system.file("extdata", "multi.fast5", package="floundeR")

# we can check if a file is a parseable FAST5 with floundeR::file.isFast5
# NOTE(review): the two lines below construct Fast5 objects rather than
# calling file.isFast5 - confirm whether construction validates the file
singleF5 <- Fast5$new(fast5_file=singleFast5)
multiF5 <- Fast5$new(fast5_file=multiFast5)

# we can also test if the files correspond to single or multi entry FAST5s
singleF5$is_single_fast5()
#> [1] TRUE
multiF5$is_multi_fast5()
#> [1] TRUE
singleF5$is_multi_fast5()
#> [1] FALSE

Identify the sequencing platform defined in FAST5

# platform recorded in the multi-read example file
multiF5$get_platform()
#> [1] "gridion"

# let's also have a look at a PromethION dataset here
promFast5 <- system.file("extdata", "prom.fast5", package="floundeR")
promF5 <- Fast5$new(fast5_file=promFast5)
promF5$get_platform()
#> [1] "promethion"

Identify the flowcell_id defined in FAST5

# let's review the two different platforms that we have looked at above
multiF5$get_flowcell_id()
#> [1] "FAK42335"

# note: the PromethION example file yields an empty flowcell id
promF5$get_flowcell_id()
#> [1] ""

Identify the experimental start time defined in FAST5

This is a proxy for how old a dataset is and can be used to make decisions on how the data may be discarded for the purposes of resequencing or rebasecalling.

# experiment start time recorded in the PromethION example file
# (printed in UTC in the output below)
promF5$get_exp_start_time()
#> [1] "2018-05-08 15:53:32 UTC"

Identify the number of sequence reads described in the FAST5

# read counts for the PromethION example and for the single-read example
promF5$get_read_count()
#> [1] 25
singleF5$get_read_count()
#> [1] 1

Identify the flowcell_type defined in FAST5

# flowcell type recorded in the PromethION and the multi-read example files
promF5$get_flowcell_type()
#> [1] "flo-pro001"
multiF5$get_flowcell_type()
#> [1] "flo-min106"

Identify the sequencing kit defined in FAST5

# sequencing kit recorded in the PromethION example file
promF5$get_sequencing_kit()
#> [1] "sqk-lsk109"

Pull out bulk information for the FAST5 file

# Pull the metadata table from the multi-read FAST5 and render it.
# atomic=FALSE returns the full table (ten identical rows in the rendered
# output) - presumably one row per read; confirm against get_info() docs.
bulk_info <- multiF5$get_info(atomic=FALSE)

# font_size is an argument of kable_styling() itself; it was previously
# placed inside c(...), which turned it into a named element of
# latex_options instead of setting the font size.
knitr::kable(bulk_info,
             caption="Table showing the summary information pulled from FAST5",
             booktabs=TRUE, table.envir='table*', linesep="") %>%
  kable_styling(latex_options="hold_position", font_size=10) %>%
  add_footnote(c("These data are expected to be redundant"))
Table showing the summary information pulled from FAST5
device_type flow_cell_id exp_start_time experiment_type flowcell_type sequencing_kit
gridion FAK42335 2019-01-15T15:54:24Z genomic_dna flo-min106 sqk-lsk109
gridion FAK42335 2019-01-15T15:54:24Z genomic_dna flo-min106 sqk-lsk109
gridion FAK42335 2019-01-15T15:54:24Z genomic_dna flo-min106 sqk-lsk109
gridion FAK42335 2019-01-15T15:54:24Z genomic_dna flo-min106 sqk-lsk109
gridion FAK42335 2019-01-15T15:54:24Z genomic_dna flo-min106 sqk-lsk109
gridion FAK42335 2019-01-15T15:54:24Z genomic_dna flo-min106 sqk-lsk109
gridion FAK42335 2019-01-15T15:54:24Z genomic_dna flo-min106 sqk-lsk109
gridion FAK42335 2019-01-15T15:54:24Z genomic_dna flo-min106 sqk-lsk109
gridion FAK42335 2019-01-15T15:54:24Z genomic_dna flo-min106 sqk-lsk109
gridion FAK42335 2019-01-15T15:54:24Z genomic_dna flo-min106 sqk-lsk109
a These data are expected to be redundant