Source code for tool.fastq_utils

"""
.. See the NOTICE file distributed with this work for additional information
   regarding copyright ownership.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
"""

from __future__ import print_function

import os

from tool.fastqreader import fastqreader


[docs]class fastqUtils(object): # pylint: disable=invalid-name """ Set of methods to help with the management of FastQ files. """
[docs] @staticmethod def fastq_match_paired_ends(fastq_1, fastq_2): """ Take 2 fastq files and remove ends that don't have a matching pair. Requires that the FastQ files are ordered correctly. Mismatches can occur if there is a filtering step that removes one of the paired ends Parameters ---------- fastq_1 : str Location of FastQ file fastq_2 : str Location of FastQ file """ fqr = fastqreader() fqr.openFastQ(fastq_1, fastq_2) fqr.createOutputFiles('match') record1 = fqr.next(1) record2 = fqr.next(2) count_r1 = 0 count_r2 = 0 count_r3 = 0 while fqr.eof(1) is False and fqr.eof(2) is False: r1_id = record1["id"].split(" ") r2_id = record2["id"].split(" ") if r1_id[0] == r2_id[0]: fqr.writeOutput(record1, 1) fqr.writeOutput(record2, 2) record1 = fqr.next(1) record2 = fqr.next(2) count_r1 += 1 count_r2 += 1 count_r3 += 1 elif r1_id[0] < r2_id[0]: record1 = fqr.next(1) count_r1 += 1 else: record2 = fqr.next(2) count_r2 += 1 fqr.closeFastQ() fqr.closeOutputFiles() with open(fqr.f1_output_file_loc, "rb") as f1_match: with open(fastq_1, "wb") as f_old: f_old.write(f1_match.read()) with open(fqr.f2_output_file_loc, "rb") as f2_match: with open(fastq_2, "wb") as f_old: f_old.write(f2_match.read()) os.remove(fqr.f1_output_file_loc) os.remove(fqr.f2_output_file_loc)
[docs] @staticmethod def fastq_sort_file(fastq, output=None): """ Sorting of a FastQ file Parameters ---------- fastq : str Location of the FastQ file to sort output : str [OPTIONAL] Location of the output FastQ file. If left blank then the sorted output is saved to the same location as `fastq` """ fqr = fastqreader() fqr.openFastQ(fastq) record = fqr.next() fq_tmp = {} while fqr.eof() is False: r_id = record["id"].split(" ") fq_tmp[r_id[0]] = [record["start_posn"], record["end_posn"]] record = fqr.next() fqr.closeFastQ() with open(fastq, "r") as fq_file: with open(fastq + ".sorted", "w") as tmp_fq_file: for r_id in sorted(fq_tmp.keys()): fq_file.seek(fq_tmp[r_id][0]) tmp_fq_file.write(fq_file.read(fq_tmp[r_id][1]-fq_tmp[r_id][0])) output_fastq = fastq if output is not None: output_fastq = output with open(fastq + ".sorted", "rb") as fq_sorted: with open(output_fastq, "wb") as f_out: f_out.write(fq_sorted.read()) if output is not None: os.remove(fastq + ".sorted")
[docs] @staticmethod def fastq_randomise(fastq, output=None): """ Randomising the order of reads withim a FastQ file Parameters ---------- fastq : str Location of the FastQ file to randomise output : str [OPTIONAL] Location of the output FastQ file. If left blank then the randomised output is saved to the same location as `fastq` """ fqr = fastqreader() fqr.openFastQ(fastq) record = fqr.next() fq_tmp = {} while fqr.eof() is False: r_id = record["id"].split(" ") fq_tmp[r_id[0]] = [record["start_posn"], record["end_posn"]] record = fqr.next() fqr.closeFastQ() fastq_suffix = "." + fastq[-1].split(".")[-1] with open(fastq, "r") as fq_file: with open(fastq.replace(fastq_suffix, ".random"), "w") as tmp_fq_file: for r_id in list(fq_tmp.keys()): fq_file.seek(fq_tmp[r_id][0]) tmp_fq_file.write(fq_file.read(fq_tmp[r_id][1]-fq_tmp[r_id][0])) output_fastq = fastq if output is not None: output_fastq = output with open(fastq + ".random", "rb") as fq_random: with open(output_fastq, "wb") as f_out: f_out.write(fq_random.read()) if output is not None: os.remove(fastq + ".random")