Structural variations analysis

In [1]:
from datetime import date
from pycltools.pycltools import jprint

jprint('Adrien Leger / EMBL EBI', bold=True, size=150)
jprint('Starting date : 2020_06_04', bold=True, italic=True, size=125)
jprint('Last modification date : {}_{:02}_{:02}'.format(date.today().year, date.today().month, date.today().day), bold=True, italic=True, size=125)
Bad key "text.kerning_factor" on line 4 in
/nfs/software/birney/adrien/miniconda3/envs/Python3.7/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution

Adrien Leger / EMBL EBI

Starting date : 2020_06_04

Last modification date : 2021_02_16

Imports

In [2]:
# Standard lib imports
import os
from datetime import date
from collections import *
from glob import glob, iglob
from shutil import rmtree
import itertools
from pprint import pprint as pp
from tqdm import tqdm
import csv
import re

# Generic third party imports
from pycltools.pycltools import *
import pysam
import pyfaidx
import pybedtools as pbt
import pyranges as pr
import mappy as mp
from IPython.display import SVG, display, Image
from pyBioTools.Annotation import Annotation 
from scipy.ndimage.filters import gaussian_filter1d

# Ploting lib imports
import matplotlib as mpl
import matplotlib.pyplot as pl
import seaborn as sns
%matplotlib inline

# Data wrangling lib imports
import pandas as pd
import numpy as np
import scipy as sp
from pandas.plotting import table
pd.options.display.max_colwidth = 1000
pd.options.display.max_columns = 200
pd.options.display.max_rows = 100
pd.options.display.min_rows = 100

Diverging Insertions analysis

In [172]:
outdir = "diverging_insertions"
#shutil.rmtree(outdir, ignore_errors=True)
mkdir (outdir)
Creating /hps/research1/birney/users/adrien/analyses/medaka_assembly_graph/diverging_insertions

Find alternative segments

In [475]:
# input files
graph_info_fn = "./graph_assembly/all_ref_graph_seg_info.pkl"
dna_usage_fn =  "segment_usage/raw_reads/segment_usage_len_cov_norm.tsv"
ref_fasta = "references/Oryzias_latipes_HDRR_clean.fa"
outdir = "diverging_insertions"

# Options
ref_type = "HDRR"
min_alt_seg_len = 200
min_alt_path_len = 10000
max_identity = 0.5
min_coverage_dna = 0.5
min_samples_dna = 2
no_Ns_extra = 10000

# Load and filter out segments
graph_info_df = pd.read_pickle(graph_info_fn)
graph_info_df = graph_info_df.drop(columns=["component","connected"])
graph_info_df = graph_info_df[(graph_info_df["type"] != ref_type) & (graph_info_df["length"]>=min_alt_seg_len) & (graph_info_df["identity"]<=max_identity)]

# Load and filter out segments
dna_usage_df = pd.read_csv(dna_usage_fn, sep="\t", index_col=0)
dna_usage_df = dna_usage_df[(dna_usage_df["type"] != ref_type) & (dna_usage_df["length"]>=min_alt_seg_len)]
# Count number of samples with sufficient coverage for each segments
dna_usage_df = dna_usage_df.drop(columns=["chromosome","length","start","end","type"])
dna_usage_df["samples_pass_dna"] = dna_usage_df.ge(min_coverage_dna).sum(axis=1)
dna_usage_df = dna_usage_df[dna_usage_df["samples_pass_dna"]>=min_samples_dna]

all_df = pd.merge(graph_info_df, dna_usage_df[["samples_pass_dna"]], left_index=True, right_index=True, how="inner")
all_df = all_df.sort_values(by="length", ascending=False)

print (f"Number of segments found: {len(all_df)}")
out_fn = os.path.join(outdir, "DNA_alternative_segments.tsv")
all_df.to_csv(out_fn, sep="\t")

l = []
with pyfaidx.Fasta(ref_fasta) as ref_fa_fp:
    for (connected_chromosome, connected_start, connected_end), branch_df in all_df.groupby(["connected_chromosome", "connected_start","connected_end"]):
        if branch_df["length"].sum() > min_alt_path_len:
            d = OrderedDict()
            d["connected_coord"] = f"{connected_chromosome}:{int(connected_start)}-{int(connected_end)}"
            d["Valid segments"] = len(branch_df)
            d["MIKK segments"] = len(branch_df[branch_df["type"].isin(["MIKK"])])
            d["HNI segments"] = len(branch_df[branch_df["type"].isin(["HNI", "HNI+"])])
            d["HSOK segments"] = len(branch_df[branch_df["type"].isin(["HSOK", "HSOK+"])])
            d["segment_lengths"] = branch_df["length"].sum()
            d["Median identity"] = np.median(branch_df["identity"])
            d["Median samples_pass_dna"] = np.median(branch_df["samples_pass_dna"])
            # Test presence of Ns in reference
            seq = str(ref_fa_fp[connected_chromosome][int(connected_start)-no_Ns_extra:int(connected_end)+no_Ns_extra])
            d["Ns in ref"] = seq.count("N")
            # Finally add segment list
            d["segment_ids"] = ",".join(branch_df.index)
            l.append(d)
        
path_df = pd.DataFrame(l)
print (f"Number of alternative diverging paths found including Ns: {len(path_df)}")

path_df = path_df[path_df["Ns in ref"] == 0]
path_df = path_df.drop(columns=["Ns in ref"])
path_df = path_df.sort_values(by="segment_lengths", ascending=False)

print (f"Number of alternative diverging paths found: {len(path_df)}")
display(path_df.head(50))
out_fn = os.path.join(outdir, "DNA_alternative_paths.tsv")
all_df.to_csv(out_fn, sep="\t")
Number of segments found: 18501
Number of alternative diverging paths found including Ns: 592
Number of alternative diverging paths found: 427
connected_coord Valid segments MIKK segments HNI segments HSOK segments segment_lengths Median identity Median samples_pass_dna segment_ids
568 HDRR_9:3927794-3957463 4 1 3 0 101296 0.226967 4.5 s703709,s703711,s979238,s703712
156 HDRR_16:21369159-21399684 1 0 1 0 85351 0.092795 3.0 s743127
254 HDRR_19:7869962-7990224 7 4 3 0 85317 0.300834 9.0 s761874,s761883,s1017331,s761882,s1017337,s1017332,s1017336
268 HDRR_2:1812220-1907024 4 0 4 0 72190 0.165387 6.5 s658990,s659000,s658998,s658992
67 HDRR_11:15857370-15998083 15 4 11 0 72005 0.290481 10.0 s713737,s932137,s713724,s713736,s713731,s713733,s932142,s713732,s713726,s713729,s713734,s932144,s932140,s713723,s713730
551 HDRR_8:14238688-14351248 1 0 0 1 59773 0.131235 7.0 s830888
16 HDRR_1:24269693-24311405 12 12 0 0 54632 0.360770 10.5 s1014602,s1090826,s991133,s965105,s965117,s991130,s965121,s965116,s965118,s965106,s991142,s965107
546 HDRR_8:3098795-3152752 3 2 0 1 53529 0.336211 12.0 s831701,s964917,s964919
294 HDRR_2:11254198-11258205 1 0 1 0 53291 0.100313 2.0 s655905
204 HDRR_18:1676752-1682138 1 0 0 1 52272 0.204486 11.0 s873607
457 HDRR_4:23672961-23713279 1 0 1 0 52003 0.170450 8.0 s672660
273 HDRR_2:4333869-4381630 1 0 0 1 50113 0.064688 5.0 s805712
500 HDRR_6:6884029-6895953 5 5 0 0 48179 0.361484 3.0 s963720,s984295,s949435,s949436,s949434
250 HDRR_19:940269-974087 3 2 1 0 44683 0.378752 5.0 s762680,s1073531,s936438
355 HDRR_21:27476540-27507741 10 9 1 0 42881 0.255228 11.5 s933636,s933627,s972197,s933625,s776390,s933623,s933633,s933634,s933626,s933618
533 HDRR_7:26861283-26870949 3 2 1 0 41807 0.245442 4.0 s692993,s1044529,s1007778
159 HDRR_16:25375142-25416160 1 1 0 0 40415 0.192550 5.0 s1036580
387 HDRR_23:4344994-4363759 6 0 6 0 40282 0.310085 5.5 s784541,s784525,s784527,s784531,s784539,s784528
236 HDRR_18:25882037-26037819 2 0 1 1 39615 0.272905 9.5 s873938,s760698
587 HDRR_9:28875054-28899249 6 3 2 1 37934 0.262111 2.5 s1023514,s1023513,s1102415,s834965,s701970,s701979
131 HDRR_15:9363088-9390783 1 0 0 1 35832 0.203103 2.0 s860524
350 HDRR_21:24983643-25128001 13 5 8 0 35248 0.407504 7.0 s1039259,s775783,s775815,s775804,s775787,s977701,s775788,s775813,s977706,s775811,s775795,s1086553,s1039260
303 HDRR_2:24829249-24860041 7 7 0 0 34739 0.085003 10.0 s1015552,s1015554,s1015556,s1109367,s1015560,s1053832,s1015558
324 HDRR_20:14173528-14175575 1 1 0 0 33477 0.283201 12.0 s1019864
431 HDRR_3:32671431-32693306 1 1 0 0 33438 0.425046 5.0 s945673
344 HDRR_21:14904299-14924601 4 4 0 0 33308 0.337277 10.0 s934156,s934158,s934154,s934159
172 HDRR_17:8913680-8938202 1 1 0 0 33073 0.079446 4.0 s997495
338 HDRR_21:6490610-6571148 4 4 0 0 32685 0.287302 4.5 s1089645,s1028986,s1028992,s1028984
291 HDRR_2:9394131-9401275 1 0 1 0 32188 0.144400 8.0 s660063
399 HDRR_24:11645881-11661166 5 2 3 0 32030 0.240199 5.0 s791136,s791143,s791144,s1069091,s1037521
528 HDRR_7:24636987-24658723 1 0 1 0 31464 0.162746 8.0 s692641
341 HDRR_21:11287124-11367077 14 14 0 0 31118 0.363036 7.0 s1009675,s1009684,s1009672,s1009690,s1046401,s1046399,s1030103,s1009682,s1009676,s1030100,s1009691,s1009679,s1009686,s1030104
404 HDRR_24:15819959-15864998 1 1 0 0 31115 0.201987 2.0 s1041480
264 HDRR_2:345321-365896 3 3 0 0 30646 0.260200 2.0 s1096297,s975762,s975761
120 HDRR_14:23772376-23827286 4 0 4 0 30468 0.407676 4.0 s735156,s735146,s735153,s735148
13 HDRR_1:15950475-15959820 4 1 2 1 30178 0.170986 7.0 s799856,s652129,s919440,s652130
231 HDRR_18:23430754-23455578 1 1 0 0 29581 0.183686 12.0 s1065398
313 HDRR_20:5598832-5602784 3 3 0 0 28839 0.376779 5.0 s946453,s1092943,s1029026
20 HDRR_1:28313937-28334544 1 1 0 0 28755 0.162500 2.0 s1029793
574 HDRR_9:9641551-9674640 3 3 0 0 28226 0.255737 5.0 s901622,s901624,s901628
173 HDRR_17:9454310-9476801 3 3 0 0 27971 0.359653 4.0 s1013032,s1013030,s1013029
349 HDRR_21:24940073-24971442 7 1 6 0 27674 0.347152 7.0 s775762,s775765,s775763,s775760,s775767,s775758,s977697
509 HDRR_6:25586057-25589021 2 2 0 0 27285 0.323210 8.0 s985763,s1092385
584 HDRR_9:22411809-22423967 5 5 0 0 27261 0.233072 12.0 s969598,s969600,s969596,s969602,s969595
511 HDRR_6:27847682-27854190 3 3 0 0 27059 0.408222 6.0 s965023,s918016,s1006714
391 HDRR_23:12060010-12085295 1 0 0 1 26887 0.398381 11.0 s890555
169 HDRR_17:2960507-2987704 8 7 0 1 26835 0.266108 5.0 s930103,s869475,s1013646,s1032192,s1013648,s930104,s1013641,s930097
582 HDRR_9:21049259-21098153 1 0 0 1 26661 0.149893 2.0 s834273
45 HDRR_10:16140825-16237381 1 1 0 0 26604 0.166704 8.0 s1004136
513 HDRR_6:31321108-31354342 3 3 0 0 26186 0.420175 12.0 s1031635,s1017312,s989586

Example Bandage plots for selected candidates

In [477]:
for fn in glob.glob("./diverging_insertions/bandage_plots/DNA_*.png"):
    coord = os.path.split(fn)[-1].split(".")[0]
    jprint(coord, bold=True, size=100)
    display(Image(fn, width=1000))

DNA_HDRR_19:7869962-7990224

DNA_HDRR_1:24269693-24311405

DNA_HDRR_18:25882037-26037819

DNA_HDRR_9:3927794-3957463

DNA_HDRR_2:1812220-1907024

Segments with both DNA and RNA support

Find alternative segments

In [126]:
def gff3_exon_parser(fn):
        """ Parse a gff3 formated file"""
        exon_features = OrderedDict()
        transcript_features = OrderedDict()
        gene_features = OrderedDict()
        
        c=Counter()
        with open(fn) as fp: 
            for line in tqdm(fp):
                if not line.startswith("#"):
                    line = line.strip().split("\t")
                    if line[2] in ["gene", "mRNA", "exon"]:
                        try:
                            d = OrderedDict(chrom=line[0],start=int(line[3]),end=int(line[4]))
                            attrs = {i.split("=")[0]:i.split("=")[1] for i in line[8].split(";")}
                            if line[2] == "gene":
                                ID = attrs.get("ID")
                                if ID:
                                    gene_features[ID] = d
                                c["Valid gene"]+=1
                            elif line[2] == "mRNA":
                                d["gene_id"] = attrs.get("Parent")
                                ID = attrs.get("ID")
                                if ID:
                                    transcript_features[ID] = d
                                c["Valid transcript"]+=1
                            elif line[2] == "exon":
                                d["transcript_id"] = attrs.get("Parent")
                                ID = attrs.get("exon_id")
                                if ID:
                                    exon_features[ID] = d
                                c["Valid exon"]+=1
                        except (IndexError, ValueError) as E:
                            c["Invalid feature line"]+=1
                
        # Convert in a dataframe and cleanup
        for i, j in c.items():
            print(f"{i}:{j}")
        
        for i, j in exon_features.items():
            tx_id = j.get("transcript_id")
            try:
                tx_info = transcript_features.get(tx_id)
                gene_id = tx_info.get("gene_id")
                exon_features[i]["gene_id"]=gene_id
            except:
                exon_features[i]["gene_id"]=tx_id
        
        exon_df = pd.DataFrame.from_dict(exon_features, orient="index")
        exon_df.index.name="exon_id"
        return exon_df
In [129]:
# input files
graph_info_fn = "./graph_assembly/all_ref_graph_seg_info.pkl"
dna_usage_fn =  "segment_usage/raw_reads/segment_usage_len_cov_norm.tsv"
rna_usage_fn =  "segment_usage/rna_seq/segment_usage_len_cov_norm.tsv"
ref_fasta = "references/Oryzias_latipes_HDRR_clean.fa"
gff3_fn = "./references/Oryzias_latipes_all_assemblies_sorted.gff3"
outdir = "diverging_insertions"

# Options
ref_type = "HDRR"
min_alt_seg_len = 200
min_alt_path_len = 10000
max_identity = 0.5
min_coverage_dna = 0.5
min_samples_dna = 2
min_coverage_rna = 0.1
min_samples_rna = 8
no_Ns_extra = 50000
c = Counter()

# Load and filter out segments
graph_info_df = pd.read_pickle(graph_info_fn)
graph_info_df = graph_info_df.drop(columns=["component","connected"])
graph_info_df = graph_info_df[(graph_info_df["type"] != ref_type) & (graph_info_df["length"]>=min_alt_seg_len) & (graph_info_df["identity"]<=max_identity)]

# Load and filter out segments
dna_usage_df = pd.read_csv(dna_usage_fn, sep="\t", index_col=0)
dna_usage_df = dna_usage_df[(dna_usage_df["type"] != ref_type) & (dna_usage_df["length"]>=min_alt_seg_len)]
# Count number of samples with sufficient coverage for each segments
dna_usage_df = dna_usage_df.drop(columns=["chromosome","length","start","end","type"])
dna_usage_df["samples_pass_dna"] = dna_usage_df.ge(min_coverage_dna).sum(axis=1)
dna_usage_df = dna_usage_df[dna_usage_df["samples_pass_dna"]>=min_samples_dna]

# Load and filter out segments
rna_usage_df = pd.read_csv(rna_usage_fn, sep="\t", index_col=0)
rna_usage_df = rna_usage_df[(rna_usage_df["type"] != ref_type) & (rna_usage_df["length"]>=min_alt_seg_len)]
# Count number of samples with sufficient coverage for each segments
rna_usage_df = rna_usage_df.drop(columns=["chromosome","length","start","end","type"])
rna_usage_df["samples_pass_rna"] = rna_usage_df.ge(min_coverage_rna).sum(axis=1)
rna_usage_df = rna_usage_df[rna_usage_df["samples_pass_rna"]>=min_samples_rna]

usage_df = pd.merge(dna_usage_df[["samples_pass_dna"]], rna_usage_df[["samples_pass_rna"]], left_index=True, right_index=True, how="inner")
all_df = pd.merge(graph_info_df, usage_df, left_index=True, right_index=True, how="inner")
all_df = all_df.sort_values(by="length", ascending=False)

stdout_print (f"Number of segments found: {len(all_df)}\n")
out_fn = os.path.join(outdir, "DNA_RNA_alternative_segments.tsv")
all_df.to_csv(out_fn, sep="\t")

stdout_print (f"Parse annotation file\n")
exon_df = gff3_exon_parser(fn=gff3_fn)

l = []
with pyfaidx.Fasta(ref_fasta) as ref_fa_fp:
    for (connected_chromosome, connected_start, connected_end), branch_df in all_df.groupby(["connected_chromosome", "connected_start","connected_end"]):
        banch_len = branch_df["length"].sum()
        if banch_len > min_alt_path_len:
            
            # Check number of Ns
            seq = str(ref_fa_fp[connected_chromosome][int(connected_start)-no_Ns_extra:int(connected_end)+no_Ns_extra])
            Ns_count = seq.count("N")
            if Ns_count:
                c["No overlaping exons"]+=1
                continue
            
            # Find overlaping exon 
            overlapping_exon_info = exon_df[(exon_df["chrom"]==connected_chromosome)&(exon_df["start"]<=connected_end)&(exon_df["end"]>=connected_start)]
            if overlapping_exon_info.empty:
                c["Ns in reference sequence"]+=1
                continue            
            
            gene_ids = set()
            for g in overlapping_exon_info.itertuples():
                gid = g.gene_id.split(":")[1]
                gene_ids.add(gid)
            
            d = OrderedDict()
            d["connected_coord"] = f"{connected_chromosome}:{int(connected_start)}-{int(connected_end)}"
            d["connected_seg_start"] = branch_df.iloc[0]["connected_seg_start"]
            d["connected_seg_end"] = branch_df.iloc[0]["connected_seg_end"]
            d["Valid segments"] = len(branch_df)
            d["MIKK segments"] = len(branch_df[branch_df["type"].isin(["MIKK"])])
            d["HNI segments"] = len(branch_df[branch_df["type"].isin(["HNI", "HNI+"])])
            d["HSOK segments"] = len(branch_df[branch_df["type"].isin(["HSOK", "HSOK+"])])
            d["segment_lengths"] = branch_df["length"].sum()
            d["Median identity"] = np.median(branch_df["identity"])
            d["Samples_pass_dna"] = branch_df["samples_pass_dna"].max()
            d["Samples_pass_rna"] = branch_df["samples_pass_rna"].max()      
            d["n_exons_overlapped"] = len(overlapping_exon_info)
            d["gene_ids_overlapped"] = ",".join(gene_ids)
            d["segment_ids"] = ",".join(branch_df.index)
            
            l.append(d)
            c["Valid interval"]+=1
        
# Convert in a dataframe and cleanup
for i, j in c.items():
    stdout_print(f"{i}:{j}\n")
            
stdout_print ("Cast to dataframe and cleanup\n")
path_df = pd.DataFrame(l)
path_df = path_df.sort_values(by="segment_lengths", ascending=False)

stdout_print (f"Number of alternative diverging paths found: {len(path_df)}\n")
display(path_df)
out_fn = os.path.join(outdir, "DNA_RNA_alternative_paths.tsv")
path_df.to_csv(out_fn, sep="\t")
Number of segments found: 1330
Parse annotation file
2760181it [00:16, 170866.42it/s]
Valid gene:68534
Valid transcript:108919
Valid exon:1175685
No overlaping exons:20
Valid interval:19
Ns in reference sequence:9
Cast to dataframe and cleanup
Number of alternative diverging paths found: 19
connected_coord connected_seg_start connected_seg_end Valid segments MIKK segments HNI segments HSOK segments segment_lengths Median identity Samples_pass_dna Samples_pass_rna n_exons_overlapped gene_ids_overlapped segment_ids
13 HDRR_21:27476540-27507741 s575078 s575094 7 6 1 0 31567 0.249029 12 49 14 ENSORLG00000018162,ENSORLG00000022858 s933636,s933627,s933625,s776390,s933634,s933626,s933618
0 HDRR_1:28313937-28334544 s27476 s27496 1 1 0 0 28755 0.162500 2 14 16 ENSORLG00000028954,ENSORLG00000011449,ENSORLG00000011469 s1029793
7 HDRR_18:24441731-24455814 s501739 s501759 2 2 0 0 26104 0.158425 12 42 17 ENSORLG00000020600 s1073563,s1013541
8 HDRR_19:1264872-1268714 s508796 s508801 1 1 0 0 23091 0.329773 9 15 4 ENSORLG00000024348 s1063055
11 HDRR_2:11750121-11803708 s45431 s45451 1 1 0 0 20334 0.293178 7 12 25 ENSORLG00000001809 s1021577
9 HDRR_2:1761679-1776422 s37795 s37806 2 0 2 0 20318 0.088870 12 30 3 ENSORLG00000022471 s658979,s658983
1 HDRR_11:2928816-2949385 s290830 s290832 1 1 0 0 19629 0.151033 5 50 2 ENSORLG00000024388,ENSORLG00000026682 s992562
2 HDRR_11:20638485-20643966 s307371 s307376 1 0 1 0 17335 0.223931 12 50 1 ENSORLG00000010193 s712659
15 HDRR_5:25269130-25274621 s145139 s145153 1 1 0 0 17154 0.438833 2 47 9 ENSORLG00000013091 s1028384
6 HDRR_18:23094225-23131585 s501121 s501134 3 3 0 0 16739 0.414943 7 50 4 ENSORLG00000022972 s974666,s948392,s974668
10 HDRR_2:1812220-1907024 s37820 s37823 1 0 1 0 16564 0.216535 11 11 11 ENSORLG00000026826,ENSORLG00000000211 s658998
4 HDRR_13:3420451-3427494 s345365 s345378 1 1 0 0 15855 0.394167 11 9 3 ENSORLG00000001341 s1034604
14 HDRR_4:9231102-9238656 s100179 s100189 1 1 0 0 12456 0.086348 9 19 46 ENSORLG00000018696,ENSORLG00000006099,ENSORLG00000024038 s1063151
16 HDRR_6:27765140-27791258 s174982 s175002 1 1 0 0 12021 0.363133 6 26 10 ENSORLG00000029097,ENSORLG00000023185,ENSORLG00000015297 s918031
12 HDRR_20:2409883-2433274 s530814 s530817 1 1 0 0 11887 0.249728 7 37 9 ENSORLG00000024603,ENSORLG00000025893,ENSORLG00000022921 s1065551
3 HDRR_12:3769168-3780505 s318435 s318442 1 1 0 0 11400 0.376077 2 42 2 ENSORLG00000002405 s993576
17 HDRR_6:31321108-31354342 s178775 s178782 1 1 0 0 10988 0.420175 12 28 20 ENSORLG00000028466,ENSORLG00000025658,ENSORLG00000022493,ENSORLG00000026283,ENSORLG00000029831 s1017312
18 HDRR_7:25096755-25101297 s200344 s200347 1 1 0 0 10696 0.411563 12 19 2 ENSORLG00000016166 s1011649
5 HDRR_14:4172544-4177254 s374971 s374978 1 1 0 0 10368 0.496845 7 50 4 ENSORLG00000000991,ENSORLG00000028417 s993534

Explore some of the candidates manually

In [394]:
outdir = "diverging_insertions/sequences"
shutil.rmtree(outdir, ignore_errors=True)
mkdir (outdir)
Creating /hps/research1/birney/users/adrien/analyses/medaka_assembly_graph/diverging_insertions/sequences

Define functions

In [5]:
seg_info = namedtuple("seg_info", ["seg_id", "orient", "length", "sequence"])

def get_seq_from_segments (segment_fasta, seg_list):
    seg_info_list = []
    with pyfaidx.Fasta(segment_fasta) as fa:
        for seg in seg_list:
            seg_id = seg[:-1]
            orient = seg[-1]
            if orient == "+":
                seg_seq = str(fa[seg_id][:])
            else: 
                seg_seq = str(fa[seg_id][:].reverse.complement)
            seg_info_list.append(seg_info(seg_id=seg_id, orient=orient, length=len(seg_seq), sequence=seg_seq))
    return seg_info_list

def get_flank_seq (seg_id, segment_fasta, flank_seq_len=10000, reverse=False):
    seg_num = int(seg_id[1:])
    seg_info_list = []
    tot_len = 0
    with pyfaidx.Fasta(segment_fasta) as fa:
        while tot_len<flank_seq_len:
            if reverse:
                seg_seq = str(fa[seg_id])
                len_seg = len(seg_seq)
                if tot_len+len_seg > flank_seq_len:
                    len_seg = flank_seq_len-tot_len
                    seg_seq = seg_seq[-len_seg:]
                seg_info_list.insert(0, seg_info(seg_id=seg_id, orient="+", length=len_seg, sequence=seg_seq))
                tot_len += len_seg
                seg_num-=1
                seg_id = f"s{seg_num}"
            else:
                seg_seq = str(fa[seg_id])
                len_seg = len(seg_seq)
                if tot_len+len_seg > flank_seq_len:
                    len_seg = flank_seq_len-tot_len
                    seg_seq = seg_seq[:len_seg]
                seg_info_list.append(seg_info(seg_id=seg_id, orient="+", length=len_seg, sequence=seg_seq))
                tot_len += len_seg
                seg_num+=1
                seg_id = f"s{seg_num}"
                
    return seg_info_list

def get_ref_alt_seq (left_ref_seg, right_ref_seg, alt_seg_str, flank_seq_len, segment_fasta, outdir, locus):
    print("Extract left flank info")
    left_seq_info = get_flank_seq (seg_id=left_ref_seg, segment_fasta=segment_fasta, flank_seq_len=flank_seq_len, reverse=True)
    print("Extract right flank info")
    right_seq_info = get_flank_seq (seg_id=right_ref_seg, segment_fasta=segment_fasta, flank_seq_len=flank_seq_len, reverse=False)

    print("Extract reference path info")
    internal_ref_seg_list = [f"s{i}+" for i in range(int(left_ref_seg[1:])+1, int(right_ref_seg[1:]))]
    internal_ref_seq_info = get_seq_from_segments (segment_fasta=segment_fasta, seg_list=internal_ref_seg_list)
    internal_ref_seq_len = sum([i.length for i in internal_ref_seq_info])
    print(f"\tLength of reference path {internal_ref_seq_len}")
    
    print("Extract alternative path info")
    internal_alt_seg_list = [i.strip() for i in alt_seg_str.split(",")]
    internal_alt_seq_info = get_seq_from_segments (segment_fasta=segment_fasta, seg_list=internal_alt_seg_list)
    internal_alt_seq_len = sum([i.length for i in internal_alt_seq_info])
    print(f"\tLength of alternative path {internal_alt_seq_len}")
    
    ref_seg_info = left_seq_info+internal_ref_seq_info+right_seq_info
    alt_seg_info = left_seq_info+internal_alt_seq_info+right_seq_info
    
    # FASTA output
    print("Write FASTA output")
    out_fn = f"{outdir}/{locus}.fa"
    ref_seq = "".join([i.sequence for i in ref_seg_info])
    alt_seq = "".join([i.sequence for i in alt_seg_info])
    with open(out_fn, "w") as out_fa:
        out_fa.write(f">{locus}_reference\n{ref_seq}\n")
        out_fa.write(f">{locus}_alternative\n{alt_seq}\n")
        
    # BED output
    print("Write BED output")
    out_fn = f"{outdir}/{locus}.bed"
    with open(out_fn, "w") as out_bed:
        start=0
        for seg_info in ref_seg_info:
            out_bed.write(f"{locus}_reference\t{start}\t{start+seg_info.length}\t{seg_info.seg_id}{seg_info.orient}\n")
            start+=seg_info.length
        start=0
        for seg_info in alt_seg_info:
            out_bed.write(f"{locus}_alternative\t{start}\t{start+seg_info.length}\t{seg_info.seg_id}{seg_info.orient}\n")
            start+=seg_info.length

HDRR_23:4344994-4363759

Genes overlaped by reference

  • ENSORLG00000028212 (NXPE)
In [433]:
locus = "HDRR_23:4344994-4363759"
print("Extended coordinates: {}:{}-{}".format(locus.split(":")[0], int(locus.split(":")[1].split("-")[0])-50000, int(locus.split(":")[1].split("-")[1])+50000))

path_df = pd.read_csv("./diverging_insertions/DNA_RNA_alternative_paths.tsv", sep="\t", index_col=0)
display(path_df[path_df["connected_coord"]==locus])

display(Image(f"./diverging_insertions/igv_plots/{locus}.png", width=1000))
display(Image(f"./diverging_insertions/bandage_plots/RNA_{locus}.png", width=1000))
Extended coordinates: HDRR_23:4294994-4413759
connected_coord connected_seg_start connected_seg_end Valid segments MIKK segments HNI segments HSOK segments segment_lengths Median identity Median samples_pass_dna Median samples_pass_rna segment_ids
34 HDRR_23:4344994-4363759 609578 609581 2 0 2 0 19061 0.298706 11.0 29.0 s784541,s784531
In [159]:
get_ref_alt_seq (
    left_ref_seg = "s609578",
    right_ref_seg = "s609581",
    alt_seg_str = "s1073563+, s1013541-",
    segment_fasta = "./graph_assembly/Oryzias_latipes_graph_segments.fa",
    flank_seq_len=50000,
    outdir="diverging_insertions/sequences",
    locus = "HDRR_23:4344994-4363759")
Extract left flank info
Extract right flank info
Extract reference path info
	Length of reference path 18765
Extract alternative path info
	Length of alternative path 26104
Write FASTA output
Write BED output

HDRR_18:24441731-24455814

Genes overlaped by reference

  • ENSORLG00000020600 (acox3)
In [434]:
locus = "HDRR_18:24441731-24455814"
print("Extended coordinates: {}:{}-{}".format(locus.split(":")[0], int(locus.split(":")[1].split("-")[0])-50000, int(locus.split(":")[1].split("-")[1])+50000))

path_df = pd.read_csv("./diverging_insertions/DNA_RNA_alternative_paths.tsv", sep="\t", index_col=0)
display(path_df[path_df["connected_coord"]==locus])

display(Image(f"./diverging_insertions/igv_plots/{locus}.png", width=1000))
display(Image(f"./diverging_insertions/bandage_plots/RNA_{locus}.png", width=1000))
Extended coordinates: HDRR_18:24391731-24505814
connected_coord connected_seg_start connected_seg_end Valid segments MIKK segments HNI segments HSOK segments segment_lengths Median identity Median samples_pass_dna Median samples_pass_rna segment_ids
19 HDRR_18:24441731-24455814 501739 501759 2 2 0 0 26104 0.158425 7.0 39.5 s1073563,s1013541
In [6]:
get_ref_alt_seq (
    left_ref_seg = "s501739",
    right_ref_seg = "s501759",
    alt_seg_str = "s1073563+, s1013541-",
    segment_fasta = "./graph_assembly/Oryzias_latipes_graph_segments.fa",
    flank_seq_len=50000,
    outdir="diverging_insertions/sequences",
    locus = "HDRR_18:24441731-24455814")
Extract left flank info
Extract right flank info
Extract reference path info
	Length of reference path 14083
Extract alternative path info
	Length of alternative path 26104
Write FASTA output
Write BED output

HDRR_21:27476540-27507741

Genes overlaped by reference

  • ENSORLG00000022858
  • parp4(ENSORLG00000018162)
In [436]:
locus = "HDRR_21:27476540-27507741"
print("Extended coordinates: {}:{}-{}".format(locus.split(":")[0], int(locus.split(":")[1].split("-")[0])-50000, int(locus.split(":")[1].split("-")[1])+50000))

path_df = pd.read_csv("./diverging_insertions/DNA_RNA_alternative_paths.tsv", sep="\t", index_col=0)
display(path_df[path_df["connected_coord"]==locus])

display(Image(f"./diverging_insertions/igv_plots/{locus}.png", width=1000))
display(Image(f"./diverging_insertions/bandage_plots/RNA_{locus}.png", width=1000))
Extended coordinates: HDRR_21:27426540-27557741
connected_coord connected_seg_start connected_seg_end Valid segments MIKK segments HNI segments HSOK segments segment_lengths Median identity Median samples_pass_dna Median samples_pass_rna segment_ids
32 HDRR_21:27476540-27507741 575078 575094 7 6 1 0 31567 0.249029 12.0 45.0 s933636,s933627,s933625,s776390,s933634,s933626,s933618
In [7]:
get_ref_alt_seq (
    left_ref_seg = "s575078",
    right_ref_seg = "s575094",
    alt_seg_str = "s776384+, s776385+, s933617+, s776386+, s776387+, s933618+, s776390+, s776391+, s933619+, s933620+, s933621+, s933622+, s933624+, s933625+, s933626+, s933627+, s933628+, s933629+, s933630+, s933631+, s933632+, s933633+, s933634+, s933635+, s933636+",
    segment_fasta = "./graph_assembly/Oryzias_latipes_graph_segments.fa",
    flank_seq_len=50000,
    outdir="diverging_insertions/sequences",
    locus = "HDRR_21:27476540-27507741")
Extract left flank info
Extract right flank info
Extract reference path info
	Length of reference path 31201
Extract alternative path info
	Length of alternative path 43751
Write FASTA output
Write BED output

HDRR_2:1761679-1776422

Genes overlaped by reference

  • ENSORLG00000022471 (CD48)
In [437]:
locus = "HDRR_2:1761679-1776422"
print("Extended coordinates: {}:{}-{}".format(locus.split(":")[0], int(locus.split(":")[1].split("-")[0])-50000, int(locus.split(":")[1].split("-")[1])+50000))

path_df = pd.read_csv("./diverging_insertions/DNA_RNA_alternative_paths.tsv", sep="\t", index_col=0)
display(path_df[path_df["connected_coord"]==locus])

display(Image(f"./diverging_insertions/igv_plots/{locus}.png", width=1000))
display(Image(f"./diverging_insertions/bandage_plots/RNA_{locus}.png", width=1000))
Extended coordinates: HDRR_2:1711679-1826422
connected_coord connected_seg_start connected_seg_end Valid segments MIKK segments HNI segments HSOK segments segment_lengths Median identity Median samples_pass_dna Median samples_pass_rna segment_ids
24 HDRR_2:1761679-1776422 37795 37806 2 0 2 0 20318 0.08887 11.5 22.0 s658979,s658983
In [8]:
get_ref_alt_seq (
    left_ref_seg = "s37795",
    right_ref_seg = "s37806",
    alt_seg_str = "s658979+, s658980+, s658981+, s979176-, s658983+",
    segment_fasta = "./graph_assembly/Oryzias_latipes_graph_segments.fa",
    flank_seq_len=50000,
    outdir="diverging_insertions/sequences",
    locus = "HDRR_2:1761679-1776422")
Extract left flank info
Extract right flank info
Extract reference path info
	Length of reference path 14743
Extract alternative path info
	Length of alternative path 21086
Write FASTA output
Write BED output

HDRR_18:23094225-23131585

Genes overlaped by reference

In [438]:
locus = "HDRR_18:23094225-23131585"
print("Extended coordinates: {}:{}-{}".format(locus.split(":")[0], int(locus.split(":")[1].split("-")[0])-50000, int(locus.split(":")[1].split("-")[1])+50000))

path_df = pd.read_csv("./diverging_insertions/DNA_RNA_alternative_paths.tsv", sep="\t", index_col=0)
display(path_df[path_df["connected_coord"]==locus])

display(Image(f"./diverging_insertions/igv_plots/{locus}.png", width=1000))
display(Image(f"./diverging_insertions/bandage_plots/RNA_{locus}.png", width=1000))
Extended coordinates: HDRR_18:23044225-23181585
connected_coord connected_seg_start connected_seg_end Valid segments MIKK segments HNI segments HSOK segments segment_lengths Median identity Median samples_pass_dna Median samples_pass_rna segment_ids
18 HDRR_18:23094225-23131585 501121 501134 3 3 0 0 16739 0.414943 7.0 50.0 s974666,s948392,s974668
In [9]:
get_ref_alt_seq (
    left_ref_seg = "s501121",
    right_ref_seg = "s501134",
    alt_seg_str = "s948397-, s948396-, s948395-, s974668-, s948393-, s948392-, s948391-, s948390-, s948389-, s974666-",
    segment_fasta = "./graph_assembly/Oryzias_latipes_graph_segments.fa",
    flank_seq_len=50000,
    outdir="diverging_insertions/sequences",
    locus = "HDRR_18:23094225-23131585")
Extract left flank info
Extract right flank info
Extract reference path info
	Length of reference path 37360
Extract alternative path info
	Length of alternative path 33110
Write FASTA output
Write BED output

HDRR_2:8837517-8853181 (No longer in the list)

Genes overlaped by reference

  • HDRR_2:ENSORLG00000022699 (intron)

Extended window to include whole intron

  • start= s42910
  • end = s42945
In [226]:
display(Image("./diverging_insertions/igv_plots/HDRR_2:8837517-8853181.png", width=1000))
display(Image("./diverging_insertions/bandage_plots/HDRR_2:8837517-8853181.png", width=1000))
In [227]:
# Reference path
ref_path = [f"s{i}+" for i in range(42910, 42946)]
ref_seq = seq_from_segments (segment_fasta="./references/Oryzias_latipes_graph_segments.fa", seg_list=ref_path)
print(",".join(ref_path))
print(f"Length of reference path: {len(ref_seq)}" )

# Alternative path
alt_path_str = "s42910+, s42911+, s976704-, s42913+, s976703-, s42915+, s931080-, s42916+, s931079-, s931078-, s931077-, s931076-, s42918+, s42919+, s42920+, s42921+, s42922+, s42923+, s42924+, s42925+, s976700-, s976699-, s1041532+, s996971-, s42928+, s42929+, s42930+, s42931+, s42932+, s42933+, s42934+, s42935+, s42936+, s931165-, s42938+, s42940+, s931164-, s976697-, s931162-, s976696-, s976695-, s976694-, s42944+"
alt_path = [i.strip() for i in alt_path_str.split(",")]
alt_seq = seq_from_segments (segment_fasta="./references/Oryzias_latipes_graph_segments.fa", seg_list=alt_path)
print(",".join(alt_path))
print(f"Length of alternative path: {len(alt_seq)}" )

insert_id="HDRR_2:8837517-8853181"
out_fn = f"diverging_insertions/sequences/{insert_id}.fa"
with open(out_fn, "w") as out_fa:
    out_fa.write(f">{insert_id}_reference\n{ref_seq}\n")
    out_fa.write(f">{insert_id}_alternative\n{alt_seq}\n")
s42910+,s42911+,s42912+,s42913+,s42914+,s42915+,s42916+,s42917+,s42918+,s42919+,s42920+,s42921+,s42922+,s42923+,s42924+,s42925+,s42926+,s42927+,s42928+,s42929+,s42930+,s42931+,s42932+,s42933+,s42934+,s42935+,s42936+,s42937+,s42938+,s42939+,s42940+,s42941+,s42942+,s42943+,s42944+,s42945+
Length of reference path: 71338
s42910+,s42911+,s976704-,s42913+,s976703-,s42915+,s931080-,s42916+,s931079-,s931078-,s931077-,s931076-,s42918+,s42919+,s42920+,s42921+,s42922+,s42923+,s42924+,s42925+,s976700-,s976699-,s1041532+,s996971-,s42928+,s42929+,s42930+,s42931+,s42932+,s42933+,s42934+,s42935+,s42936+,s931165-,s42938+,s42940+,s931164-,s976697-,s931162-,s976696-,s976695-,s976694-,s42944+
Length of alternative path: 64784

HDRR_5:5075836-5101956 (No longer in the list)

Genes overlaped by reference

  • ENSORLG00000024568
  • Wnk2 (ENSORLG00000026376)

Extended window to include genes

  • start= s126150 (HDRR_5:5012842-5015871)
  • end = s126191 (HDRR_5:5134589-5140183)
In [228]:
display(Image("./diverging_insertions/igv_plots/HDRR_5:5075836-5101956.png", width=1000))
display(Image("./diverging_insertions/bandage_plots/HDRR_5:5075836-5101956.png", width=1000))
In [229]:
# Reference path
ref_path = [f"s{i}+" for i in range(126150, 126192)]
ref_seq = seq_from_segments (segment_fasta="./references/Oryzias_latipes_graph_segments.fa", seg_list=ref_path)
print(",".join(ref_path))
print(f"Length of reference path: {len(ref_seq)}" )

# Alternative path
left = [f"s{i}+" for i in range(126150, 126152)]
right = [f"s{i}+" for i in range(126153, 126192)]
alt_path_str = "s126151+, s675762+, s675763+, s675764+, s675765+, s1029010+, s675767+, s1029011+, s675769+, s1050018-, s675772+, s1062580+, s1062581+, s1062582+, s675775+, s675776+, s1062583+, s675778+, s1062584+, s675780+, s675781+, s1062585+, s675783+, s1062586+, s675785+, s1096510-, s675787+, s1062587+, s675789+, s1062588+, s675791+, s1062589+, s675793+, s675794+, s675795+, s675796+, s675797+, s1062590+, s1062591+, s1096508-, s675802+, s126153+"
alt_path = [i.strip() for i in alt_path_str.split(",")]
alt_path = left+alt_path+right
alt_seq = seq_from_segments (segment_fasta="./references/Oryzias_latipes_graph_segments.fa", seg_list=alt_path)
print(",".join(alt_path))
print(f"Length of alternative path: {len(alt_seq)}" )

insert_id="HDRR_5:5075836-5101956"
out_fn = f"diverging_insertions/sequences/{insert_id}.fa"
with open(out_fn, "w") as out_fa:
    out_fa.write(f">{insert_id}_reference\n{ref_seq}\n")
    out_fa.write(f">{insert_id}_alternative\n{alt_seq}\n")
s126150+,s126151+,s126152+,s126153+,s126154+,s126155+,s126156+,s126157+,s126158+,s126159+,s126160+,s126161+,s126162+,s126163+,s126164+,s126165+,s126166+,s126167+,s126168+,s126169+,s126170+,s126171+,s126172+,s126173+,s126174+,s126175+,s126176+,s126177+,s126178+,s126179+,s126180+,s126181+,s126182+,s126183+,s126184+,s126185+,s126186+,s126187+,s126188+,s126189+,s126190+,s126191+
Length of reference path: 66812
s126150+,s126151+,s126151+,s675762+,s675763+,s675764+,s675765+,s1029010+,s675767+,s1029011+,s675769+,s1050018-,s675772+,s1062580+,s1062581+,s1062582+,s675775+,s675776+,s1062583+,s675778+,s1062584+,s675780+,s675781+,s1062585+,s675783+,s1062586+,s675785+,s1096510-,s675787+,s1062587+,s675789+,s1062588+,s675791+,s1062589+,s675793+,s675794+,s675795+,s675796+,s675797+,s1062590+,s1062591+,s1096508-,s675802+,s126153+,s126153+,s126154+,s126155+,s126156+,s126157+,s126158+,s126159+,s126160+,s126161+,s126162+,s126163+,s126164+,s126165+,s126166+,s126167+,s126168+,s126169+,s126170+,s126171+,s126172+,s126173+,s126174+,s126175+,s126176+,s126177+,s126178+,s126179+,s126180+,s126181+,s126182+,s126183+,s126184+,s126185+,s126186+,s126187+,s126188+,s126189+,s126190+,s126191+
Length of alternative path: 106221

Merge all bed and all fasta

In [11]:
fn_out = "diverging_insertions/sequences/merged.fa"
with open (fn_out, "w") as fa_out: 
    for fn_in in glob.glob("./diverging_insertions/sequences/*.fa"):
        with open(fn_in) as fa_in:
            fa_out.write(fa_in.read())
with pyfaidx.Fasta(fn_out) as fa:
    tot_len = len(fa)
    
fn_out = "diverging_insertions/sequences/merged_segments_annot.bed"
with open (fn_out, "w") as bed_out: 
    for fn_in in glob.glob("./diverging_insertions/sequences/HDRR_*.bed"):
        with open(fn_in) as bed_in:
            bed_out.write(bed_in.read())

Align RNA-Seq data to ref and alt paths

Make bed track

In [93]:
bed_fn = "diverging_insertions/sequences/merged_annot.bed"
fa_fn = "diverging_insertions/sequences/merged.fa"
extend = 50000

with open(bed_fn, "w") as bed_fp:
    with pyfaidx.Fasta(fa_fn) as fa_fp:
        for seq in fa_fp:
            start = extend
            end = len(seq)-extend
            bed_fp.write(f"{seq.name}\t{start}\t{end}\n")
            
cat(bed_fn)
HDRR_2:1761679-1776422_reference	50000	64743
HDRR_2:1761679-1776422_alternative	50000	71086
HDRR_23:4344994-4363759_reference	50000	68765
HDRR_23:4344994-4363759_alternative	50000	76104
HDRR_18:23094225-23131585_reference	50000	87360
HDRR_18:23094225-23131585_alternative	50000	83110
HDRR_21:27476540-27507741_reference	50000	81201
HDRR_21:27476540-27507741_alternative	50000	93751
HDRR_18:24441731-24455814_reference	50000	64083
HDRR_18:24441731-24455814_alternative	50000	76104

Make annotations

In [53]:
    def gff3_parser(fn, feature_type=[]):
        """ Parse a gff3 formated file"""
        all_features = []
        c=Counter()
        with open(fn) as fp: 
            for line in fp:
                if not line.startswith("#"):
                    line = line.strip().split("\t")
                    if feature_type and line[2] not in feature_type:
                        c["Invalid feature type"]+=1
                        continue
                    try:
                        d = OrderedDict(refid=line[0],source=line[1],ftype=line[2],start=int(line[3]),end=int(line[4]),score=line[5],strand=line[6],frame=line[7],attributes=line[8])
                        all_features.append(d)
                        c["Valid feature type"]+=1
                    except (IndexError, ValueError) as E:
                        c["Invalid feature line"]+=1
                
        # Convert in a dataframe and cleanup
        for i, j in c.items():
            print(f"{i}:{j}")
        
        df = pd.DataFrame(all_features)
        return df
In [95]:
coord_list = (
    "HDRR_2:1761679-1776422",
    "HDRR_23:4344994-4363759",
    "HDRR_18:23094225-23131585",
    "HDRR_21:27476540-27507741",
    "HDRR_18:24441731-24455814")
gff3_in_fn = "./references/Oryzias_latipes_HDRR_sorted.gff3"
gff3_out_fn = "diverging_insertions/sequences/merged_annot.gff3"
extend = 50000

print ("Inport Gff3 annotatons")
df = gff3_parser(fn=gff3_in_fn)

print ("Extract corresponding annotations")
coord_df_list = []
for coord in coord_list:
    chrom, start, end = supersplit(coord, ["-", ":"])
    ext_start = int(start)-extend
    ext_end = int(end)+extend
    
    coord_df = df[(df["refid"]==chrom)&(df["start"]>=ext_start)&(df["end"]<=ext_end)].copy()
    refid = f"{chrom}:{start}-{end}_reference"
    coord_df["refid"] = refid
    coord_df["start"] = coord_df["start"]-ext_start
    coord_df["end"] = coord_df["end"]-ext_start
    coord_df_list.append(coord_df)
    
all_coord_df = pd.concat(coord_df_list)
all_coord_df = all_coord_df.sort_values(["refid", "start", "end"])

print ("Write new annotations")
with open (gff3_out_fn, "w") as fp:
    # write gff header
    fp.write("##gff-version 3\n")
    for coord in coord_list:
        chrom, start, end = supersplit(coord, ["-", ":"])
        ext_end = int(end)+extend
        refid = f"{chrom}:{start}-{end}_reference"
        fp.write(f"##sequence-region\t{refid}\t1\t{ext_end}\n")
    for l in all_coord_df.itertuples(index=False):
        fp.write("\t".join([str(i) for i in l])+"\n")
        
head(gff3_out_fn, max_char_col=100, max_char_line=200)
Inport Gff3 annotatons
Extract corresponding annotations
Write new annotations
##gff-version 3
##sequence-region	HDRR_2:1761679-1776422_reference	1	1826422
##sequence-region	HDRR_23:4344994-4363759_reference	1	4413759
##sequence-region	HDRR_18:23094225-23131585_reference	1	23181585
##sequence-region	HDRR_21:27476540-27507741_reference	1	27557741
##sequence-region	HDRR_18:24441731-24455814_reference	1	24505814
HDRR_18:23094225-23131585_reference	ensembl	exon	5896	5898	.	+	.	Parent=transcript:ENSORLT00000026860;Name=ENSORLE00000312657;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=ENSORLE00000312...
HDRR_18:23094225-23131585_reference	ensembl	CDS	5896	5898	.	+	0	ID=CDS:ENSORLP00000035412;Parent=transcript:ENSORLT00000026860;protein_id=ENSORLP00000035412
HDRR_18:23094225-23131585_reference	ensembl	gene	5896	8212	.	+	.	ID=gene:ENSORLG00000026821;biotype=protein_coding;gene_id=ENSORLG00000026821;logic_name=ensembl;version=1
HDRR_18:23094225-23131585_reference	ensembl	mRNA	5896	8212	.	+	.	ID=transcript:ENSORLT00000026860;Parent=gene:ENSORLG00000026821;biotype=protein_coding;transcript_id=ENSORLT00000026860;version=1

Star Index

In [439]:
outdir = "diverging_insertions/star_index"
shutil.rmtree(outdir, ignore_errors=True)
mkdir (outdir)
Creating /hps/research1/birney/users/adrien/analyses/medaka_assembly_graph/diverging_insertions/star_index
Merge all fasta files and compute length of the SA pre-indexing string for STAR
In [442]:
fn_out = "diverging_insertions/sequences/merged.fa"
with open (fn_out, "w") as fa_out: 
    for fn_in in glob.glob("./diverging_insertions/sequences/*.fa"):
        with open(fn_in) as fa_in:
            fa_out.write(fa_in.read())
with pyfaidx.Fasta(fn_out) as fa:
    tot_len = len(fa)

print (f"Merge reference Total Length: {tot_len}")
len_idx_str = min(14, np.log2(total_len)/2 - 1)
print (f"Length of STAR index str: {len_idx_str}")
Merge reference Total Length: 1266307
Length of STAR index str: 8.473928317729879
Index
In [448]:
outdir = "diverging_insertions/star_index"
fasta_files = "diverging_insertions/sequences/merged.fa"
mem = 10000
threads = 20
dry = False

stdout = f"{outdir}/star_index.out"
stderr = f"{outdir}/star_index.err"

cmd = f"STAR --runMode genomeGenerate --runThreadN {threads} --genomeDir {outdir} --genomeFastaFiles {fasta_files} --genomeSAindexNbases 8"
bsub(cmd = cmd, conda="Star_v2.7.5", dry=dry, mem=mem, threads=threads, stdout_fp=stdout, stderr_fp=stderr)
bsub -M 10000 -R 'rusage[mem=10000]' -n 20 -oo diverging_insertions/star_index/star_index.out -eo diverging_insertions/star_index/star_index.err  "STAR --runMode genomeGenerate --runThreadN 20 --genomeDir diverging_insertions/star_index --genomeFastaFiles diverging_insertions/sequences/merged.fa --genomeSAindexNbases 8"
Job <6777830> is submitted to default queue <research-rh74>.
Out[448]:
'6777830'

Star Align RNA-Seq

In [458]:
outdir = "diverging_insertions/star_align"
shutil.rmtree(outdir, ignore_errors=True)
mkdir (outdir)
Creating /hps/research1/birney/users/adrien/analyses/medaka_assembly_graph/diverging_insertions/star_align
In [178]:
mem = 20000
threads = 25
dry = False

index_dir = "diverging_insertions/star_index"
outdir = "diverging_insertions/star_align"

info_fn = "/hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/Analysis_low_high/candidates/liver_candidate_rep1.tsv"
sample_d = OrderedDict()
with open(info_fn) as csvfile:
    reader = csv.DictReader(csvfile, delimiter='\t')
    for row in reader:
        sample_d[row["sanger_id"]] = row["sample_id"].replace("_", "-")

for sanger_id, sample_id in sample_d.items():
    R1 = f"/hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_{sanger_id}_1.fastq.gz"
    R2 = f"/hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_{sanger_id}_2.fastq.gz"
    prefix = os.path.join(outdir, f"{sanger_id}_{sample_id}")
    
    stdout = f"{prefix}_star_align.out"
    stderr = f"{prefix}_star_align.err"
    
    opt = "--outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3"
    cmd = f"STAR {opt} --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN {threads} --genomeDir {index_dir} --readFilesIn {R1} {R2} --outFileNamePrefix {prefix}"
    bsub(cmd = cmd, conda="Star_v2.7.5", dry=dry, mem=mem, threads=threads, stdout_fp=stdout, stderr_fp=stderr)
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225690_104-1_star_align.out -eo diverging_insertions/star_align/5093STDY7225690_104-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225690_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225690_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225690_104-1"
Job <268829> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223970_106-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223970_106-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223970_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223970_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223970_106-1"
Job <268833> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223802_106-2_star_align.out -eo diverging_insertions/star_align/5093STDY7223802_106-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223802_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223802_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223802_106-2"
Job <268837> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223314_10-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223314_10-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223314_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223314_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223314_10-1"
Job <268845> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225386_117-2_star_align.out -eo diverging_insertions/star_align/5093STDY7225386_117-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225386_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225386_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225386_117-2"
Job <268851> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223490_11-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223490_11-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223490_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223490_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223490_11-1"
Job <268860> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223322_11-2_star_align.out -eo diverging_insertions/star_align/5093STDY7223322_11-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223322_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223322_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223322_11-2"
Job <268868> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225626_125-1_star_align.out -eo diverging_insertions/star_align/5093STDY7225626_125-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225626_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225626_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225626_125-1"
Job <268871> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7222810_129-1_star_align.out -eo diverging_insertions/star_align/5093STDY7222810_129-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7222810_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7222810_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7222810_129-1"
Job <268878> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225682_132-4-1_star_align.out -eo diverging_insertions/star_align/5093STDY7225682_132-4-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225682_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225682_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225682_132-4-1"
Job <268886> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7226002_132-5_star_align.out -eo diverging_insertions/star_align/5093STDY7226002_132-5_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7226002_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7226002_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7226002_132-5"
Job <268889> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223074_133-2_star_align.out -eo diverging_insertions/star_align/5093STDY7223074_133-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223074_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223074_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223074_133-2"
Job <268890> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7224026_134-1_star_align.out -eo diverging_insertions/star_align/5093STDY7224026_134-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7224026_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7224026_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7224026_134-1"
Job <268891> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225466_135-2_star_align.out -eo diverging_insertions/star_align/5093STDY7225466_135-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225466_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225466_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225466_135-2"
Job <268900> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225594_137-4_star_align.out -eo diverging_insertions/star_align/5093STDY7225594_137-4_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225594_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225594_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225594_137-4"
Job <268909> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225306_139-4_star_align.out -eo diverging_insertions/star_align/5093STDY7225306_139-4_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225306_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225306_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225306_139-4"
Job <268913> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223514_13-2_star_align.out -eo diverging_insertions/star_align/5093STDY7223514_13-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223514_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223514_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223514_13-2"
Job <268925> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7224058_140-1_star_align.out -eo diverging_insertions/star_align/5093STDY7224058_140-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7224058_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7224058_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7224058_140-1"
Job <268930> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223642_140-3_star_align.out -eo diverging_insertions/star_align/5093STDY7223642_140-3_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223642_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223642_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223642_140-3"
Job <268933> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7224130_141-3_star_align.out -eo diverging_insertions/star_align/5093STDY7224130_141-3_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7224130_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7224130_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7224130_141-3"
Job <268935> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225794_14-1_star_align.out -eo diverging_insertions/star_align/5093STDY7225794_14-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225794_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225794_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225794_14-1"
Job <268945> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223994_15-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223994_15-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223994_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223994_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223994_15-1"
Job <268957> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223042_17-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223042_17-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223042_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223042_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223042_17-1"
Job <268970> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7226010_20-1_star_align.out -eo diverging_insertions/star_align/5093STDY7226010_20-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7226010_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7226010_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7226010_20-1"
Job <268975> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223194_21-2_star_align.out -eo diverging_insertions/star_align/5093STDY7223194_21-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223194_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223194_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223194_21-2"
Job <268977> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225826_23-1_star_align.out -eo diverging_insertions/star_align/5093STDY7225826_23-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225826_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225826_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225826_23-1"
Job <268982> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223714_30-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223714_30-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223714_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223714_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223714_30-1"
Job <268996> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223234_32-2_star_align.out -eo diverging_insertions/star_align/5093STDY7223234_32-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223234_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223234_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223234_32-2"
Job <269008> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225922_39-1_star_align.out -eo diverging_insertions/star_align/5093STDY7225922_39-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225922_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225922_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225922_39-1"
Job <269015> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225762_40-1_star_align.out -eo diverging_insertions/star_align/5093STDY7225762_40-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225762_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225762_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225762_40-1"
Job <269022> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225858_40-2_star_align.out -eo diverging_insertions/star_align/5093STDY7225858_40-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225858_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225858_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225858_40-2"
Job <269030> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223458_49-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223458_49-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223458_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223458_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223458_49-1"
Job <269037> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225434_4-1_star_align.out -eo diverging_insertions/star_align/5093STDY7225434_4-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225434_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225434_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225434_4-1"
Job <269045> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225402_4-2_star_align.out -eo diverging_insertions/star_align/5093STDY7225402_4-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225402_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225402_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225402_4-2"
Job <269063> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223578_50-2_star_align.out -eo diverging_insertions/star_align/5093STDY7223578_50-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223578_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223578_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223578_50-2"
Job <269096> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223138_55-2_star_align.out -eo diverging_insertions/star_align/5093STDY7223138_55-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223138_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223138_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223138_55-2"
Job <269130> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223002_59-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223002_59-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223002_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223002_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223002_59-1"
Job <269158> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223386_59-2_star_align.out -eo diverging_insertions/star_align/5093STDY7223386_59-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223386_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223386_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223386_59-2"
Job <269176> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223778_61-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223778_61-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223778_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223778_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223778_61-1"
Job <269190> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7222970_62-2_star_align.out -eo diverging_insertions/star_align/5093STDY7222970_62-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7222970_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7222970_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7222970_62-2"
Job <269217> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223122_69-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223122_69-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223122_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223122_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223122_69-1"
Job <269239> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223554_71-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223554_71-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223554_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223554_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223554_71-1"
Job <269267> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223610_72-2_star_align.out -eo diverging_insertions/star_align/5093STDY7223610_72-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223610_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223610_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223610_72-2"
Job <269281> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225722_79-2_star_align.out -eo diverging_insertions/star_align/5093STDY7225722_79-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225722_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225722_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225722_79-2"
Job <269297> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223738_80-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223738_80-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223738_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223738_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223738_80-1"
Job <269300> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7222946_84-2_star_align.out -eo diverging_insertions/star_align/5093STDY7222946_84-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7222946_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7222946_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7222946_84-2"
Job <269308> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223834_8-2_star_align.out -eo diverging_insertions/star_align/5093STDY7223834_8-2_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223834_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223834_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223834_8-2"
Job <269319> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7225890_91-1_star_align.out -eo diverging_insertions/star_align/5093STDY7225890_91-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225890_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7225890_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7225890_91-1"
Job <269326> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7223906_94-1_star_align.out -eo diverging_insertions/star_align/5093STDY7223906_94-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223906_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7223906_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7223906_94-1"
Job <269336> is submitted to default queue <research-rh74>.
bsub -M 20000 -R 'rusage[mem=20000]' -n 25 -oo diverging_insertions/star_align/5093STDY7222906_95-1_star_align.out -eo diverging_insertions/star_align/5093STDY7222906_95-1_star_align.err  "STAR --outFilterType BySJout --outSJfilterCountTotalMin -1 10 10 10 --outSJfilterOverhangMin -1 15 15 15 --alignSJoverhangMin 10 --alignSJDBoverhangMin 3 --readFilesCommand 'gunzip -c' --runMode alignReads --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 1000000000 --runThreadN 25 --genomeDir diverging_insertions/star_index --readFilesIn /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7222906_1.fastq.gz /hps/research1/birney/users/adrien/analyses/medaka_RNA_illumina/Indigene_pilot/RNA_pipeline2/results/input/fastp/high_5093STDY7222906_2.fastq.gz --outFileNamePrefix diverging_insertions/star_align/5093STDY7222906_95-1"
Job <269352> is submitted to default queue <research-rh74>.

index and coverage for IGV

In [4]:
mem = 10000
threads = 10
fai = "./diverging_insertions/sequences/merged.fa.fai"
dry = False

for bam in glob.glob("./diverging_insertions/star_align/*.out.bam"):
    bsub (cmd=f"samtools index {bam}", conda="samtools", dry=dry, mem=mem, threads=threads)
    bsub (cmd=f"igvtools count -z 1 -w 10 -f median {bam} {bam}.tdf {fai}", conda="IGV", dry=dry, mem=mem, threads=threads)
    # bsub (cmd=f"igvtools count --minMapQuality 10 -z 1 -w 10 -f median {bam} {bam}_mapq10.tdf {fai}", conda="IGV", dry=dry, mem=mem, threads=threads)
    # bsub (cmd=f"igvtools count --minMapQuality 30 -z 1 -w 10 -f median {bam} {bam}_mapq30.tdf {fai}", conda="IGV", dry=dry, mem=mem, threads=threads)
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7222906_95-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7222906_95-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377262> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7222906_95-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7222906_95-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377269> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223906_94-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223906_94-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377274> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223906_94-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223906_94-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377277> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225890_91-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225890_91-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377282> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225890_91-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225890_91-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377284> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223834_8-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223834_8-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377289> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223834_8-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223834_8-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377292> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223610_72-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223610_72-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377298> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223610_72-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223610_72-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377303> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223122_69-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223122_69-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377308> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223122_69-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223122_69-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377324> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223554_71-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223554_71-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377331> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223554_71-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223554_71-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377336> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223738_80-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223738_80-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377342> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223738_80-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223738_80-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377345> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225722_79-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225722_79-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377354> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225722_79-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225722_79-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377355> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223778_61-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223778_61-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377359> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223778_61-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223778_61-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377363> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7222970_62-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7222970_62-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377366> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7222970_62-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7222970_62-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377369> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7222946_84-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7222946_84-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377375> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7222946_84-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7222946_84-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377392> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223386_59-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223386_59-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377401> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223386_59-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223386_59-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377405> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223138_55-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223138_55-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377413> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223138_55-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223138_55-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377418> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223002_59-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223002_59-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377424> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223002_59-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223002_59-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377429> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223578_50-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223578_50-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377444> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223578_50-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223578_50-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377459> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225402_4-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225402_4-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377474> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225402_4-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225402_4-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377477> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225434_4-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225434_4-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377484> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225434_4-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225434_4-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377490> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223458_49-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223458_49-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377516> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223458_49-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223458_49-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377557> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223234_32-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223234_32-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377597> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223234_32-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223234_32-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377613> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225922_39-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225922_39-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377617> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225922_39-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225922_39-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377629> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225762_40-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225762_40-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377634> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225762_40-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225762_40-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377643> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223714_30-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223714_30-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377650> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223714_30-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223714_30-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377656> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225858_40-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225858_40-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377671> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225858_40-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225858_40-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377676> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225826_23-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225826_23-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377684> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225826_23-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225826_23-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377692> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223194_21-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223194_21-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377697> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223194_21-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223194_21-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377704> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7226010_20-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7226010_20-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377776> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7226010_20-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7226010_20-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377842> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223042_17-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223042_17-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377852> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223042_17-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223042_17-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377859> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223994_15-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223994_15-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377865> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223994_15-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223994_15-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377871> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225794_14-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225794_14-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377878> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225794_14-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225794_14-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377886> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7224130_141-3Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7224130_141-3Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377889> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7224130_141-3Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7224130_141-3Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377896> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223642_140-3Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223642_140-3Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377904> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223642_140-3Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223642_140-3Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377909> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7224058_140-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7224058_140-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <377918> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7224058_140-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7224058_140-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378055> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223514_13-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223514_13-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378307> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223514_13-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223514_13-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378460> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225306_139-4Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225306_139-4Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378575> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225306_139-4Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225306_139-4Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378578> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225594_137-4Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225594_137-4Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378584> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225594_137-4Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225594_137-4Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378599> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225466_135-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225466_135-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378601> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225466_135-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225466_135-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378613> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7224026_134-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7224026_134-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378615> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7224026_134-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7224026_134-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378616> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223074_133-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223074_133-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378622> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223074_133-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223074_133-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378632> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7226002_132-5Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7226002_132-5Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378640> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7226002_132-5Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7226002_132-5Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378645> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7222810_129-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7222810_129-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378655> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7222810_129-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7222810_129-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378660> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225682_132-4-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225682_132-4-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378667> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225682_132-4-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225682_132-4-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378673> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225626_125-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225626_125-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378676> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225626_125-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225626_125-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378684> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223490_11-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223490_11-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378687> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223490_11-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223490_11-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378694> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223322_11-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223322_11-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378697> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223322_11-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223322_11-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378743> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223314_10-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223314_10-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378747> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223314_10-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223314_10-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378755> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225386_117-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225386_117-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378757> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225386_117-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225386_117-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378761> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223802_106-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223802_106-2Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378770> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223802_106-2Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223802_106-2Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378776> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225690_104-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225690_104-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378780> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7225690_104-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7225690_104-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378784> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 10 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223970_106-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223970_106-1Aligned.sortedByCoord.out.bam_mapq10.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378789> is submitted to default queue <research-rh74>.
bsub -M 10000 -R 'rusage[mem=10000]' -n 10  "igvtools count --minMapQuality 30 -z 1 -w 10 -f median ./diverging_insertions/star_align/5093STDY7223970_106-1Aligned.sortedByCoord.out.bam ./diverging_insertions/star_align/5093STDY7223970_106-1Aligned.sortedByCoord.out.bam_mapq30.tdf ./diverging_insertions/sequences/merged.fa.fai"
Job <378798> is submitted to default queue <research-rh74>.

Deletions Analysis

In [61]:
outdir = "deletions"
shutil.rmtree(outdir, ignore_errors=True)
mkdir (outdir)
Creating /hps/research1/birney/users/adrien/analyses/medaka_assembly_graph/deletions

Find large deletions overlapping exons

In [4]:
    def gff3_exon_parser(fn):
        """ Parse a gff3 formated file"""
        exon_features = OrderedDict()
        transcript_features = OrderedDict()
        gene_features = OrderedDict()
        
        c=Counter()
        with open(fn) as fp: 
            for line in tqdm(fp):
                if not line.startswith("#"):
                    line = line.strip().split("\t")
                    if line[2] in ["gene", "mRNA", "exon"]:
                        try:
                            d = OrderedDict(chrom=line[0],start=int(line[3]),end=int(line[4]))
                            attrs = {i.split("=")[0]:i.split("=")[1] for i in line[8].split(";")}
                            if line[2] == "gene":
                                ID = attrs.get("ID")
                                if ID:
                                    gene_features[ID] = d
                                c["Valid gene"]+=1
                            elif line[2] == "mRNA":
                                d["gene_id"] = attrs.get("Parent")
                                ID = attrs.get("ID")
                                if ID:
                                    transcript_features[ID] = d
                                c["Valid transcript"]+=1
                            elif line[2] == "exon":
                                d["transcript_id"] = attrs.get("Parent")
                                ID = attrs.get("exon_id")
                                if ID:
                                    exon_features[ID] = d
                                c["Valid exon"]+=1
                        except (IndexError, ValueError) as E:
                            c["Invalid feature line"]+=1
                
        # Convert in a dataframe and cleanup
        for i, j in c.items():
            print(f"{i}:{j}")
        
        for i, j in exon_features.items():
            tx_id = j.get("transcript_id")
            try:
                tx_info = transcript_features.get(tx_id)
                gene_id = tx_info.get("gene_id")
                exon_features[i]["gene_id"]=gene_id
            except:
                exon_features[i]["gene_id"]=tx_id
        
        exon_df = pd.DataFrame.from_dict(exon_features, orient="index")
        exon_df.index.name="exon_id"
        return exon_df
In [5]:
link_fn = "./link_usage/raw_reads/norm_link_cov.tsv"
ref_fasta = "references/Oryzias_latipes_HDRR_clean.fa"
gff3_fn = "./references/Oryzias_latipes_all_assemblies_sorted.gff3"
outdir = "deletions"
min_link_cov = 0.5
min_samples = 2
min_del_len = 10000
no_Ns_extra = 50000
c = Counter()

print("Import data and filter")
link_df = pd.read_csv (link_fn, sep="\t", index_col=0)

# Select intra HDRR deletions longer than min_del_len
link_df = link_df[(link_df["SV_type"]=="deletion") & (link_df["connect_type"]=="intra_HDRR") & (link_df["connect_gap"]>=min_del_len)]
c["All deletions"]=len(link_df)

# Calculate number of samples with sufficient coverage over deletion link 
data_col = ["4-1","4-2","7-1","7-2","11-1","69-1","79-2","80-1","117-2","131-1","134-1","134-2"]
link_df["samples_pass"] = link_df[data_col].ge(min_link_cov).sum(axis=1)
link_df = link_df[link_df["samples_pass"]>=min_samples]
c["Deletion with sufficient coverage"]=len(link_df)

display(link_df.head())
out_fn = os.path.join(outdir, "large_deletions.tsv")
link_df.to_csv(out_fn, sep="\t", index=False)

# Filter out reference sequence containing Ns (probable misassemblies)
print("Filter out sequence containing Ns and non overlaping exon")
l = []
exon_df = gff3_exon_parser(fn=gff3_fn)
with pyfaidx.Fasta(ref_fasta) as ref_fa_fp:
    
    for s in tqdm(link_df.itertuples()):
        chrom = s.s1_chromosome
        start = min(int(s.s1_pos), int(s.s2_pos))
        end = max(int(s.s1_pos), int(s.s2_pos))
                
        # Check number of Ns
        seq = str(ref_fa_fp[chrom][start-no_Ns_extra:end+no_Ns_extra])
        num_Ns = seq.count("N")
        if num_Ns:
            c["Ns in reference sequence"]+=1
            continue
        
        # Find overlaping exon 
        c["Deletion with sufficient coverage"]=len(link_df)
        overlapping_exon_info = exon_df[(exon_df["chrom"]==chrom)&(exon_df["start"]<=end)&(exon_df["end"]>=start)]
        if overlapping_exon_info.empty:
            c["No overlaping exons"]+=1
            continue
        
        gene_ids = set()
        for g in overlapping_exon_info.itertuples():
            gid = g.gene_id.split(":")[1]
            gene_ids.add(gid)
        
        d = OrderedDict()
        d["gap_coord"] = f"{chrom}:{start}-{end}"
        d["start_segment"] = s.Index.split("_")[0]
        d["end_segment"] = s.Index.split("_")[1]
        d["gap_length"] = int(s.connect_gap)
        d["supporting_samples"] = s.samples_pass
        d["n_exons_overlapped"] = len(overlapping_exon_info)
        d["gene_ids_overlapped"] = ",".join(gene_ids)

        l.append(d)
        c["Valid interval"]+=1

select_link_df = pd.DataFrame(l)
select_link_df = select_link_df.sort_values(by="gap_length", ascending=False)
select_link_df = select_link_df.reset_index(drop=True)

# Convert in a dataframe and cleanup
for i, j in c.items():
    print(f"{i}:{j}")

display(select_link_df)
out_fn = os.path.join(outdir, "large_deletions_exon_noNs.tsv")
select_link_df.to_csv(out_fn, sep="\t", index=False)

out_fn = os.path.join(outdir, "large_deletions_exon_noNs.bed")
with open (out_fn, "w") as out_fp:
    for i in select_link_df.itertuples():
        coord = i.gap_coord
        chrom = coord.split(":")[0]
        start = int(coord.split(":")[1].split("-")[0])
        end = int(coord.split(":")[1].split("-")[1])
        out_fp.write(f"{chrom}\t{start}\t{end}\n")
Import data and filter
s1_chromosome s1_pos s2_chromosome s2_pos s2_type connect_type connect_gap SV_type 4-1 4-2 7-1 7-2 11-1 69-1 79-2 80-1 117-2 131-1 134-1 134-2 samples_pass
link_id
s9882+_s9884+ HDRR_1 9414281 HDRR_1 9425864 HDRR intra_HDRR 11583.0 deletion 0.000000 0.000000 0.333333 0.70 0.714286 0.941176 0.000000 1.083333 0.000000 1.153846 0.000000 1.033333 6
s12535+_s12537+ HDRR_1 12207185 HDRR_1 12217254 HDRR intra_HDRR 10069.0 deletion 0.368421 0.608696 0.777778 0.40 0.142857 0.588235 0.473684 0.666667 0.782609 0.923077 0.444444 0.766667 7
s63759+_s63778+ HDRR_3 4895736 HDRR_3 4915505 HDRR intra_HDRR 19769.0 deletion 0.157895 0.043478 0.000000 0.10 0.428571 0.411765 0.157895 0.083333 0.130435 1.076923 0.722222 0.166667 2
s71942+_s71944+ HDRR_3 13919324 HDRR_3 13936469 HDRR intra_HDRR 17145.0 deletion 0.736842 0.739130 0.833333 1.45 1.642857 1.117647 0.736842 0.000000 0.434783 0.730769 0.555556 1.166667 10
s73542+_s73544+ HDRR_3 15990378 HDRR_3 16013237 HDRR intra_HDRR 22859.0 deletion 1.473684 1.391304 1.000000 1.10 1.071429 0.764706 1.631579 1.416667 1.347826 1.730769 0.500000 0.000000 11
367it [00:00, 3262.11it/s]
Filter out sequence containing Ns and non overlaping exon
2760181it [00:18, 148653.66it/s]
Valid gene:68534
Valid transcript:108919
Valid exon:1175685
63it [00:05, 10.82it/s]
All deletions:98
Deletion with sufficient coverage:63
Ns in reference sequence:16
No overlaping exons:31
Valid interval:16

gap_coord start_segment end_segment gap_length supporting_samples n_exons_overlapped gene_ids_overlapped
0 HDRR_14:10739051-10869664 s380423+ s380486+ 130613 5 5 ENSORLG00000025784,ENSORLG00000004288
1 HDRR_16:15648550-15701259 s437996+ s438009+ 52709 9 43 ENSORLG00000006961,ENSORLG00000006905,ENSORLG00000006939
2 HDRR_19:11024197-11053768 s516697+ s516718+ 29571 8 3 ENSORLG00000009222
3 HDRR_18:22576279-22602557 s500977+ s500980+ 26278 8 5 ENSORLG00000029749,ENSORLT00000032139
4 HDRR_10:14825181-14849652 s273553+ s273577+ 24471 8 10 ENSORLG00000023281,ENSORLG00000026383,ENSORLG00000029815,ENSORLG00000021827
5 HDRR_18:26391943-26407221 s502595+ s502621+ 15278 5 33 ENSORLG00000010593
6 HDRR_16:25053356-25068605 s444093+ s444097+ 15249 2 6 ENSORLG00000030176,ENSORLG00000029558
7 HDRR_10:14960006-14975102 s273662+ s273670+ 15096 10 7 ENSORLG00000022222,ENSORLG00000029709,ENSORLG00000027884
8 HDRR_21:1403241-1416829 s552520+ s552530+ 13588 5 30 ENSORLG00000024019,ENSORLG00000030179,ENSORLG00000027647
9 HDRR_6:4103589-4115715 s155670+ s155683+ 12126 2 1 ENSORLG00000026047
10 HDRR_12:27179941-27191966 s338289+ s338292+ 12025 4 1 ENSORLG00000026950
11 HDRR_5:20299593-20310932 s139977+ s139979+ 11339 2 1 ENSORLG00000010732
12 HDRR_13:30212674-30223915 s368296+ s368298+ 11241 12 1 ENSORLG00000029636
13 HDRR_9:9663320-9674535 s239571+ s239575+ 11215 2 4 ENSORLG00000004857,ENSORLG00000027013,ENSORLG00000004869
14 HDRR_19:21993135-22004171 s525489+ s525517+ 11036 2 5 ENSORLG00000013649
15 HDRR_19:7041437-7051592 s514318+ s514326+ 10155 12 3 ENSORLG00000022011