Source code for pydna.gateway

# -*- coding: utf-8 -*-
from pydna.dseqrecord import Dseqrecord
from Bio.SeqFeature import SimpleLocation
from pydna.recombinase import Recombinase, RecombinaseCollection


[docs] def create_recombinase_dict() -> dict[str, dict[str, RecombinaseCollection]]: """Create a dictionary of recombinases for the Gateway reaction.""" raw_gateway_common = { "attB1": "CHWVTWTgtacaaaAAANNNG", "attB2": "CHWVTWTgtacaagAAANNNG", "attB3": "CHWVTWTgtataatAAANNNG", "attB4": "CHWVTWTgtatagaAAANNNG", "attB5": "CHWVTWTgtatacaAAANNNG", "attL1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaaaAAANNNG", "attL2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaagAAANNNG", "attL3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtataatAAANNNG", "attL4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatagaAAANNNG", "attL5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatacaAAANNNG", "attR1": "CHWVTWTgtacaaaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV", "attR2": "CHWVTWTgtacaagAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV", "attR3": "CHWVTWTgtataatAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV", "attR4": "CHWVTWTgtatagaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV", "attR5": "CHWVTWTgtatacaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV", "overlap_1": "TWTgtacaaaAAA", "overlap_2": "TWTgtacaagAAA", "overlap_3": "TWTgtataatAAA", "overlap_4": "TWTgtatagaAAA", "overlap_5": "TWTgtatacaAAA", } raw_gateway_sites_greedy = { **raw_gateway_common, "attP1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaaaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV", "attP2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaagAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV", "attP3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtataatAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV", "attP4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatagaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV", "attP5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatacaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV", } raw_gateway_sites_conservative = { **raw_gateway_common, "attP1": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtacaaaAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA", "attP2": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtacaagAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA", "attP3": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtataatAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA", "attP4": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtatagaAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA", "attP5": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtatacaAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA", } out_dict = {} for reaction in ["BP", "LR"]: left, right = reaction conservative: list[Recombinase] = [] greedy: list[Recombinase] = [] for i in range(1, 6): site1 = f"att{left}{i}" site2 = f"att{right}{i}" seq1_conservative = raw_gateway_sites_conservative[site1] seq2_conservative = raw_gateway_sites_conservative[site2] seq1_greedy = raw_gateway_sites_greedy[site1] seq2_greedy = raw_gateway_sites_greedy[site2] conservative.append( Recombinase(seq1_conservative, seq2_conservative, site1, site2) ) greedy.append(Recombinase(seq1_greedy, seq2_greedy, site1, site2)) out_dict[reaction] = { "conservative": RecombinaseCollection(conservative), "greedy": RecombinaseCollection(greedy), } return out_dict
recombinase_dict = create_recombinase_dict()
[docs] def gateway_overlap( seqx: Dseqrecord, seqy: Dseqrecord, reaction: str, greedy: bool ) -> list[tuple[int, int, int]]: """ Assembly Algorithm: Find gateway overlaps. If greedy is True, it uses a more greedy consensus site to find attP sites, which might give false positives. Parameters ---------- seqx : Dseqrecord First sequence to find overlaps. seqy : Dseqrecord Second sequence to find overlaps. reaction : str Type of Gateway reaction (BP or LR). greedy : bool If True, use greedy gateway consensus sites. Returns ------- list[tuple[int, int, int]] A list of overlaps between the two sequences. """ mode = "greedy" if greedy else "conservative" return recombinase_dict[reaction][mode].overlap(seqx, seqy)
[docs] def find_gateway_sites( seq: Dseqrecord, greedy: bool ) -> dict[str, list[SimpleLocation]]: """Find all gateway sites in a sequence and return a dictionary with the name and positions of the sites.""" mode = "greedy" if greedy else "conservative" collection = RecombinaseCollection( recombinase_dict["BP"][mode].recombinases + recombinase_dict["LR"][mode].recombinases ) return collection.find(seq)
[docs] def annotate_gateway_sites(seq: Dseqrecord, greedy: bool) -> Dseqrecord: """Annotate gateway sites in a sequence.""" mode = "greedy" if greedy else "conservative" collection = RecombinaseCollection( recombinase_dict["BP"][mode].recombinases + recombinase_dict["LR"][mode].recombinases ) return collection.annotate(seq)