Source code for seqwalk.design

from seqwalk.generation import *
from seqwalk.filtering import *
import math


def max_size(L, k, alphabet="ACT", RCfree=False, GClims=None,
             prevented_patterns=["AAAA", "CCCC", "GGGG", "TTTT"]):
    """
    design a max size library of length L sequences with SSM k

    Args:
        L: integer length of desired seqs
        k: SSM k value (must be smaller than L)
        alphabet: string of allowable letters (default "ACT")
        RCfree: bool, True if orthogonality with RCs is required
        GClims: tuple of (GCmin, GCmax), allowable range of number of GC bases;
            None (default) skips the GC filter
        prevented_patterns: list of prevented patterns (default 4N);
            None or an empty list skips the pattern filter

    Returns:
        list of strings : seqs
            library of orthogonal sequences

    Raises:
        AssertionError: if L is not greater than k
    """
    # NOTE: the default list for prevented_patterns is only iterated here,
    # never mutated, so sharing it between calls is harmless.
    assert (L > k), "L must greater than k"

    if RCfree and len(alphabet) == 4:
        # 4-letter alphabet with RC-freeness: the path comes from
        # adapted_hierholzer and is partitioned as-is.
        seq = adapted_hierholzer(k, alphabet)
        seqs = partition_path(seq, L, k)
    else:
        # simple_shift yields indices that are shifted down by one to index
        # into alphabet (presumably 1-based letters - confirm in
        # seqwalk.generation). The last character of the joined string is
        # dropped before partitioning.
        seq = "".join(alphabet[i - 1] for i in simple_shift(k, len(alphabet)))
        seqs = partition_path(seq[:-1], L, k)
        if RCfree:
            seqs = filter_rc_3letter(seqs, k)

    # Nothing to filter: hand back the empty result unchanged.
    if len(seqs) == 0:
        return seqs

    # Post-filters shared by both generation strategies.
    if GClims is not None:
        GCmin, GCmax = GClims
        seqs = filter_gc(seqs, GCmin, GCmax)

    for pattern in (prevented_patterns or []):
        seqs = filter_pattern(seqs, pattern)

    return seqs
def max_orthogonality(N, L, alphabet="ACT", RCfree=False, GClims=None,
                      prevented_patterns=["AAAA", "CCCC", "GGGG", "TTTT"],
                      k_init=None):
    """
    design a maximally orthogonal library of N length L sequences

    Starting from k_init, builds a max_size library for increasing SSM k
    values and returns the first one that holds at least N sequences. The
    library size and the k value used are printed before returning.

    Args:
        N: minimum number of sequences in library
        L: integer length of desired seqs
        alphabet: string of allowable letters (default "ACT")
        RCfree: bool, True if orthogonality with RCs is required
        GClims: tuple of (GCmin, GCmax), allowable range of number of GC bases
        prevented_patterns: list of prevented patterns (default 4N)
        k_init: initial guess for SSM k value; when None it is derived from
            N and the alphabet size (never below 2)

    Returns:
        list of strings : seqs
            library of orthogonal sequences

    Raises:
        AssertionError: propagated from max_size once k grows to L without
            any library reaching N sequences
    """
    if k_init is None:
        # floor(log_{|alphabet|}(N)), but at least 2. N is clamped to 1 so a
        # non-positive N does not hit a math domain error.
        k_init = max(int(math.log(max(N, 1)) / math.log(len(alphabet))), 2)

    # With RCfree only odd k values are tried: bump an even start to the next
    # odd value and advance in steps of 2 afterwards.
    if RCfree and k_init % 2 == 0:
        k_init += 1
    step = 2 if RCfree else 1

    k = k_init
    while True:
        library = max_size(L, k, alphabet, RCfree, GClims, prevented_patterns)
        # N is the *minimum* size, so a library of exactly N qualifies.
        if len(library) >= N:
            print("Number of sequences: %d" % len(library))
            print("SSM k value: %d" % k)
            return library
        k += step