Source code for sugar.core._adapter

# (C) 2025, Tom Eulenfeld, MIT license
# The functions in this module are documented in the corresponding classes
"""
Adapters for other bio libraries

The functions in this module are not meant to be called directly.
Rather, they should be called via the corresponding sugar class methods.
"""

from copy import copy
from sugar.core.fts import Feature, FeatureList, Location
from sugar.core.meta import Attr
from sugar.core.seq import BioSeq, BioBasket


### BioPython sequences

def seq2biopython(seq):
    """
    See `.BioSeq.tobiopython()`

    Build a Biopython ``SeqRecord`` from the sequence data, its features
    and the metadata stored under the ``'_biopython'`` key.
    """
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord

    # Features are only handed over if the sequence carries any
    if len(seq.fts) == 0:
        biopy_features = None
    else:
        biopy_features = fts2biopython(seq.fts)
    # Attr containers are turned into plain dicts, other values pass through
    record_kw = {}
    for key, value in seq.meta.get('_biopython', {}).items():
        record_kw[key] = dict(value) if isinstance(value, Attr) else value
    if seq.id:
        record_kw['id'] = seq.id
    return SeqRecord(Seq(seq.data), features=biopy_features, **record_kw)
def seqs2biopython(seqs, msa=False):
    """
    See `.BioBasket.tobiopython()`

    Convert every sequence with `seq2biopython()`. With ``msa=True`` the
    resulting records are wrapped in a Biopython ``MultipleSeqAlignment``,
    otherwise a plain list of records is returned.
    """
    records = [seq2biopython(seq) for seq in seqs]
    if not msa:
        return records
    from Bio.Align import MultipleSeqAlignment
    return MultipleSeqAlignment(records)
def biopython2seq(obj, cls=BioSeq):
    """
    See `.BioSeq.frombiopython()`

    Convert a Biopython ``SeqRecord`` or ``Seq`` object.

    :param obj: Object with a ``seq`` attribute (treated as ``SeqRecord``)
        or any other object whose ``str()`` is the sequence data
        (treated as ``Seq``).
    :param cls: Class used to create the returned sequence, it is called
        as ``cls(data, meta=meta)``.
    :return: Instance of ``cls``. For a ``SeqRecord`` the metadata holds
        the ``id``, optionally the features under ``'fts'`` and all other
        non-empty record attributes under ``'_biopython'``.
    """
    if hasattr(obj, 'seq'):  # SeqRecord
        data = str(obj.seq)
        # Biopython uses '<unknown ...>' placeholders for unset attributes
        id_ = obj.id if obj.id != '<unknown id>' else None
        biopy = {}
        if obj.name != '<unknown name>':
            biopy['name'] = obj.name
        if obj.description != '<unknown description>':
            biopy['description'] = obj.description
        # Keep the remaining record attributes only if they are not empty
        for attr in ('annotations', 'dbxrefs', 'letter_annotations'):
            value = getattr(obj, attr)
            if value:
                biopy[attr] = value
        meta = {'id': id_}
        if biopy:
            meta['_biopython'] = biopy
        if obj.features:
            meta['fts'] = biopython2fts(obj.features)
    else:  # Seq
        data = str(obj)
        meta = None
    return cls(data, meta=meta)
def biopython2seqs(obj, cls=BioBasket):
    """
    See `.BioBasket.frombiopython()`

    Convert every item of the iterable ``obj`` with `biopython2seq()` and
    collect the results in an instance of ``cls``.
    """
    converted = []
    for record in obj:
        converted.append(biopython2seq(record))
    return cls(converted)
### BioPython features

# Lookup from sugar strand symbols to Biopython strand values
_biopy_strand = {'+': 1, '-': -1, '.': None, '?': None}
# Lookup from Biopython strand values to sugar strand symbols.
# Biopython also allows strand 0 (stranded, but strand unknown; "?" in GFF3),
# the entry avoids a KeyError for locations carrying that value.
_biopy_strand_r = {1: '+', -1: '-', 0: '?', None: '.'}
def ft2biopython(ft):
    """
    See `.Feature.tobiopython()`

    Convert a feature to a Biopython ``SeqFeature``.

    :param ft: Feature providing ``locs``, ``seqid``, ``id``, ``type``
        and ``meta``.
    :return: ``SeqFeature`` with a ``SimpleLocation`` for a single location
        or a ``CompoundLocation`` otherwise. Qualifiers are taken from
        ``ft.meta['_biopython']`` and extended by ``ft.meta.name``.
    """
    from Bio.SeqFeature import CompoundLocation, SeqFeature, SimpleLocation
    # we ignore any defects, for now
    locs = [SimpleLocation(loc.start, loc.stop, _biopy_strand[loc.strand],
                           ref=ft.seqid)
            for loc in ft.locs]
    loc = locs[0] if len(locs) == 1 else CompoundLocation(locs)
    # Only pass id and type if they are set
    kw = {attr: getattr(ft, attr) for attr in ('id', 'type')
          if getattr(ft, attr)}
    # The key has to be spelled '_biopython', otherwise stored qualifiers
    # are dropped for features without a name
    if '_biopython' in ft.meta or 'name' in ft.meta:
        # Copy, so that adding the name does not alter the feature metadata
        kw['qualifiers'] = dict(ft.meta.get('_biopython', {}))
        if 'name' in ft.meta:
            kw['qualifiers']['name'] = ft.meta.name
    return SeqFeature(location=loc, **kw)
def fts2biopython(fts):
    """
    See `.FeatureList.tobiopython()`

    Return a list with one `ft2biopython()` result per feature in ``fts``.
    """
    biopy_features = []
    for feature in fts:
        biopy_features.append(ft2biopython(feature))
    return biopy_features
def biopython2ft(obj, cls=Feature):
    """
    See `.Feature.frombiopython()`

    Convert a Biopython feature: every part of ``obj.location`` becomes a
    `Location`, id, qualifiers, name and the reference of the first part
    are stored in the metadata of the created ``cls`` instance.
    """
    parts = obj.location.parts
    locs = []
    for part in parts:
        strand = _biopy_strand_r[part.strand]
        locs.append(Location(int(part.start), int(part.end), strand))
    meta = {}
    # '<unknown id>' is the placeholder Biopython uses for an unset id
    if obj.id != '<unknown id>':
        meta['id'] = obj.id
    qualifiers = obj.qualifiers
    if qualifiers:
        meta['_biopython'] = qualifiers
        if 'name' in qualifiers:
            meta['name'] = qualifiers['name']
    # The reference of the first part serves as sequence id
    seqid = parts[0].ref
    if seqid:
        meta['seqid'] = seqid
    return cls(obj.type or None, locs, meta=meta)
def biopython2fts(obj, cls=FeatureList):
    """
    See `.FeatureList.frombiopython()`

    Convert every item of the iterable ``obj`` with `biopython2ft()` and
    collect the results in an instance of ``cls``.
    """
    features = []
    for biopy_feature in obj:
        features.append(biopython2ft(biopy_feature))
    return cls(features)
### Biotite sequences
def seq2biotite(seq, type=None, gap='-', warn=True):
    """
    See `.BioSeq.tobiotite()`

    Create a biotite ``NucleotideSequence`` (type ``'nt'``) or
    ``ProteinSequence`` (type ``'aa'``) from the sequence data. All
    characters in ``gap`` are removed from the data beforehand, a warning
    is issued in this case if ``warn`` is true. ``type`` defaults to
    ``seq.type``.
    """
    from biotite.sequence import NucleotideSequence, ProteinSequence

    data = seq.data
    if gap and any(char in data for char in gap):
        if warn:
            # Import under an alias to leave the warn parameter untouched
            from warnings import warn as issue_warning
            issue_warning(f'Remove gap characters {gap} for the conversion to biotite')
        for char in gap:
            data = data.replace(char, '')
    biotite_classes = {'nt': NucleotideSequence, 'aa': ProteinSequence}
    seq_cls = biotite_classes[type or seq.type]
    return seq_cls(data)
def seqs2biotite(oseqs, type=None, msa=False, gap='-', warn=True):
    """
    See `.BioBasket.tobiotite()`

    Convert every sequence via its ``tobiotite()`` method. With ``msa=True``
    the converted sequences are combined into a biotite ``Alignment`` whose
    trace is derived from the original sequence data, otherwise a plain
    list is returned.
    """
    # The per-sequence gap warning is switched off when an alignment is built
    warn_each = False if msa else warn
    converted = []
    for oseq in oseqs:
        converted.append(oseq.tobiotite(type=type, gap=gap, warn=warn_each))
    if not msa:
        return converted
    from biotite.sequence.align import Alignment
    trace = Alignment.trace_from_strings([oseq.data for oseq in oseqs])
    return Alignment(converted, trace)
def biotite2seq(obj, cls=BioSeq):
    """
    See `.BioSeq.frombiotite()`

    Join the symbols of the biotite sequence to a string and create a
    ``cls`` instance from it. The type is ``'nt'`` for a
    ``NucleotideSequence``, ``'aa'`` for a ``ProteinSequence`` and ``None``
    otherwise.
    """
    from biotite.sequence import NucleotideSequence, ProteinSequence

    data = ''.join(obj.symbols)
    if isinstance(obj, NucleotideSequence):
        seq_type = 'nt'
    elif isinstance(obj, ProteinSequence):
        seq_type = 'aa'
    else:
        seq_type = None
    return cls(data, type=seq_type)
def biotite2seqs(obj, cls=BioBasket):
    """
    See `.BioBasket.frombiotite()`

    Convert a biotite ``Alignment`` (detected by its ``sequences``
    attribute) or an iterable of biotite sequences to a ``cls`` instance.
    """
    if not hasattr(obj, 'sequences'):
        # Plain iterable of biotite sequences
        return cls([BioSeq.frombiotite(seq) for seq in obj])
    # Alignment object: the types are taken from the ungapped sequences,
    # the data from the gapped representation
    seq_types = [BioSeq.frombiotite(seq).type for seq in obj.sequences]
    gapped = obj.get_gapped_sequences()
    aligned = [BioSeq(data, type=seq_type)
               for data, seq_type in zip(gapped, seq_types)]
    return cls(aligned)