Source code for sugar.imaging.ftsviewer

from warnings import warn

import sugar
from sugar.core.util import _pop_kws_for_func


def _align_fts(grouped, align, *,
               crop=False,
               inplace=True,
               return_details=False
               ):
    """
    Align features in each group by shifting their locations.

    Every group gets an anchor position (depending on ``align``), afterwards all
    features of a group are shifted so that the anchors of all groups coincide
    with the largest anchor position.

    :param grouped: Grouped features, as returned by `~.FeatureList.groupby()`
    :param align: Align features by this method or list of features,
        can be one of the strings ``'center'``, ``'first'``, ``'last'``,
        or a list or BioBasket of features to align each group to the first found feature in the list.
        If a group has no feature in the list, a warning is issued and
        the group is aligned at its first feature.
    :param crop: Crop the sequence lines to the first and last feature in each group after alignment,
        can be ``True`` to crop to the minimal region including all features,
        or an integer to add some extra space before the first features.
    :param inplace: If True, modify the grouped features inplace, otherwise make a copy first.
    :param return_details: If True, return a tuple of (grouped, seq_start, pos_first_ft, stop_pos_last_ft)
        where seq_start is a dict mapping group keys to the new sequence start (after alignment),
        and pos_first_ft is a dict mapping group keys to the position of the first feature (after alignment),
        and stop_pos_last_ft is a dict mapping group keys to the stop position of the last feature (after alignment).
    :return: Aligned and grouped features, or a tuple if ``return_details=True``
    :raises ValueError: If ``align`` is a string other than ``'center'``, ``'first'`` or ``'last'``
    """
    if isinstance(align, str) and align not in ('center', 'first', 'last'):
        raise ValueError(f'Unknown align method: {align}')
    # The anchor features are searched on the original objects, i.e. before the
    # optional deep copy below, so that the ``ft in align`` test is done with
    # the very features the caller handed in.
    pos_first_ft = {}
    stop_pos_last_ft = {}
    pos_align = {}
    for gkey in grouped:
        group = grouped[gkey]
        first = min(ft.loc.start for ft in group)
        last = max(ft.loc.stop for ft in group)
        pos_first_ft[gkey] = first
        stop_pos_last_ft[gkey] = last
        if align == 'first':
            pos_align[gkey] = first
        elif align == 'last':
            pos_align[gkey] = last
        elif align == 'center':
            pos_align[gkey] = (last - first) // 2 + first
        else:
            for ft in group:
                if ft in align:
                    pos_align[gkey] = ft.loc.start
                    break
            else:
                warn(f'No feature found for centering group {gkey}')
                pos_align[gkey] = first
    if not inplace:
        # Locations are changed below, work on a copy to keep the caller's features untouched
        from copy import deepcopy
        grouped = deepcopy(grouped)
    # All groups are moved to the largest anchor, so that no shift is negative.
    # Computed once (loop invariant); the default covers the case of no groups.
    target = max(pos_align.values(), default=0)
    seq_start = {}
    for gkey in grouped:
        shift = target - pos_align[gkey]
        for ft in grouped[gkey]:
            ft.locs.shift(shift)
        pos_first_ft[gkey] += shift
        stop_pos_last_ft[gkey] += shift
        seq_start[gkey] = shift
    # Nothing to crop without groups (min() of an empty sequence would raise)
    if crop and pos_first_ft:
        # Move the leftmost feature to position 0 (or to position crop for an integer)
        shift = (0 if crop is True else crop) - min(pos_first_ft.values())
        for gkey in grouped:
            for ft in grouped[gkey]:
                ft.locs.shift(shift)
            pos_first_ft[gkey] += shift
            stop_pos_last_ft[gkey] += shift
            seq_start[gkey] += shift
    if return_details:
        return grouped, seq_start, pos_first_ft, stop_pos_last_ft
    return grouped


def plot_ftsviewer(
        fts: sugar.FeatureList, fname=None, *, seqs=None,
        groupby='seqid',
        align=None, crop=False,
        figsize=None, ncols=1, sharex=False, sharey=False, fig_kw={},
        ax=None,
        xticks=False,
        axlabel=True, axlabel_kw={},
        same_colors=True, colorby='type', color=None,
        dpi=None, transparent=None, bbox_inches=None, show=False,
        seqlen=None, first_index=None,
        **kw):
    r"""
    Plot features using DNAFeaturesViewer_ and `~.FeatureList.toftsviewer()`

    :param fts: features
    :param fname: The filename if saving the plot, the figure will be closed afterwards,
        default: do not save the plot and return figure
    :param seqs: The corresponding sequences, instance of `.BioBasket`,
        will be used to extract the length of each sequence,
        they are also needed when the ``plot_sequence`` option is used.
    :param groupby: Features are grouped by this key, see `~.FeatureList.groupby()`,
        and each group is plotted in a separate ax, defaults to ``'seqid'``.
    :param align: Align features by this method or list of features,
        can be one of the strings ``'center'``, ``'first'``, ``'last'``,
        or a list or BioBasket of features to align each group to the first found feature in the list.
        By default no alignment is done.
        If align is used, you should set ``sharex=True``.
    :param crop: If align is used, crop the sequence lines to the first and last feature in each group,
        can be ``True`` to crop to the minimal region including all features,
        or an integer to add some extra space before the first features.
    :param figsize: The size of the created figure, by default
        a reasonable value is calculated with ``(6 * ncols, 2 * nrows)``,
        has to be tuned for non-usual cases.
    :param ncols: Number of columns in the figure,
        by default all groups are plotted in a single column
    :param sharex,sharey: parameters passed to `plt.subplots <matplotlib.pyplot.subplots>`
    :param fig_kw: dict of other parameters passed to `plt.subplots <matplotlib.pyplot.subplots>`
    :param ax: Instead of specifying the figure with figsize, etc.
        one may pass an axes instance to draw the annotation in,
        make sure to only have a single annotation group
    :param xticks: ``False`` (default) turns off the axes created by this function,
        ``True`` in combination with ``sharex=True`` uses integer tick locations,
        any other value is passed to :meth:`~matplotlib.axes.Axes.set_xticks`
    :param axlabel: Plot the name of each group in the axis, default: True,
        any value other than ``True``, ``False`` or ``None`` is used as key
        to look up the label in the metadata of the first feature of each group
    :param axlabel_kw: Dict of corresponding options passed to
        :meth:`~matplotlib.axes.Axes.annotate`
    :param same_colors: Whether to use the same color mapping in all subplots, default True.
    :param colorby,color: Passed to `~.FeatureList.toftsviewer()`
    :param dpi,transparent,bbox_inches: Parameters passed to
        :meth:`~matplotlib.figure.Figure.savefig` if the figure is saved
    :param show: True shows the figure, default: False
    :param seqlen: Can be used to set the sequence length if not using ``seqs``.
    :param first_index: Can be used to set the sequence start index
        (where the line representing the sequence starts),
        for aligned features this is determined automatically.
        If the parameter is set (including ``0``), it overrides the automatic detection.
        Can be either an int or a dict mapping group keys to first indices.
    :param \*\*kw: Other kwargs are passed to `~.FeatureList.toftsviewer()`,
        `~dna_features_viewer.GraphicFeature` or
        `~dna_features_viewer.GraphicRecord` or
        `~dna_features_viewer.CircularGraphicRecord`,
        and `GraphicRecord.plot() <dna_features_viewer.GraphicRecord.plot>`.
    :return: Figure object if ``fname=None``, otherwise the figure is saved and closed
    :raises ImportError: If dna_features_viewer cannot be imported
    :raises ValueError: If ``ax`` is given, but there is more than one group of features

    .. rubric:: Example

    >>> from sugar import read
    >>> seqs = read()
    >>> seqs.plot_ftsviewer()

    .. image:: ../_static/fts1.png
       :width: 40%
    """
    try:
        from dna_features_viewer import GraphicRecord
    except ImportError as ex:
        raise ImportError('Please install dna_features_viewer_lite to use ftsviewer functionality') from ex
    import matplotlib.pyplot as plt
    grouped = fts.groupby(groupby, flatten=True)
    N = len(grouped)
    if N > 1 and same_colors:
        # Resolve the colors once for all features, so that all subplots share the mapping
        from sugar.imaging.alignment import _get_fts_colordict
        color, colorby = _get_fts_colordict(fts, color, colorby)
    if align:
        if not sharex:
            warn('It is recommended to use sharex=True when align is used')
        # inplace=False, the features of the caller must keep their locations
        grouped, seq_start, pos_first_ft, stop_pos_last_ft = _align_fts(
            grouped, align, crop=crop, inplace=False, return_details=True)
    if ax is None:
        nrows = (N + ncols - 1) // ncols  # ceil(N / ncols)
        if figsize is None:
            figsize = (6 * ncols, 2 * nrows)
        fig, axes = plt.subplots(nrows, ncols, sharex=sharex, sharey=sharey,
                                 squeeze=False, figsize=figsize, **fig_kw)
    else:
        if N > 1:
            raise ValueError('Use ax only with a single annotation group, please')
        fig = ax.figure
        axes = None
    seq = None
    # Split off the kwargs understood by GraphicRecord.plot, the rest goes to toftsviewer
    kw2 = _pop_kws_for_func(kw, GraphicRecord.plot)
    # An explicit integer 0 is a valid start index and must not be mistaken for "not set";
    # booleans and empty dicts keep being judged by their truth value.
    first_index_is_set = (
        isinstance(first_index, int) and not isinstance(first_index, bool)
        or bool(first_index)
    )
    xlims = []
    for i, gkey in enumerate(grouped):
        # fi is the position where the sequence aka line begins
        if first_index_is_set:
            fi = first_index if isinstance(first_index, int) else first_index[gkey]
        elif align and crop is not False:
            extra = 0 if crop is True else crop
            fi = pos_first_ft[gkey] - extra
            # The line spans all features of the group plus the extra space on both sides
            seqlen = (stop_pos_last_ft[gkey] - pos_first_ft[gkey]) + 2 * extra
        elif align:
            fi = seq_start[gkey]
        else:
            fi = 0
        if seqs is not None:
            seq = seqs.d[grouped[gkey][0].seqid]
        if axes is not None:
            # Fill the grid of axes column by column
            ax = axes[i % nrows, i // nrows]
        record = grouped[gkey].toftsviewer(seq=seq, colorby=colorby, color=color,
                                           seqlen=seqlen, first_index=fi, **kw)
        record.plot(ax=ax, **kw2)
        xlims.append(ax.get_xlim())
        if axlabel is True and gkey is not None or axlabel not in (True, False, None):
            # Work on a copy, the dict of the caller (or the shared default) is never modified
            axlabel_kw = axlabel_kw.copy()
            axlabel_kw.setdefault('xy', (0, 0.8))
            axlabel_kw.setdefault('xycoords', 'axes fraction')
            axlabel_kw.setdefault('va', 'top')
            if axlabel is True:
                label = ' '.join(map(str, gkey))
            else:
                label = grouped[gkey][0].meta[axlabel]
            ax.annotate(label, **axlabel_kw)
    if sharex:
        # Use limits including the limits of all subplots
        x0lim = min(x[0] for x in xlims)
        x1lim = max(x[1] for x in xlims)
        ax.set_xlim(x0lim, x1lim)
    if xticks is True and sharex:
        import matplotlib.ticker as ticker
        ax.xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
    elif xticks and xticks is not True:
        ax.set_xticks(xticks)
    elif not xticks:
        if axes is not None:
            for i in range(ncols * nrows):
                axes[i % nrows, i // nrows].axis('off')
    if fname is not None:
        fig.savefig(fname, dpi=dpi, transparent=transparent, bbox_inches=bbox_inches)
        if show:
            plt.show()
        plt.close(fig)
    else:
        if show:
            plt.show()
        return fig


if __name__ == '__main__':
    # Manual demo: call the plot function with several groupings.
    # read() and read_fts() are called without arguments
    # (presumably they load example data -- TODO confirm).
    from sugar import read, read_fts

    demo_fts = read_fts()
    plot_ftsviewer(demo_fts)
    plot_ftsviewer(demo_fts, groupby='type')

    demo_seqs = read()
    plot_ftsviewer(demo_seqs.fts, groupby=('seqid', 'type'))
    # The same functionality via the method of the sequences object
    demo_seqs.plot_ftsviewer()