# (C) 2024, Tom Eulenfeld, MIT license
from warnings import warn
import matplotlib.pyplot as plt
import numpy as np
from sugar.data import gcode
from sugar.imaging.colors import get_color_scheme
from matplotlib.colors import to_rgb
from matplotlib.patches import Rectangle
def _get_colordict(color, alphabet, default='white', gap='-', gap_color='white'):
if color is None:
color = {l: 'C{}'.format(i % 10) for i, l in enumerate(alphabet)}
try:
color = to_rgb(color)
except ValueError:
if isinstance(color, dict):
colord = color
elif isinstance(color, (tuple, list)):
colord = {l: color[i % len(color)] for i, l in enumerate(alphabet)}
else:
colord = get_color_scheme(color)
else:
colord = {l: color for l in alphabet}
for g in gap:
colord.setdefault(g, gap_color)
return {l: to_rgb(colord.get(l, default)) for l in list(alphabet) + list(gap)}
def _get_fts_colordict(fts, fts_color, fts_colorby):
if isinstance(fts_colorby, str):
metaget = fts_colorby
fts_colorby = lambda ft: ft.meta.get(metaget)
if fts_color is None:
fts_color = ['C{}'.format(i) for i in range(10)]
if isinstance(fts_color, str):
fts_color = [fts_color]
if not isinstance(fts_color, dict):
d = {}
i = 0
for ft in fts:
k = fts_colorby(ft)
if k not in d:
d[k] = fts_color[i]
i = (i + 1) % len(fts_color)
fts_color = d
for k in fts_color:
fts_color[k] = to_rgb(fts_color[k])
return fts_color, fts_colorby
def _get_xy(extent, n, m):
if extent is None:
extent = [-0.5, n - 0.5, -0.5, m - 0.5]
if len(extent) == 4:
extent = [extent[0:2], extent[2:4]]
x, y = extent
if len(x) == 2:
x = np.linspace(x[0], x[1], n+1)
if len(y) == 2:
y = np.linspace(y[1], y[0], m+1)
return x, y
def _despine(ax, show_spines, spine_offset):
if show_spines is not True:
sides = ['top', 'right', 'left', 'bottom']
if show_spines is False:
despine = [True, True, True, True]
elif isinstance(show_spines, str):
despine = [side == show_spines for side in sides]
else:
despine = show_spines
for despineit, side in zip(despine, sides):
if despineit:
ax.spines[side].set_visible(False)
elif spine_offset:
ax.spines[side].set_position(('outward', spine_offset))
_FTS_ACCOUNT_FOR_GAPS = True
[docs]
def plot_alignment(
seqs, fname=None, *,
ax=None, figsize=None,
extent=None, aspect=None,
gap='- ',
color='gray', gap_color='white',
symbols=False,
symbol_color='black',
symbol_gap_color='black',
symbol_size=None,
scale_symbol_size=1,
symbol_kw=None,
fts=None,
fts_display='facecolor',
fts_colorby='type',
fts_color=None,
fts_color_gap_alpha=1,
fts_alpha=None,
fts_box_groups='type',
fts_box_lw=5,
fts_box_kw=None,
show_spines=False, spine_offset=None,
xticks=True,
dpi=None, transparent=None, bbox_inches=None, show=False,
**kw
):
r"""
Plot an alignment
:param seqs: sequences
:param fname: The filename if saving the plot, the figure will be closed afterwards, default: do not save the plot and return figure
:param ax: The ax to plot in, default: create a new ax
:param figsize: The size of the created figure, only applies for ``ax=None``
:param extent: The extent of the plotted alignment in data coordinates ``[xmin, xmax, ymin, ymax]``,
the default plots each symbol and sequence at integer coordinates
:param aspect: Wether to shrink the axis to guarantee the given aspect ratio,
default no shrinkage, ``aspect=2`` gives visually appealing plots if symbols are also plotted.
:param gap: The characters recognized as gaps, default is ``'- '``
:param color: The background color,
might be any constant color (defaults to ``'gray'``),
a list of colors, or
None for the default matplotlib color cycle,
a dictionary mapping the alphabet to colors, or
a supported color scheme, see `.get_color_scheme()`, e.g. ``'flower'``.
:param gap_color: The color of gaps, default is ``'white'``
:param symbols: Wether to plot symbols (letters), default ``False``
:param symbol_color: The color of the symbols, takes the same values as the color parameter,
default is ``'black'``
:param symbol_gap_color: the color of the gap symbol, default is ``'black'``
:param symbol_size: The font size of the symbols, by default a visually appealing size is calculated
:param scale_symbol_size: Scale factor for the automatically calculated symbol size, default is 1
:param symbol_kw: A dictionary of additional parameters passed to matplotlib's :meth:`~matplotlib.axes.Axes.annotate`
:param fts: Wether to highlight features, defaults to no highlighting,
might be a FeatureList object or just True to use the features which are attached to the sequences object.
:param fts_display: How to display the features, one of ``'facecolor'`` (default) and ``'box'``,
boxes will range over all sequences
:param fts_colorby: How to define the color of the features, might be any key in the metadata,
defaults to ``'type'``, but can also be a function taking a Feature and returning an identifier
:param fts_color: The color of the features, similarly as with the color parameter,
this might be a constant color,
a list of colors, or
None for the default matplotlib color cycle (the default), or
a dictionary mapping the feature identifiers to colors.
:param fts_color_gap_alpha:
The alpha value of the feature color for gaps (default: 1)
:param fts_alpha: Transparency of the features
:param fts_box_groups: For the ``fts_display='box'`` option,
we need to specify which features belong into the same box,
this parameter can be a list of FeatureList objects,
alternatively this parameter is passed to `.FeatureList.groupby()` to
define the groups, defaults to ``'type'``
:param fts_box_lw: linewidth of the the boxes, default: 5
:param fts_box_kw: Dictionary of additional parameters passed to matplotlib's `~matplotlib.patches.Rectangle` to create the feature boxes
:param show_spines,despine_offset: Parameters passed to seaborn's despine function,
the default ``show_spines=False`` removes axes spines
:param xticks: True leaves the xticks (default), False turns them off, can also be a list of xticks
:param dpi,transparent,bbox_inches: Parameters passed to :meth:`~matplotlib.figure.Figure.savefig` if the figure is saved
:param show: True shows the figure, default: False
:param \*\*kw: Other kwargs are passed to matplotlib's :meth:`~matplotlib.axes.Axes.pcolormesh`
:return: Axes object if ``fname=None``, otherwise the figure is saved and closed
.. rubric:: Example
>>> from sugar import read
>>> seqs = read('https://osf.io/download/j2wyv')
>>> seqs.plot_alignment(show=True, figsize=(10, 4))
.. image:: ../_static/ali1.png
:width: 60%
>>> seqs[:, 70:120].plot_alignment(show=True, color=None, figsize=(10,8),
... symbols=True, aspect=2, alpha=0.5)
.. image:: ../_static/ali2.png
:width: 40%
>>> seqs2 = seqs[:5, :150].copy()
>>> seqs2.translate(complete=True).plot_alignment(
... show=True, color='flower', figsize=(10,8), symbols=True,
... aspect=2, alpha=0.5, edgecolors='w')
.. image:: ../_static/ali3.png
:width: 40%
"""
if gap is None:
gap = ''
alphabet = sorted(set(''.join(str(seq) for seq in seqs)) - set(gap))
color = _get_colordict(color, alphabet, gap=gap, gap_color=gap_color)
if fts:
if fts is True:
fts = seqs.fts
fts_color, fts_colorby = _get_fts_colordict(fts, fts_color, fts_colorby)
if not 0 <= fts_color_gap_alpha <= 1:
raise ValueError('fts_color_gap_alpha has to be a number 0 <= alpha <= 1')
if fts_color_gap_alpha < 1 and fts_alpha is None:
fts_alpha = 1
lens = [len(seq) for seq in seqs]
n = max(lens)
if len(set(lens)) > 1:
warn('fill up short sequences with empty space')
seqs = seqs.copy().str.ljust(n)
data = [[color[l] for l in seq.data] for seq in seqs]
if fts and fts_display == 'facecolor':
ftsd = fts.groupby('seqid')
if fts_alpha is not None:
data_fts = [[(1, 1, 1, 0) for l in seq.data] for seq in seqs]
for i, seq in enumerate(seqs):
for ft in ftsd.get(seq.id, []):
slice = seq.slindex(gap=gap if _FTS_ACCOUNT_FOR_GAPS else None)[ft]
start, stop, _ = slice.indices(len(data[i]))
if fts_alpha is None:
data[i][slice] = [fts_color[fts_colorby(ft)]] * (stop - start)
else:
data_fts[i][slice] = [fts_color[fts_colorby(ft)] + (fts_alpha,)] * (stop - start)
if fts_color_gap_alpha < 1:
assert fts_alpha is not None
for si in range(start, stop):
if seq.data[si] in gap:
data_fts[i][si] = fts_color[fts_colorby(ft)] + (fts_color_gap_alpha,)
if ax is None:
fig = plt.figure(figsize=figsize)
ax = fig.add_axes([0, 0, 1, 1])
else:
fig = ax.get_figure()
x, y = _get_xy(extent, n, len(data))
ax.pcolormesh(x, y, np.array(data), **kw)
if fts and fts_alpha is not None and fts_display == 'facecolor':
ax.pcolormesh(x, y, np.array(data_fts), **kw)
if fts and fts_display == 'box':
if fts_box_kw is None:
fts_box_kw = {}
fts_box_kw.setdefault('fill', False)
if not isinstance(fts_box_groups, (tuple, list)):
fts_box_groups = fts.groupby(fts_box_groups).values()
for ftgroup in fts_box_groups:
bx1, bx2 = ftgroup.loc_range
ax.add_patch(Rectangle((x[bx1], min(y)), x[bx2] - x[bx1], abs(y[-1] - y[0]),
color=fts_color[fts_colorby(ftgroup[0])], alpha=fts_alpha,
lw=fts_box_lw, **fts_box_kw))
if aspect is not None:
aspect = abs(aspect * len(data) / n / (y[-1] - y[0]) * (x[-1] - x[0]))
ax.set_aspect(aspect)
if symbols:
if symbol_kw is None:
symbol_kw = {}
symbol_color = _get_colordict(symbol_color, alphabet, default='black', gap=gap, gap_color=symbol_gap_color)
symbol_kw.setdefault('family', 'monospace')
if 'verticalalignment' not in symbol_kw:
symbol_kw.setdefault('va', 'center_baseline')
if 'horizontalalignment' not in symbol_kw:
symbol_kw.setdefault('ha', 'center')
if symbol_size is None:
fig.draw_without_rendering() # to set automatic axis limits
# transform bounding box of ax to inch
bbox = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
ratio_covered = abs((x[-1] - x[0]) / (ax.get_xlim()[1] - ax.get_xlim()[0]))
# Calculate the width of a single symbol box in inch.
# Font size is the font height specified in pixels with PPI (pixels per inch) of 72.
# With a typical aspect ratio of monospace fonts of 5:3, we would need a factor of 120 to fill the width of the symbol box
# We take slightly lower value of 100 here.
symbol_size = bbox.width * ratio_covered / n * 100 * scale_symbol_size
elif scale_symbol_size != 1:
warn('scale_symbol_size parameter is ignored if symbol_size is specified')
for i in range(len(data)):
for j in range(n):
xy = 0.5 * (x[j] + x[j+1]), 0.5 * (y[i] + y[i+1])
l = seqs[i].data[j]
ax.annotate(l, xy, color=symbol_color[l], size=symbol_size, **symbol_kw)
_despine(ax, show_spines, spine_offset)
ax.set_yticks([])
if xticks is not True:
if xticks is False:
xticks = []
ax.set_xticks(xticks)
if fname is not None:
fig.savefig(fname, dpi=dpi, transparent=transparent, bbox_inches=bbox_inches)
if show:
plt.show()
plt.close(fig)
else:
if show:
plt.show()
return ax
if __name__ == '__main__':
from sugar import read
seqs = read().sl(update_fts=True)[:, :100]
seqs[1][:10] = '-' * 10
seqs[1][:5] = ' ' * 5
print(seqs)
seqs.fts = seqs.fts.slice(10, 15)[:1]
print(seqs.fts)
plot_alignment(seqs, fts=True, aspect=2, rasterized=True, color='0.8', symbols=True, symbol_color='flower')