msaexplorer.draw

The draw module

The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a matplotlib axes has to be passed to the plotting function.

Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments. The functions will raise the appropriate exception in such a case.

Functions

  1r"""
  2# The draw module
  3
  4The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a
  5`matplotlib axes` has to be passed to the plotting function.
  6
  7Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments.
  8The functions will raise the appropriate exception in such a case.
  9
 10## Functions
 11
 12"""
 13
 14# built-in
 15from itertools import chain
 16from typing import Callable, Dict
 17from copy import deepcopy
 18import os
 19
 20import matplotlib
 21from numpy import ndarray
 22
 23# MSAexplorer
 24from msaexplorer import explore, config
 25
 26# libs
 27import numpy as np
 28import matplotlib.pyplot as plt
 29import matplotlib.patches as patches
 30from matplotlib.cm import ScalarMappable
 31from matplotlib.colors import is_color_like, Normalize, to_rgba
 32from matplotlib.collections import PatchCollection, PolyCollection
 33
 34
 35# general helper functions
 36def _validate_input_parameters(aln: explore.MSA, ax: plt.Axes, annotation: explore.Annotation | None = None):
 37    """
 38    Validate MSA class and axis.
 39    """
 40    if not isinstance(aln, explore.MSA):
 41        raise ValueError('alignment has to be an MSA class. use explore.MSA() to read in alignment')
 42    if not isinstance(ax, plt.Axes):
 43        raise ValueError('ax has to be an matplotlib axis')
 44    if annotation is not None:
 45        if not isinstance(annotation, explore.Annotation):
 46            raise ValueError('annotation has to be an annotation class. use explore.Annotation() to read in annotation')
 47
 48
 49def _format_x_axis(aln: explore.MSA, ax: plt.Axes, show_x_label: bool, show_left: bool):
 50    """
 51    General axis formatting.
 52    """
 53    ax.set_xlim(
 54        (aln.zoom[0] - 0.5, aln.zoom[0] + aln.length - 0.5) if aln.zoom is not None else (-0.5, aln.length - 0.5)
 55    )
 56    ax.spines['top'].set_visible(False)
 57    ax.spines['right'].set_visible(False)
 58    if show_x_label:
 59        ax.set_xlabel('alignment position')
 60    if not show_left:
 61        ax.spines['left'].set_visible(False)
 62
 63
 64def _find_stretches(row, non_nan_only=False) -> list[tuple[int, int, int]] | list[tuple[int, int]] :
 65    """
 66    Finds consecutive stretches of values in an array, with an option to exclude NaN stretches.
 67
 68    :param row: NumPy array of values
 69    :param non_nan_only: If True, only returns stretches of non-NaN values.
 70    :return: List of stretches (start, end, value at start) for all values or non-NaN values only.
 71    """
 72    if row.size == 0:
 73        return []
 74
 75    if non_nan_only:
 76        # Create a boolean mask for non-NaN values
 77        non_nan_mask = ~np.isnan(row)
 78        # Find changes in the mask
 79        changes = np.diff(non_nan_mask.astype(int)) != 0
 80        change_idx = np.nonzero(changes)[0]
 81        starts = np.concatenate(([0], change_idx + 1))
 82        ends = np.concatenate((change_idx, [len(row) - 1]))
 83
 84        # Return only stretches that start with non-NaN values
 85        return [(start, end) for start, end in zip(starts, ends) if non_nan_mask[start]]
 86    else:
 87        # Find change points: where adjacent cells differ.
 88        changes = np.diff(row) != 0
 89        change_idx = np.nonzero(changes)[0]
 90        starts = np.concatenate(([0], change_idx + 1))
 91        ends = np.concatenate((change_idx, [len(row) - 1]))
 92
 93        return [(start, end, row[start]) for start, end in zip(starts, ends)]
 94
 95
 96def _seq_names(aln: explore.MSA, ax: plt.Axes, custom_seq_names: tuple, show_seq_names: bool):
 97    """
 98    Validate custom names and set show names to True. Format axis accordingly.
 99    """
100    if custom_seq_names:
101        show_seq_names = True
102        if not isinstance(custom_seq_names, tuple):
103            raise ValueError('configure your custom names list: custom_names=(name1, name2...)')
104        if len(custom_seq_names) != len(aln.alignment.keys()):
105            raise ValueError('length of sequences not equal to number of custom names')
106    if show_seq_names:
107        ax.yaxis.set_ticks_position('none')
108        ax.set_yticks(np.arange(len(aln.alignment)) + 0.6)
109        if custom_seq_names:
110            ax.set_yticklabels(custom_seq_names[::-1])
111        else:
112            names = [x.split(' ')[0] for x in list(aln.alignment.keys())[::-1]]
113            ax.set_yticklabels(names)
114    else:
115        ax.set_yticks([])
116
117
118def _create_identity_patch(row, aln: explore.MSA, col: list, zoom: tuple[int, int], y_position: float | int, reference_color: str, seq_name: str, identity_color: str | ndarray, show_gaps: bool):
119    """
120    Creates the initial patch.
121    """
122    if show_gaps:
123        # plot a rectangle for parts that do not have gaps
124        for stretch in _find_stretches(row, True):
125            col.append(patches.Rectangle((stretch[0] + zoom[0] - 0.5, y_position), stretch[1] - stretch[0] + 1, 0.8,
126                                                           facecolor=reference_color if seq_name == aln.reference_id else identity_color
127                                                           )
128
129                                         )
130    # just plot a rectangle
131    else:
132        col.append(patches.Rectangle((zoom[0] - 0.5, y_position), zoom[1] - zoom[0], 0.8,
133                                     facecolor=reference_color if seq_name == aln.reference_id else identity_color
134                                     )
135                   )
136
137
138def _create_polygons(stretches: list, identity_values: list | ndarray, zoom: tuple, y_position: int, polygons: list, aln_colors: dict | ScalarMappable, polygon_colors: list, detected_identity_values: set | None = None):
139    """
140    create the individual polygons
141    """
142
143    for start, end, value in stretches:
144        if value not in identity_values:
145            continue
146        if detected_identity_values is not None:
147            detected_identity_values.add(value)
148        width = end + 1 - start
149        # Calculate x coordinates adjusted for zoom and centering
150        x0 = start + zoom[0] - 0.5
151        x1 = x0 + width
152        # Define the rectangle corners
153        rect_coords = [
154            (x0, y_position),
155            (x1, y_position),
156            (x1, y_position + 0.8),
157            (x0, y_position + 0.8),
158            (x0, y_position)
159        ]
160        polygons.append(rect_coords)
161        if type(aln_colors) != ScalarMappable:
162            polygon_colors.append(aln_colors[value]['color'])
163        else:
164            polygon_colors.append(aln_colors.to_rgba(value))
165
166
167def _plot_annotation(annotation_dict: dict, ax: plt.Axes, direction_marker_size: int | None, color: str | ScalarMappable):
168    """
169    Plot annotations
170    :param annotation_dict: dict of annotations
171    :param ax: matplotlib Axes
172    :param direction_marker_size: size of marker
173    :param color: color of annotation (color or scalar)
174    """
175    for annotation in annotation_dict:
176        for locations in annotation_dict[annotation]['location']:
177            x_value = locations[0]
178            length = locations[1] - locations[0]
179            ax.add_patch(
180                patches.FancyBboxPatch(
181                    (x_value, annotation_dict[annotation]['track'] + 1),
182                    length,
183                    0.8,
184                    boxstyle="Round, pad=0",
185                    ec="black",
186                    fc=color.to_rgba(annotation_dict[annotation]['conservation']) if isinstance(color, ScalarMappable) else color,
187                )
188            )
189            if direction_marker_size is not None:
190                if annotation_dict[annotation]['strand'] == '-':
191                    marker = '<'
192                else:
193                    marker = '>'
194                ax.plot(x_value + length/2, annotation_dict[annotation]['track'] + 1.4, marker=marker, markersize=direction_marker_size, color='white', markeredgecolor='black')
195
196        # plot linked annotations (such as splicing)
197        if len(annotation_dict[annotation]['location']) > 1:
198            y_value = annotation_dict[annotation]['track'] + 1.4
199            start = None
200            for locations in annotation_dict[annotation]['location']:
201                if start is None:
202                    start = locations[1]
203                    continue
204                ax.plot([start, locations[0]], [y_value, y_value], '--', linewidth=2, color='black')
205                start = locations[1]
206
207
208def _add_track_positions(annotation_dic):
209    # create a dict and sort
210    annotation_dic = dict(sorted(annotation_dic.items(), key=lambda x: x[1]['location'][0][0]))
211
212    # remember for each track the largest stop
213    track_stops = [0]
214
215    for ann in annotation_dic:
216        flattened_locations = list(chain.from_iterable(annotation_dic[ann]['location']))  # flatten list
217        track = 0
218        # check if a start of a gene is smaller than the stop of the current track
219        # -> move to new track
220        while flattened_locations[0] < track_stops[track]:
221            track += 1
222            # if all prior tracks are potentially causing an overlap
223            # create a new track and break
224            if len(track_stops) <= track:
225                track_stops.append(0)
226                break
227        # in the current track remember the stop of the current gene
228        track_stops[track] = flattened_locations[-1]
229        # and indicate the track in the dict
230        annotation_dic[ann]['track'] = track
231
232    return annotation_dic
233
234
235def _get_contrast_text_color(rgba_color):
236    """
237    compute the brightness of a color
238    """
239    r, g, b, a = rgba_color
240    brightness = (r * 299 + g * 587 + b * 114) / 1000
241    return 'white' if brightness < 0.4 else 'black'
242
243
244def _plot_sequence_text(aln: explore.MSA, seq_name: str, ref_name: str, values: ndarray, matrix: ndarray, ax:plt.Axes, zoom: tuple, y_position:int, value_to_skip: int, ref_color:str, show_gaps: bool, cmap: None | ScalarMappable = None):
245    """
246    Plot sequence text - however this will be done even if there is not enough space.
247    Might need some rework in the future.
248    """
249    x_text = 0
250    if seq_name == ref_name:
251        different_cols = np.any((matrix != value_to_skip) & ~np.isnan(matrix), axis=0)
252    else:
253        different_cols = [False]*aln.length
254
255    for idx, (character, value) in enumerate(zip(aln.alignment[seq_name], values)):
256        if value != value_to_skip and character != '-' or seq_name == ref_name and character != '-' or character == '-'and not show_gaps:
257            # text color
258            if seq_name == ref_name:
259                text_color = _get_contrast_text_color(to_rgba(ref_color))
260            elif cmap is not None:
261                text_color = _get_contrast_text_color(cmap.to_rgba(value))
262            else:
263                text_color = 'black'
264
265            ax.text(
266                x=x_text + zoom[0] if zoom is not None else x_text,
267                y=y_position + 0.4,
268                s=character,
269                fontweight='bold' if different_cols[idx] else 'normal',
270                ha='center',
271                va='center',
272                c='grey' if not different_cols[idx] and seq_name == ref_name else text_color,
273            )
274        x_text += 1
275
276
277def identity_alignment(aln: explore.MSA, ax: plt.Axes, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', show_mask:bool = True, show_gaps:bool = True, fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_mismatching_chars: bool = False, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int]= (1, 1)):
278    """
279    Generates an identity alignment overview plot.
280    :param aln: alignment MSA class
281    :param ax: matplotlib axes
282    :param show_title: whether to show title
283    :param show_seq_names: whether to show seq names
284    :param show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues
285    :param custom_seq_names: custom seq names
286    :param reference_color: color of reference sequence
287    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
288    :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
289    :param fancy_gaps: show gaps with a small black bar
290    :param show_mismatches: whether to show mismatches otherwise it will be shown as match
291    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
292    :param color_mismatching_chars: color mismatching chars with their unique color
293    :param show_x_label: whether to show x label
294    :param show_legend: whether to show the legend
295    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
296    """
297
298    # Validate inputs and colors
299    _validate_input_parameters(aln, ax)
300    if not is_color_like(reference_color):
301        raise ValueError(f'{reference_color} for reference is not a color')
302
303    # Both options for gaps work hand in hand
304    if fancy_gaps:
305        show_gaps = True
306
307    # Determine zoom
308    zoom = (0, aln.length) if aln.zoom is None else aln.zoom
309
310    # Set up color mapping and identity values
311    aln_colors = config.IDENTITY_COLORS.copy()
312    identity_values = [-1, -2, -3]  # -1 = mismatch, -2 = mask, -3 ambiguity
313    if color_mismatching_chars:
314        colors_to_extend = config.CHAR_COLORS[aln.aln_type]
315        identity_values += [x + 1 for x in range(len(colors_to_extend))] # x+1 is needed to allow correct mapping
316        for idx, char in enumerate(colors_to_extend):
317            aln_colors[idx + 1] = {'type': char, 'color': colors_to_extend[char]}
318
319    # Compute identity alignment
320    identity_aln = aln.calc_identity_alignment(
321        encode_mask=show_mask,
322        encode_gaps=show_gaps,
323        encode_mismatches=show_mismatches,
324        encode_ambiguities=show_ambiguities,
325        encode_each_mismatch_char=color_mismatching_chars
326    )
327
328    # List to store polygons
329    detected_identity_values = {0}
330    polygons, polygon_colors, patch_list = [], [], []
331
332    for i, seq_name in enumerate(aln.alignment):
333        y_position = len(aln.alignment) - i - 1
334        row = identity_aln[i]
335
336        # plot a line below everything
337        if fancy_gaps:
338            ax.hlines(
339                y_position + 0.4,
340                xmin=zoom[0] - 0.5,
341                xmax=zoom[1] + 0.5,
342                color='black',
343                linestyle='-',
344                zorder=0,
345                linewidth=0.75
346            )
347
348        # plot the basic shape per sequence with gaps
349        _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, aln_colors[0]['color'], show_gaps)
350
351        # find consecutive stretches
352        stretches = _find_stretches(row)
353        # create polygons per stretch
354        _create_polygons(stretches, identity_values, zoom, y_position, polygons, aln_colors, polygon_colors, detected_identity_values)
355
356        # add sequence text
357        if show_sequence:
358            _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, identity_aln[i], identity_aln, ax,
359                                zoom, y_position, 0, reference_color, show_gaps)
360
361    # Create the LineCollection: each segment is drawn in a single call.
362    ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt'))
363    ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors))
364
365    # custom legend
366    if show_legend:
367        # create it
368        custom_legend = [
369            ax.add_line(
370                plt.Line2D(
371                    [],
372                    [],
373                    color=aln_colors[x]['color'], marker='s' ,markeredgecolor='grey', linestyle='', markersize=10)) for x in aln_colors if x in detected_identity_values
374        ]
375        # plot it
376        ax.legend(
377            custom_legend,
378            [aln_colors[x]['type'] for x in aln_colors if x in detected_identity_values],
379            loc='lower right',
380            bbox_to_anchor=bbox_to_anchor,
381            ncols=(len(detected_identity_values) + 1) / 2 if aln.aln_type == 'AA' and color_mismatching_chars else len(detected_identity_values),
382            frameon=False
383        )
384
385    _seq_names(aln, ax, custom_seq_names, show_seq_names)
386
387    # configure axis
388    ax.set_ylim(0, len(aln.alignment))
389    if show_title:
390        ax.set_title('identity', loc='left')
391    _format_x_axis(aln, ax, show_x_label, show_left=False)
392
393
394def similarity_alignment(aln: explore.MSA, ax: plt.Axes, matrix_type: str | None = None, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', cmap: str = 'twilight_r', show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1):
395    """
396    Generates a similarity alignment overview plot. Importantly the similarity values are normalized!
397    :param aln: alignment MSA class
398    :param ax: matplotlib axes
399    :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
400    :param show_title: whether to show title
401    :param show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues
402    :param show_seq_names: whether to show seq names
403    :param custom_seq_names: custom seq names
404    :param reference_color: color of reference sequence
405    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
406    :param gap_color: color for gaps
407    :param show_gaps: whether to show gaps otherwise it will be ignored
408    :param fancy_gaps: show gaps with a small black bar
409    :param show_x_label: whether to show x label
410    :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
411    :param cbar_fraction: fraction of the original ax reserved for the legend
412    """
413    # input check
414    _validate_input_parameters(aln, ax)
415
416    # validate colors
417    if not is_color_like(reference_color):
418        raise ValueError(f'{reference_color} for reference is not a color')
419
420    # Both options for gaps work hand in hand
421    if fancy_gaps:
422        show_gaps = True
423
424    # define zoom to plot
425    if aln.zoom is None:
426        zoom = (0, aln.length)
427    else:
428        zoom = aln.zoom
429
430    # get data
431    similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type)  # use normalized values here
432    similarity_aln = similarity_aln.round(2)  # round data for good color mapping
433
434    # determine min max values of the underlying matrix and create cmap
435    min_value, max_value = 0, 1
436    cmap = ScalarMappable(
437        norm=Normalize(
438            vmin=min_value,
439            vmax=max_value
440        ),
441        cmap=plt.get_cmap(cmap)
442    )
443
444    # create similarity values
445    similarity_values = np.arange(start=min_value, stop=max_value, step=0.01)
446    # round it to be absolutely sure that values match with rounded sim alignment
447    similarity_values = similarity_values.round(2)
448
449    # create plot
450    polygons, polygon_colors, patch_list = [], [], []
451
452    for i, seq_name in enumerate(aln.alignment):
453        y_position = len(aln.alignment) - i - 1
454        row = similarity_aln[i]
455
456        # plot a line below everything
457        if fancy_gaps:
458            ax.hlines(
459                y_position + 0.4,
460                xmin=zoom[0] - 0.5,
461                xmax=zoom[1] + 0.5,
462                color='black',
463                linestyle='-',
464                zorder=0,
465                linewidth=0.75
466            )
467
468        # plot the basic shape per sequence with gaps
469        _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name,
470                               cmap.to_rgba(max_value) if seq_name != aln.reference_id else reference_color,
471                               show_gaps)
472
473        # find consecutive stretches
474        stretches = _find_stretches(row)
475        # create polygons per stretch
476        _create_polygons(stretches, similarity_values, zoom, y_position, polygons, cmap, polygon_colors)
477
478        # add sequence text
479        if show_sequence:
480            _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, similarity_aln[i], similarity_aln, ax,
481                                zoom, y_position, 1, reference_color, show_gaps, cmap)
482
483    # Create the LineCollection: each segment is drawn in a single call.
484    ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt'))
485    ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors))
486
487    # legend
488    if show_cbar:
489        cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
490        cbar.set_ticks([min_value, max_value])
491        cbar.set_ticklabels(['low', 'high'])
492
493    # format seq names
494    _seq_names(aln, ax, custom_seq_names, show_seq_names)
495
496    # configure axis
497    ax.set_ylim(0, len(aln.alignment))
498    if show_title:
499        ax.set_title('similarity', loc='left')
500    _format_x_axis(aln, ax, show_x_label, show_left=False)
501
502
503def stat_plot(aln: explore.MSA, ax: plt.Axes, stat_type: str, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True):
504    """
505    Generate a plot for the various alignment stats.
506    :param aln: alignment MSA class
507    :param ax: matplotlib axes
508    :param stat_type: 'entropy', 'gc', 'coverage', 'ts/tv', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
509    :param line_color: color of the line
510    :param line_width: width of the line
511    :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
512    :param show_x_label: whether to show the x-axis label
513    :param show_title: whether to show the title
514    """
515
516    def moving_average(arr, window_size):
517        if window_size > 1:
518            i = 0
519            moving_averages, plotting_idx = [], []
520            while i < len(arr) + 1:
521                half_window_size = window_size // 2
522                window_left = arr[i - half_window_size : i] if i > half_window_size else arr[0:i]
523                window_right = arr[i: i + half_window_size] if i < len(arr) - half_window_size else arr[i: len(arr)]
524                moving_averages.append((sum(window_left) + sum(window_right)) / (len(window_left) + len(window_right)))
525                plotting_idx.append(i)
526                i += 1
527
528            return np.array(moving_averages), np.array(plotting_idx) if aln.zoom is None else np.array(plotting_idx) + aln.zoom[0]
529        else:
530            return arr, np.arange(aln.zoom[0], aln.zoom[1]) if aln.zoom is not None else np.arange(aln.length)
531
532    # define possible functions to calc here
533    stat_functions: Dict[str, Callable[[], list | ndarray]] = {
534        'gc': aln.calc_gc,
535        'entropy': aln.calc_entropy,
536        'coverage': aln.calc_coverage,
537        'identity': aln.calc_identity_alignment,
538        'similarity': aln.calc_similarity_alignment,
539        'ts tv score': aln.calc_transition_transversion_score
540    }
541
542    if stat_type not in stat_functions:
543        raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys())))
544
545    # input check
546    _validate_input_parameters(aln, ax)
547    if not is_color_like(line_color):
548        raise ValueError('line color is not a color')
549
550    # validate rolling average
551    if rolling_average < 1 or rolling_average > aln.length:
552        raise ValueError('rolling_average must be between 1 and length of sequence')
553
554    # generate input data
555    array = stat_functions[stat_type]()
556
557    if stat_type == 'identity':
558        min_value, max_value = -1, 0
559    elif stat_type == 'ts tv score':
560        min_value, max_value = -1, 1
561    else:
562        min_value, max_value = 0, 1
563    if stat_type in ['identity', 'similarity']:
564        # for the mean nan values get handled as the lowest possible number in the matrix
565        array = np.nan_to_num(array, True, min_value)
566        array = np.mean(array, axis=0)
567    data, plot_idx = moving_average(array, rolling_average)
568
569    # plot the data
570    ax.fill_between(
571        # this add dummy data left and right for better plotting
572        # otherwise only half of the step is shown
573        np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx,
574        np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data,
575        min_value,
576        linewidth = line_width,
577        edgecolor=line_color,
578        step='mid' if rolling_average == 1 else None,
579        facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none'
580    )
581    if stat_type == 'gc':
582        ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1)
583
584    # format axis
585    ax.set_ylim(min_value, max_value*0.1+max_value)
586    ax.set_yticks([min_value, max_value])
587    if stat_type == 'gc':
588        ax.set_yticklabels(['0', '100'])
589    elif stat_type == 'ts tv score':
590        ax.set_yticklabels(['tv', 'ts'])
591    else:
592        ax.set_yticklabels(['low', 'high'])
593
594    # show title
595    if show_title:
596        ax.set_title(
597            f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position',
598            loc='left'
599        )
600
601    _format_x_axis(aln, ax, show_x_label, show_left=True)
602
603
604def variant_plot(aln: explore.MSA, ax: plt.Axes, lollisize: tuple[int, int] | list[int, int] = (1, 3), show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int] = (1, 1)):
605    """
606    Plots variants.
607    :param aln: alignment MSA class
608    :param ax: matplotlib axes
609    :param lollisize: (stem_size, head_size)
610    :param show_x_label:  whether to show the x-axis label
611    :param show_legend: whether to show the legend
612    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
613    """
614
615    # validate input
616    _validate_input_parameters(aln, ax)
617    if not isinstance(lollisize, tuple) or len(lollisize) != 2:
618        raise ValueError('lollisize must be tuple of length 2 (stem, head)')
619    for _size in lollisize:
620        if not isinstance(_size, float | int) or _size <= 0:
621            raise ValueError('lollisize must be floats greater than zero')
622
623    # define colors
624    colors = config.CHAR_COLORS[aln.aln_type]
625    # get snps
626    snps = aln.get_snps()
627    # define where to plot (each ref type gets a separate line)
628    ref_y_positions, y_pos, detected_var = {}, 0, set()
629
630    # iterate over snp dict
631    for pos in snps['POS']:
632        for identifier in snps['POS'][pos]:
633            # fill in y pos dict
634            if identifier == 'ref':
635                if snps['POS'][pos]['ref'] not in ref_y_positions:
636                    ref_y_positions[snps['POS'][pos]['ref']] = y_pos
637                    y_pos += 1.1
638                    continue
639            # plot
640            if identifier == 'ALT':
641                for alt in snps['POS'][pos]['ALT']:
642                    ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos,
643                              ymin=ref_y_positions[snps['POS'][pos]['ref']],
644                              ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
645                              color=colors[alt],
646                              zorder=100,
647                              linewidth=lollisize[0]
648                              )
649                    ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos,
650                            ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
651                            color=colors[alt],
652                            marker='o',
653                            markersize=lollisize[1]
654                            )
655                    detected_var.add(alt)
656
657    # plot hlines
658    for y_char in ref_y_positions:
659        ax.hlines(
660            ref_y_positions[y_char],
661            xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5,
662            xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5,
663            color='black',
664            linestyle='-',
665            zorder=0,
666            linewidth=0.75
667        )
668    # create a custom legend
669    if show_legend:
670        custom_legend = [
671            ax.add_line(
672                plt.Line2D(
673                    [],
674                    [],
675                    color=colors[char],
676                    marker='o',
677                    linestyle='',
678                    markersize=5
679                )
680            ) for char in colors if char in detected_var
681        ]
682        ax.legend(
683            custom_legend,
684            [char for char in colors if char in detected_var],  # ensures correct sorting
685            loc='lower right',
686            title='variant',
687            bbox_to_anchor=bbox_to_anchor,
688            ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var),
689            frameon=False
690        )
691
692    # format axis
693    _format_x_axis(aln, ax, show_x_label, show_left=False)
694    ax.spines['left'].set_visible(False)
695    ax.set_yticks([ref_y_positions[x] for x in ref_y_positions])
696    ax.set_yticklabels(ref_y_positions.keys())
697    ax.set_ylim(0, y_pos)
698    ax.set_ylabel('reference')
699
700
701def orf_plot(aln: explore.MSA, ax: plt.Axes, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1):
702    """
703    Plot conserved ORFs.
704    :param aln: alignment MSA class
705    :param ax: matplotlib axes
706    :param min_length: minimum length of orf
707    :param non_overlapping_orfs: whether to consider overlapping orfs
708    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
709    :param direction_marker_size: marker size for direction marker, not shown if marker_size == None
710    :param show_x_label: whether to show the x-axis label
711    :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
712    :param cbar_fraction: fraction of the original ax reserved for the colorbar
713    """
714
715    # normalize colorbar
716    cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap))
717
718    # validate input
719    _validate_input_parameters(aln, ax)
720
721    # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only
722    # searched within the zoomed region)
723    aln_temp = deepcopy(aln)
724    aln_temp.zoom = None
725    if non_overlapping_orfs:
726        annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length)
727    else:
728        annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length)
729
730    # filter dict for zoom
731    if aln.zoom is not None:
732        annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])}
733
734    # add track for plotting
735    _add_track_positions(annotation_dict)
736
737    # plot
738    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap)
739
740    # legend
741    if show_cbar:
742        cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
743        cbar.set_label('% identity')
744        cbar.set_ticks([0, 100])
745
746    # format axis
747    _format_x_axis(aln, ax, show_x_label, show_left=False)
748    ax.set_ylim(bottom=0.8)
749    ax.set_yticks([])
750    ax.set_yticklabels([])
751    ax.set_title('conserved orfs', loc='left')
752
753
754def annotation_plot(aln: explore.MSA, annotation: explore.Annotation | str, ax: plt.Axes, feature_to_plot: str, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False):
755    """
756    Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
757    :param aln: alignment MSA class
758    :param annotation: annotation class | path to annotation file
759    :param ax: matplotlib axes
760    :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
761    :param color: color for the annotation
762    :param show_direction: show strand information
763    :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True
764    :param show_x_label: whether to show the x-axis label
765    """
766    # helper function
767    def parse_annotation_from_string(path: str, msa: explore.MSA) -> explore.Annotation:
768        """
769        Parse annotation.
770        :param path: path to annotation
771        :param msa: msa object
772        :return: parsed annotation
773        """
774        if os.path.exists(path):
775            # reset zoom so the annotation is correctly parsed
776            msa_temp = deepcopy(msa)
777            msa_temp.zoom = None
778            return explore.Annotation(msa_temp, path)
779        else:
780            raise FileNotFoundError()
781
782    # parse from path
783    if type(annotation) is str:
784        annotation = parse_annotation_from_string(annotation, aln)
785
786    # validate input
787    _validate_input_parameters(aln, ax, annotation)
788    if not is_color_like(color):
789        raise ValueError(f'{color} for reference is not a color')
790
791    # ignore features to plot for bed files (here it is written into one feature)
792    if annotation.ann_type == 'bed':
793        annotation_dict = annotation.features['region']
794        feature_to_plot = 'bed regions'
795    else:
796        # try to subset the annotation dict
797        try:
798            annotation_dict = annotation.features[feature_to_plot]
799        except KeyError:
800            raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.')
801
802    # plotting and formating
803    _add_track_positions(annotation_dict)
804    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color)
805    _format_x_axis(aln, ax, show_x_label, show_left=False)
806    ax.set_ylim(bottom=0.8)
807    ax.set_yticks([])
808    ax.set_yticklabels([])
809    ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')
def identity_alignment( aln: msaexplorer.explore.MSA, ax: matplotlib.axes._axes.Axes, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', show_mask: bool = True, show_gaps: bool = True, fancy_gaps: bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_mismatching_chars: bool = False, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float | int, float | int] | list[float | int, float | int] = (1, 1)):
278def identity_alignment(aln: explore.MSA, ax: plt.Axes, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', show_mask:bool = True, show_gaps:bool = True, fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_mismatching_chars: bool = False, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int]= (1, 1)):
279    """
280    Generates an identity alignment overview plot.
281    :param aln: alignment MSA class
282    :param ax: matplotlib axes
283    :param show_title: whether to show title
284    :param show_seq_names: whether to show seq names
285    :param show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues
286    :param custom_seq_names: custom seq names
287    :param reference_color: color of reference sequence
288    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
289    :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
290    :param fancy_gaps: show gaps with a small black bar
291    :param show_mismatches: whether to show mismatches otherwise it will be shown as match
292    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
293    :param color_mismatching_chars: color mismatching chars with their unique color
294    :param show_x_label: whether to show x label
295    :param show_legend: whether to show the legend
296    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
297    """
298
299    # Validate inputs and colors
300    _validate_input_parameters(aln, ax)
301    if not is_color_like(reference_color):
302        raise ValueError(f'{reference_color} for reference is not a color')
303
304    # Both options for gaps work hand in hand
305    if fancy_gaps:
306        show_gaps = True
307
308    # Determine zoom
309    zoom = (0, aln.length) if aln.zoom is None else aln.zoom
310
311    # Set up color mapping and identity values
312    aln_colors = config.IDENTITY_COLORS.copy()
313    identity_values = [-1, -2, -3]  # -1 = mismatch, -2 = mask, -3 ambiguity
314    if color_mismatching_chars:
315        colors_to_extend = config.CHAR_COLORS[aln.aln_type]
316        identity_values += [x + 1 for x in range(len(colors_to_extend))] # x+1 is needed to allow correct mapping
317        for idx, char in enumerate(colors_to_extend):
318            aln_colors[idx + 1] = {'type': char, 'color': colors_to_extend[char]}
319
320    # Compute identity alignment
321    identity_aln = aln.calc_identity_alignment(
322        encode_mask=show_mask,
323        encode_gaps=show_gaps,
324        encode_mismatches=show_mismatches,
325        encode_ambiguities=show_ambiguities,
326        encode_each_mismatch_char=color_mismatching_chars
327    )
328
329    # List to store polygons
330    detected_identity_values = {0}
331    polygons, polygon_colors, patch_list = [], [], []
332
333    for i, seq_name in enumerate(aln.alignment):
334        y_position = len(aln.alignment) - i - 1
335        row = identity_aln[i]
336
337        # plot a line below everything
338        if fancy_gaps:
339            ax.hlines(
340                y_position + 0.4,
341                xmin=zoom[0] - 0.5,
342                xmax=zoom[1] + 0.5,
343                color='black',
344                linestyle='-',
345                zorder=0,
346                linewidth=0.75
347            )
348
349        # plot the basic shape per sequence with gaps
350        _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, aln_colors[0]['color'], show_gaps)
351
352        # find consecutive stretches
353        stretches = _find_stretches(row)
354        # create polygons per stretch
355        _create_polygons(stretches, identity_values, zoom, y_position, polygons, aln_colors, polygon_colors, detected_identity_values)
356
357        # add sequence text
358        if show_sequence:
359            _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, identity_aln[i], identity_aln, ax,
360                                zoom, y_position, 0, reference_color, show_gaps)
361
362    # Create the LineCollection: each segment is drawn in a single call.
363    ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt'))
364    ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors))
365
366    # custom legend
367    if show_legend:
368        # create it
369        custom_legend = [
370            ax.add_line(
371                plt.Line2D(
372                    [],
373                    [],
374                    color=aln_colors[x]['color'], marker='s' ,markeredgecolor='grey', linestyle='', markersize=10)) for x in aln_colors if x in detected_identity_values
375        ]
376        # plot it
377        ax.legend(
378            custom_legend,
379            [aln_colors[x]['type'] for x in aln_colors if x in detected_identity_values],
380            loc='lower right',
381            bbox_to_anchor=bbox_to_anchor,
382            ncols=(len(detected_identity_values) + 1) / 2 if aln.aln_type == 'AA' and color_mismatching_chars else len(detected_identity_values),
383            frameon=False
384        )
385
386    _seq_names(aln, ax, custom_seq_names, show_seq_names)
387
388    # configure axis
389    ax.set_ylim(0, len(aln.alignment))
390    if show_title:
391        ax.set_title('identity', loc='left')
392    _format_x_axis(aln, ax, show_x_label, show_left=False)

Generates an identity alignment overview plot.

Parameters
  • aln: alignment MSA class
  • ax: matplotlib axes
  • show_title: whether to show title
  • show_seq_names: whether to show seq names
  • show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues
  • custom_seq_names: custom seq names
  • reference_color: color of reference sequence
  • show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
  • show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
  • fancy_gaps: show gaps with a small black bar
  • show_mismatches: whether to show mismatches otherwise it will be shown as match
  • show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
  • color_mismatching_chars: color mismatching chars with their unique color
  • show_x_label: whether to show x label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
def similarity_alignment( aln: msaexplorer.explore.MSA, ax: matplotlib.axes._axes.Axes, matrix_type: str | None = None, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', cmap: str = 'twilight_r', show_gaps: bool = True, fancy_gaps: bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1):
395def similarity_alignment(aln: explore.MSA, ax: plt.Axes, matrix_type: str | None = None, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', cmap: str = 'twilight_r', show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1):
396    """
397    Generates a similarity alignment overview plot. Importantly the similarity values are normalized!
398    :param aln: alignment MSA class
399    :param ax: matplotlib axes
400    :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
401    :param show_title: whether to show title
402    :param show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues
403    :param show_seq_names: whether to show seq names
404    :param custom_seq_names: custom seq names
405    :param reference_color: color of reference sequence
406    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
407    :param gap_color: color for gaps
408    :param show_gaps: whether to show gaps otherwise it will be ignored
409    :param fancy_gaps: show gaps with a small black bar
410    :param show_x_label: whether to show x label
411    :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
412    :param cbar_fraction: fraction of the original ax reserved for the legend
413    """
414    # input check
415    _validate_input_parameters(aln, ax)
416
417    # validate colors
418    if not is_color_like(reference_color):
419        raise ValueError(f'{reference_color} for reference is not a color')
420
421    # Both options for gaps work hand in hand
422    if fancy_gaps:
423        show_gaps = True
424
425    # define zoom to plot
426    if aln.zoom is None:
427        zoom = (0, aln.length)
428    else:
429        zoom = aln.zoom
430
431    # get data
432    similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type)  # use normalized values here
433    similarity_aln = similarity_aln.round(2)  # round data for good color mapping
434
435    # determine min max values of the underlying matrix and create cmap
436    min_value, max_value = 0, 1
437    cmap = ScalarMappable(
438        norm=Normalize(
439            vmin=min_value,
440            vmax=max_value
441        ),
442        cmap=plt.get_cmap(cmap)
443    )
444
445    # create similarity values
446    similarity_values = np.arange(start=min_value, stop=max_value, step=0.01)
447    # round it to be absolutely sure that values match with rounded sim alignment
448    similarity_values = similarity_values.round(2)
449
450    # create plot
451    polygons, polygon_colors, patch_list = [], [], []
452
453    for i, seq_name in enumerate(aln.alignment):
454        y_position = len(aln.alignment) - i - 1
455        row = similarity_aln[i]
456
457        # plot a line below everything
458        if fancy_gaps:
459            ax.hlines(
460                y_position + 0.4,
461                xmin=zoom[0] - 0.5,
462                xmax=zoom[1] + 0.5,
463                color='black',
464                linestyle='-',
465                zorder=0,
466                linewidth=0.75
467            )
468
469        # plot the basic shape per sequence with gaps
470        _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name,
471                               cmap.to_rgba(max_value) if seq_name != aln.reference_id else reference_color,
472                               show_gaps)
473
474        # find consecutive stretches
475        stretches = _find_stretches(row)
476        # create polygons per stretch
477        _create_polygons(stretches, similarity_values, zoom, y_position, polygons, cmap, polygon_colors)
478
479        # add sequence text
480        if show_sequence:
481            _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, similarity_aln[i], similarity_aln, ax,
482                                zoom, y_position, 1, reference_color, show_gaps, cmap)
483
484    # Create the LineCollection: each segment is drawn in a single call.
485    ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt'))
486    ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors))
487
488    # legend
489    if show_cbar:
490        cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
491        cbar.set_ticks([min_value, max_value])
492        cbar.set_ticklabels(['low', 'high'])
493
494    # format seq names
495    _seq_names(aln, ax, custom_seq_names, show_seq_names)
496
497    # configure axis
498    ax.set_ylim(0, len(aln.alignment))
499    if show_title:
500        ax.set_title('similarity', loc='left')
501    _format_x_axis(aln, ax, show_x_label, show_left=False)

Generates a similarity alignment overview plot. Importantly the similarity values are normalized!

Parameters
  • aln: alignment MSA class
  • ax: matplotlib axes
  • matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
  • show_title: whether to show title
  • show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues
  • show_seq_names: whether to show seq names
  • custom_seq_names: custom seq names
  • reference_color: color of reference sequence
  • cmap: color mapping for % identity - see https: //matplotlib.org/stable/users/explain/colors/colormaps.html
  • gap_color: color for gaps
  • show_gaps: whether to show gaps otherwise it will be ignored
  • fancy_gaps: show gaps with a small black bar
  • show_x_label: whether to show x label
  • show_cbar: whether to show the legend - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
  • cbar_fraction: fraction of the original ax reserved for the legend
def stat_plot( aln: msaexplorer.explore.MSA, ax: matplotlib.axes._axes.Axes, stat_type: str, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True):
504def stat_plot(aln: explore.MSA, ax: plt.Axes, stat_type: str, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True):
505    """
506    Generate a plot for the various alignment stats.
507    :param aln: alignment MSA class
508    :param ax: matplotlib axes
509    :param stat_type: 'entropy', 'gc', 'coverage', 'ts/tv', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
510    :param line_color: color of the line
511    :param line_width: width of the line
512    :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
513    :param show_x_label: whether to show the x-axis label
514    :param show_title: whether to show the title
515    """
516
517    def moving_average(arr, window_size):
518        if window_size > 1:
519            i = 0
520            moving_averages, plotting_idx = [], []
521            while i < len(arr) + 1:
522                half_window_size = window_size // 2
523                window_left = arr[i - half_window_size : i] if i > half_window_size else arr[0:i]
524                window_right = arr[i: i + half_window_size] if i < len(arr) - half_window_size else arr[i: len(arr)]
525                moving_averages.append((sum(window_left) + sum(window_right)) / (len(window_left) + len(window_right)))
526                plotting_idx.append(i)
527                i += 1
528
529            return np.array(moving_averages), np.array(plotting_idx) if aln.zoom is None else np.array(plotting_idx) + aln.zoom[0]
530        else:
531            return arr, np.arange(aln.zoom[0], aln.zoom[1]) if aln.zoom is not None else np.arange(aln.length)
532
533    # define possible functions to calc here
534    stat_functions: Dict[str, Callable[[], list | ndarray]] = {
535        'gc': aln.calc_gc,
536        'entropy': aln.calc_entropy,
537        'coverage': aln.calc_coverage,
538        'identity': aln.calc_identity_alignment,
539        'similarity': aln.calc_similarity_alignment,
540        'ts tv score': aln.calc_transition_transversion_score
541    }
542
543    if stat_type not in stat_functions:
544        raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys())))
545
546    # input check
547    _validate_input_parameters(aln, ax)
548    if not is_color_like(line_color):
549        raise ValueError('line color is not a color')
550
551    # validate rolling average
552    if rolling_average < 1 or rolling_average > aln.length:
553        raise ValueError('rolling_average must be between 1 and length of sequence')
554
555    # generate input data
556    array = stat_functions[stat_type]()
557
558    if stat_type == 'identity':
559        min_value, max_value = -1, 0
560    elif stat_type == 'ts tv score':
561        min_value, max_value = -1, 1
562    else:
563        min_value, max_value = 0, 1
564    if stat_type in ['identity', 'similarity']:
565        # for the mean nan values get handled as the lowest possible number in the matrix
566        array = np.nan_to_num(array, True, min_value)
567        array = np.mean(array, axis=0)
568    data, plot_idx = moving_average(array, rolling_average)
569
570    # plot the data
571    ax.fill_between(
572        # this add dummy data left and right for better plotting
573        # otherwise only half of the step is shown
574        np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx,
575        np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data,
576        min_value,
577        linewidth = line_width,
578        edgecolor=line_color,
579        step='mid' if rolling_average == 1 else None,
580        facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none'
581    )
582    if stat_type == 'gc':
583        ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1)
584
585    # format axis
586    ax.set_ylim(min_value, max_value*0.1+max_value)
587    ax.set_yticks([min_value, max_value])
588    if stat_type == 'gc':
589        ax.set_yticklabels(['0', '100'])
590    elif stat_type == 'ts tv score':
591        ax.set_yticklabels(['tv', 'ts'])
592    else:
593        ax.set_yticklabels(['low', 'high'])
594
595    # show title
596    if show_title:
597        ax.set_title(
598            f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position',
599            loc='left'
600        )
601
602    _format_x_axis(aln, ax, show_x_label, show_left=True)

Generate a plot for the various alignment stats.

Parameters
  • aln: alignment MSA class
  • ax: matplotlib axes
  • stat_type: 'entropy', 'gc', 'coverage', 'ts/tv', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
  • line_color: color of the line
  • line_width: width of the line
  • rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
  • show_x_label: whether to show the x-axis label
  • show_title: whether to show the title
def variant_plot( aln: msaexplorer.explore.MSA, ax: matplotlib.axes._axes.Axes, lollisize: tuple[int, int] | list[int, int] = (1, 3), show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float | int, float | int] | list[float | int, float | int] = (1, 1)):
605def variant_plot(aln: explore.MSA, ax: plt.Axes, lollisize: tuple[int, int] | list[int, int] = (1, 3), show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int] = (1, 1)):
606    """
607    Plots variants.
608    :param aln: alignment MSA class
609    :param ax: matplotlib axes
610    :param lollisize: (stem_size, head_size)
611    :param show_x_label:  whether to show the x-axis label
612    :param show_legend: whether to show the legend
613    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
614    """
615
616    # validate input
617    _validate_input_parameters(aln, ax)
618    if not isinstance(lollisize, tuple) or len(lollisize) != 2:
619        raise ValueError('lollisize must be tuple of length 2 (stem, head)')
620    for _size in lollisize:
621        if not isinstance(_size, float | int) or _size <= 0:
622            raise ValueError('lollisize must be floats greater than zero')
623
624    # define colors
625    colors = config.CHAR_COLORS[aln.aln_type]
626    # get snps
627    snps = aln.get_snps()
628    # define where to plot (each ref type gets a separate line)
629    ref_y_positions, y_pos, detected_var = {}, 0, set()
630
631    # iterate over snp dict
632    for pos in snps['POS']:
633        for identifier in snps['POS'][pos]:
634            # fill in y pos dict
635            if identifier == 'ref':
636                if snps['POS'][pos]['ref'] not in ref_y_positions:
637                    ref_y_positions[snps['POS'][pos]['ref']] = y_pos
638                    y_pos += 1.1
639                    continue
640            # plot
641            if identifier == 'ALT':
642                for alt in snps['POS'][pos]['ALT']:
643                    ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos,
644                              ymin=ref_y_positions[snps['POS'][pos]['ref']],
645                              ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
646                              color=colors[alt],
647                              zorder=100,
648                              linewidth=lollisize[0]
649                              )
650                    ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos,
651                            ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
652                            color=colors[alt],
653                            marker='o',
654                            markersize=lollisize[1]
655                            )
656                    detected_var.add(alt)
657
658    # plot hlines
659    for y_char in ref_y_positions:
660        ax.hlines(
661            ref_y_positions[y_char],
662            xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5,
663            xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5,
664            color='black',
665            linestyle='-',
666            zorder=0,
667            linewidth=0.75
668        )
669    # create a custom legend
670    if show_legend:
671        custom_legend = [
672            ax.add_line(
673                plt.Line2D(
674                    [],
675                    [],
676                    color=colors[char],
677                    marker='o',
678                    linestyle='',
679                    markersize=5
680                )
681            ) for char in colors if char in detected_var
682        ]
683        ax.legend(
684            custom_legend,
685            [char for char in colors if char in detected_var],  # ensures correct sorting
686            loc='lower right',
687            title='variant',
688            bbox_to_anchor=bbox_to_anchor,
689            ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var),
690            frameon=False
691        )
692
693    # format axis
694    _format_x_axis(aln, ax, show_x_label, show_left=False)
695    ax.spines['left'].set_visible(False)
696    ax.set_yticks([ref_y_positions[x] for x in ref_y_positions])
697    ax.set_yticklabels(ref_y_positions.keys())
698    ax.set_ylim(0, y_pos)
699    ax.set_ylabel('reference')

Plots variants.

Parameters
  • aln: alignment MSA class
  • ax: matplotlib axes
  • lollisize: (stem_size, head_size)
  • show_x_label: whether to show the x-axis label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
def orf_plot( aln: msaexplorer.explore.MSA, ax: matplotlib.axes._axes.Axes, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1):
702def orf_plot(aln: explore.MSA, ax: plt.Axes, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1):
703    """
704    Plot conserved ORFs.
705    :param aln: alignment MSA class
706    :param ax: matplotlib axes
707    :param min_length: minimum length of orf
708    :param non_overlapping_orfs: whether to consider overlapping orfs
709    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
710    :param direction_marker_size: marker size for direction marker, not shown if marker_size == None
711    :param show_x_label: whether to show the x-axis label
712    :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
713    :param cbar_fraction: fraction of the original ax reserved for the colorbar
714    """
715
716    # normalize colorbar
717    cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap))
718
719    # validate input
720    _validate_input_parameters(aln, ax)
721
722    # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only
723    # searched within the zoomed region)
724    aln_temp = deepcopy(aln)
725    aln_temp.zoom = None
726    if non_overlapping_orfs:
727        annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length)
728    else:
729        annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length)
730
731    # filter dict for zoom
732    if aln.zoom is not None:
733        annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])}
734
735    # add track for plotting
736    _add_track_positions(annotation_dict)
737
738    # plot
739    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap)
740
741    # legend
742    if show_cbar:
743        cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
744        cbar.set_label('% identity')
745        cbar.set_ticks([0, 100])
746
747    # format axis
748    _format_x_axis(aln, ax, show_x_label, show_left=False)
749    ax.set_ylim(bottom=0.8)
750    ax.set_yticks([])
751    ax.set_yticklabels([])
752    ax.set_title('conserved orfs', loc='left')

Plot conserved ORFs.

Parameters
  • aln: alignment MSA class
  • ax: matplotlib axes
  • min_length: minimum length of orf
  • non_overlapping_orfs: whether to consider overlapping orfs
  • cmap: color mapping for % identity - see https: //matplotlib.org/stable/users/explain/colors/colormaps.html
  • direction_marker_size: marker size for direction marker, not shown if marker_size == None
  • show_x_label: whether to show the x-axis label
  • show_cbar: whether to show the colorbar - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
  • cbar_fraction: fraction of the original ax reserved for the colorbar
def annotation_plot( aln: msaexplorer.explore.MSA, annotation: msaexplorer.explore.Annotation | str, ax: matplotlib.axes._axes.Axes, feature_to_plot: str, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False):
755def annotation_plot(aln: explore.MSA, annotation: explore.Annotation | str, ax: plt.Axes, feature_to_plot: str, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False):
756    """
757    Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
758    :param aln: alignment MSA class
759    :param annotation: annotation class | path to annotation file
760    :param ax: matplotlib axes
761    :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
762    :param color: color for the annotation
763    :param show_direction: show strand information
764    :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True
765    :param show_x_label: whether to show the x-axis label
766    """
767    # helper function
768    def parse_annotation_from_string(path: str, msa: explore.MSA) -> explore.Annotation:
769        """
770        Parse annotation.
771        :param path: path to annotation
772        :param msa: msa object
773        :return: parsed annotation
774        """
775        if os.path.exists(path):
776            # reset zoom so the annotation is correctly parsed
777            msa_temp = deepcopy(msa)
778            msa_temp.zoom = None
779            return explore.Annotation(msa_temp, path)
780        else:
781            raise FileNotFoundError()
782
783    # parse from path
784    if type(annotation) is str:
785        annotation = parse_annotation_from_string(annotation, aln)
786
787    # validate input
788    _validate_input_parameters(aln, ax, annotation)
789    if not is_color_like(color):
790        raise ValueError(f'{color} for reference is not a color')
791
792    # ignore features to plot for bed files (here it is written into one feature)
793    if annotation.ann_type == 'bed':
794        annotation_dict = annotation.features['region']
795        feature_to_plot = 'bed regions'
796    else:
797        # try to subset the annotation dict
798        try:
799            annotation_dict = annotation.features[feature_to_plot]
800        except KeyError:
801            raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.')
802
803    # plotting and formating
804    _add_track_positions(annotation_dict)
805    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color)
806    _format_x_axis(aln, ax, show_x_label, show_left=False)
807    ax.set_ylim(bottom=0.8)
808    ax.set_yticks([])
809    ax.set_yticklabels([])
810    ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')

Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.

Parameters
  • aln: alignment MSA class
  • annotation: annotation class | path to annotation file
  • ax: matplotlib axes
  • feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
  • color: color for the annotation
  • show_direction: show strand information
  • direction_marker_size: marker size for direction marker, only relevant if show_direction is True
  • show_x_label: whether to show the x-axis label