msaexplorer.draw

The draw module

The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a matplotlib axes has to be passed to the plotting function.

Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments. The functions will raise the appropriate exception in such a case.

Functions

  1r"""
  2# The draw module
  3
  4The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a
  5`matplotlib axes` has to be passed to the plotting function.
  6
  7Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments.
  8The functions will raise the appropriate exception in such a case.
  9
 10## Functions
 11
 12"""
 13import pathlib
 14# built-in
 15from itertools import chain
 16from typing import Callable, Dict
 17from copy import deepcopy
 18import os
 19
 20import matplotlib
 21from numpy import ndarray
 22
 23# MSAexplorer
 24from msaexplorer import explore, config
 25
 26# libs
 27import numpy as np
 28import matplotlib.pyplot as plt
 29import matplotlib.patches as patches
 30from matplotlib.cm import ScalarMappable
 31from matplotlib.colors import is_color_like, Normalize, to_rgba
 32from matplotlib.collections import PatchCollection, PolyCollection
 33from matplotlib.text import TextPath
 34from matplotlib.patches import PathPatch
 35from matplotlib.font_manager import FontProperties
 36from matplotlib.transforms import Affine2D
 37
 38
 39# general helper functions
 40def _validate_input_parameters(aln: explore.MSA, ax: plt.Axes, annotation: explore.Annotation | None = None):
 41    """
 42    Validate MSA class and axis.
 43    """
 44    if not isinstance(aln, explore.MSA):
 45        raise ValueError('alignment has to be an MSA class. use explore.MSA() to read in alignment')
 46    if ax is not None and not isinstance(ax, plt.Axes):
 47            raise ValueError('ax has to be an matplotlib axis')
 48    if annotation is not None:
 49        if not isinstance(annotation, explore.Annotation):
 50            raise ValueError('annotation has to be an annotation class. use explore.Annotation() to read in annotation')
 51
 52
 53def _format_x_axis(aln: explore.MSA, ax: plt.Axes, show_x_label: bool, show_left: bool):
 54    """
 55    General axis formatting.
 56    """
 57    ax.set_xlim(
 58        (aln.zoom[0] - 0.5, aln.zoom[0] + aln.length - 0.5) if aln.zoom is not None else (-0.5, aln.length - 0.5)
 59    )
 60    ax.spines['top'].set_visible(False)
 61    ax.spines['right'].set_visible(False)
 62    if show_x_label:
 63        ax.set_xlabel('alignment position')
 64    if not show_left:
 65        ax.spines['left'].set_visible(False)
 66
 67
 68def _find_stretches(row, non_nan_only=False) -> list[tuple[int, int, int]] | list[tuple[int, int]] :
 69    """
 70    Finds consecutive stretches of values in an array, with an option to exclude NaN stretches.
 71
 72    :param row: NumPy array of values
 73    :param non_nan_only: If True, only returns stretches of non-NaN values.
 74    :return: List of stretches (start, end, value at start) for all values or non-NaN values only.
 75    """
 76    if row.size == 0:
 77        return []
 78
 79    if non_nan_only:
 80        # Create a boolean mask for non-NaN values
 81        non_nan_mask = ~np.isnan(row)
 82        # Find changes in the mask
 83        changes = np.diff(non_nan_mask.astype(int)) != 0
 84        change_idx = np.nonzero(changes)[0]
 85        starts = np.concatenate(([0], change_idx + 1))
 86        ends = np.concatenate((change_idx, [len(row) - 1]))
 87
 88        # Return only stretches that start with non-NaN values
 89        return [(start, end) for start, end in zip(starts, ends) if non_nan_mask[start]]
 90    else:
 91        # Find change points: where adjacent cells differ.
 92        changes = np.diff(row) != 0
 93        change_idx = np.nonzero(changes)[0]
 94        starts = np.concatenate(([0], change_idx + 1))
 95        ends = np.concatenate((change_idx, [len(row) - 1]))
 96
 97        return [(start, end, row[start]) for start, end in zip(starts, ends)]
 98
 99
100def _seq_names(aln: explore.MSA, ax: plt.Axes, custom_seq_names: tuple, show_seq_names: bool):
101    """
102    Validate custom names and set show names to True. Format axis accordingly.
103    """
104    if custom_seq_names:
105        show_seq_names = True
106        if not isinstance(custom_seq_names, tuple):
107            raise ValueError('configure your custom names list: custom_names=(name1, name2...)')
108        if len(custom_seq_names) != len(aln.alignment.keys()):
109            raise ValueError('length of sequences not equal to number of custom names')
110    if show_seq_names:
111        ax.yaxis.set_ticks_position('none')
112        ax.set_yticks(np.arange(len(aln.alignment)) + 0.4)
113        if custom_seq_names:
114            ax.set_yticklabels(custom_seq_names[::-1])
115        else:
116            names = [x.split(' ')[0] for x in list(aln.alignment.keys())[::-1]]
117            ax.set_yticklabels(names)
118    else:
119        ax.set_yticks([])
120
121
122def _create_identity_patch(row, aln: explore.MSA, col: list, zoom: tuple[int, int], y_position: float | int, reference_color: str, seq_name: str, identity_color: str | ndarray, show_gaps: bool):
123    """
124    Creates the initial patch.
125    """
126    if show_gaps:
127        # plot a rectangle for parts that do not have gaps
128        for stretch in _find_stretches(row, True):
129            col.append(patches.Rectangle((stretch[0] + zoom[0] - 0.5, y_position), stretch[1] - stretch[0] + 1, 0.8,
130                                                           facecolor=reference_color if seq_name == aln.reference_id else identity_color
131                                                           )
132
133                                         )
134    # just plot a rectangle
135    else:
136        col.append(patches.Rectangle((zoom[0] - 0.5, y_position), zoom[1] - zoom[0], 0.8,
137                                     facecolor=reference_color if seq_name == aln.reference_id else identity_color
138                                     )
139                   )
140
141
142def _create_polygons(stretches: list, identity_values: list | ndarray, zoom: tuple, y_position: int, polygons: list, aln_colors: dict | ScalarMappable, polygon_colors: list, detected_identity_values: set | None = None):
143    """
144    create the individual polygons
145    """
146
147    for start, end, value in stretches:
148        if value not in identity_values:
149            continue
150        if detected_identity_values is not None:
151            detected_identity_values.add(value)
152        width = end + 1 - start
153        # Calculate x coordinates adjusted for zoom and centering
154        x0 = start + zoom[0] - 0.5
155        x1 = x0 + width
156        # Define the rectangle corners
157        rect_coords = [
158            (x0, y_position),
159            (x1, y_position),
160            (x1, y_position + 0.8),
161            (x0, y_position + 0.8),
162            (x0, y_position)
163        ]
164        polygons.append(rect_coords)
165        if type(aln_colors) != ScalarMappable:
166            polygon_colors.append(aln_colors[value]['color'])
167        else:
168            polygon_colors.append(aln_colors.to_rgba(value))
169
170
171def _plot_annotation(annotation_dict: dict, ax: plt.Axes, direction_marker_size: int | None, color: str | ScalarMappable):
172    """
173    Plot annotations
174    :param annotation_dict: dict of annotations
175    :param ax: matplotlib Axes
176    :param direction_marker_size: size of marker
177    :param color: color of annotation (color or scalar)
178    """
179    for annotation in annotation_dict:
180        for locations in annotation_dict[annotation]['location']:
181            x_value = locations[0]
182            length = locations[1] - locations[0]
183            ax.add_patch(
184                patches.FancyBboxPatch(
185                    (x_value, annotation_dict[annotation]['track'] + 1),
186                    length,
187                    0.8,
188                    boxstyle="Round, pad=0",
189                    ec="black",
190                    fc=color.to_rgba(annotation_dict[annotation]['conservation']) if isinstance(color, ScalarMappable) else color,
191                )
192            )
193            if direction_marker_size is not None:
194                if annotation_dict[annotation]['strand'] == '-':
195                    marker = '<'
196                else:
197                    marker = '>'
198                ax.plot(x_value + length/2, annotation_dict[annotation]['track'] + 1.4, marker=marker, markersize=direction_marker_size, color='white', markeredgecolor='black')
199
200        # plot linked annotations (such as splicing)
201        if len(annotation_dict[annotation]['location']) > 1:
202            y_value = annotation_dict[annotation]['track'] + 1.4
203            start = None
204            for locations in annotation_dict[annotation]['location']:
205                if start is None:
206                    start = locations[1]
207                    continue
208                ax.plot([start, locations[0]], [y_value, y_value], '--', linewidth=2, color='black')
209                start = locations[1]
210
211
212def _add_track_positions(annotation_dic):
213    # create a dict and sort
214    annotation_dic = dict(sorted(annotation_dic.items(), key=lambda x: x[1]['location'][0][0]))
215
216    # remember for each track the largest stop
217    track_stops = [0]
218
219    for ann in annotation_dic:
220        flattened_locations = list(chain.from_iterable(annotation_dic[ann]['location']))  # flatten list
221        track = 0
222        # check if a start of a gene is smaller than the stop of the current track
223        # -> move to new track
224        while flattened_locations[0] < track_stops[track]:
225            track += 1
226            # if all prior tracks are potentially causing an overlap
227            # create a new track and break
228            if len(track_stops) <= track:
229                track_stops.append(0)
230                break
231        # in the current track remember the stop of the current gene
232        track_stops[track] = flattened_locations[-1]
233        # and indicate the track in the dict
234        annotation_dic[ann]['track'] = track
235
236    return annotation_dic
237
238
239def _get_contrast_text_color(rgba_color):
240    """
241    compute the brightness of a color
242    """
243    r, g, b, a = rgba_color
244    brightness = (r * 299 + g * 587 + b * 114) / 1000
245
246    return 'white' if brightness < 0.5 else 'black'
247
248
249def _plot_sequence_text(aln: explore.MSA, seq_name: str, ref_name: str, always_text: bool, values: ndarray, matrix: ndarray, ax:plt.Axes, zoom: tuple, y_position:int, value_to_skip: int, ref_color:str, show_gaps: bool, cmap: None | ScalarMappable = None, colorscheme: None | dict = None):
250    """
251    Plot sequence text - however this will be done even if there is not enough space.
252    Might need some rework in the future.
253    """
254    x_text = 0
255    if seq_name == ref_name:
256        different_cols = np.any((matrix != value_to_skip) & ~np.isnan(matrix), axis=0)
257    else:
258        different_cols = [False]*aln.length
259
260    for idx, (character, value) in enumerate(zip(aln.alignment[seq_name], values)):
261        if value != value_to_skip and character != '-' or seq_name == ref_name and character != '-' or character == '-'and not show_gaps or always_text and character != '-':
262            # text color
263            if seq_name == ref_name:
264                text_color = _get_contrast_text_color(to_rgba(ref_color))
265            elif cmap is not None:
266                text_color = _get_contrast_text_color(cmap.to_rgba(value))
267            else:
268                text_color = _get_contrast_text_color(to_rgba(colorscheme[value]['color']))
269
270            ax.text(
271                x=x_text + zoom[0] if zoom is not None else x_text,
272                y=y_position + 0.4,
273                s=character,
274                fontweight='bold' if different_cols[idx] else 'normal',
275                ha='center',
276                va='center_baseline',
277                c=text_color if value != value_to_skip or seq_name == ref_name else 'dimgrey'
278            )
279        x_text += 1
280
281
282def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', show_mask:bool = True, show_gaps:bool = True, fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int]= (1, 1)):
283    """
284    Generates an identity alignment overview plot.
285    :param aln: alignment MSA class or path
286    :param ax: matplotlib axes
287    :param show_title: whether to show title
288    :param show_seq_names: whether to show seq names
289    :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
290    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
291    :param custom_seq_names: custom seq names
292    :param reference_color: color of reference sequence
293    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
294    :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
295    :param fancy_gaps: show gaps with a small black bar
296    :param show_mismatches: whether to show mismatches otherwise it will be shown as match
297    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
298    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity
299    :param show_x_label: whether to show x label
300    :param show_legend: whether to show the legend
301    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
302    """
303
304    # Validate inputs and colors
305    if type(aln) is str:
306        if os.path.exists(aln):
307            aln = explore.MSA(aln)
308        else:
309            raise FileNotFoundError(f'File {aln} does not exist')
310
311    _validate_input_parameters(aln, ax)
312    if ax is None:
313        ax = plt.gca()
314    if not is_color_like(reference_color):
315        raise ValueError(f'{reference_color} for reference is not a color')
316
317    if color_scheme is not None:
318        if aln.aln_type == 'AA' and color_scheme not in ['standard', 'clustal', 'zappo', 'hydrophobicity']:
319            raise ValueError(f'{color_scheme} is not a supported coloring scheme for {aln.aln_type} alignments. Supported are: standard, clustal, zappo, hydrophobicity')
320        elif aln.aln_type in ['DNA', 'RNA'] and color_scheme not in ['standard', 'purine_pyrimidine', 'strong_weak']:
321            raise ValueError(f'{color_scheme} is not a supported coloring scheme for {aln.aln_type} alignments. Supported are: standard, purine_pyrimidine, strong_weak')
322
323    # Both options for gaps work hand in hand
324    if fancy_gaps:
325        show_gaps = True
326
327    # Determine zoom
328    zoom = (0, aln.length) if aln.zoom is None else aln.zoom
329
330    # Set up color mapping and identity values
331    aln_colors = config.IDENTITY_COLORS.copy()
332    identity_values = [-1, -2, -3]  # -1 = mismatch, -2 = mask, -3 ambiguity
333    if color_scheme is not None:
334        colors_to_extend = config.CHAR_COLORS[aln.aln_type][color_scheme]
335        identity_values += [x + 1 for x in range(len(colors_to_extend))] # x+1 is needed to allow correct mapping
336        # use the standard setting for the index (same as in aln.calc_identity_alignment)
337        # and map the corresponding color scheme to it
338        for idx, char in enumerate(config.CHAR_COLORS[aln.aln_type]['standard']):
339            aln_colors[idx + 1] = {'type': char, 'color': colors_to_extend[char]}
340
341    # Compute identity alignment
342    identity_aln = aln.calc_identity_alignment(
343        encode_mask=show_mask,
344        encode_gaps=show_gaps,
345        encode_mismatches=show_mismatches,
346        encode_ambiguities=show_ambiguities,
347        encode_each_mismatch_char=True if color_scheme is not None else False
348    )
349
350    # List to store polygons
351    detected_identity_values = {0}
352    polygons, polygon_colors, patch_list = [], [], []
353
354    for i, seq_name in enumerate(aln.alignment):
355        y_position = len(aln.alignment) - i - 1
356        row = identity_aln[i]
357
358        # plot a line below everything
359        if fancy_gaps:
360            ax.hlines(
361                y_position + 0.4,
362                xmin=zoom[0] - 0.5,
363                xmax=zoom[1] + 0.5,
364                color='black',
365                linestyle='-',
366                zorder=0,
367                linewidth=0.75
368            )
369
370        # plot the basic shape per sequence with gaps
371        _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, aln_colors[0]['color'], show_gaps)
372
373        # find consecutive stretches
374        stretches = _find_stretches(row)
375        # create polygons per stretch
376        _create_polygons(stretches, identity_values, zoom, y_position, polygons, aln_colors, polygon_colors, detected_identity_values)
377
378        # add sequence text
379        if show_identity_sequence or show_sequence_all:
380            _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, show_sequence_all, identity_aln[i], identity_aln, ax, zoom, y_position, 0, reference_color, show_gaps, colorscheme=aln_colors)
381
382    # Create the LineCollection: each segment is drawn in a single call.
383    ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt'))
384    ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors))
385
386    # custom legend
387    if show_legend:
388        if color_scheme is not None and color_scheme != 'standard':
389            for x in aln_colors:
390                for group in config.CHAR_GROUPS[aln.aln_type][color_scheme]:
391                    if aln_colors[x]['type'] in config.CHAR_GROUPS[aln.aln_type][color_scheme][group]:
392                        aln_colors[x]['type'] = group
393                        break
394        # create it
395        handels, labels, detected_groups = [], [], set()
396        for x in aln_colors:
397            if x in detected_identity_values and aln_colors[x]['type'] not in detected_groups:
398                handels.append(
399                    ax.add_line(
400                        plt.Line2D(
401                            [],
402                            [],
403                            color=aln_colors[x]['color'] if color_scheme != 'hydrophobicity' or x == 0 else config.CHAR_COLORS[aln.aln_type]['hydrophobicity'][
404                                config.CHAR_GROUPS[aln.aln_type]['hydrophobicity'][aln_colors[x]['type']][0]
405                            ],
406                            marker='s',
407                            markeredgecolor='grey',
408                            linestyle='',
409                            markersize=10))
410                )
411                labels.append(aln_colors[x]['type'])
412                detected_groups.add(aln_colors[x]['type'])
413
414        # ncols
415        if color_scheme is None or aln.aln_type != 'AA':
416            ncols = len(detected_identity_values)
417        elif color_scheme == 'standard':
418            ncols = (len(detected_identity_values) + 1) / 2
419        else:
420            ncols = (len(detected_groups) + 1) / 2
421
422        # plot it
423        ax.legend(
424            handels,
425            labels,
426            loc='lower right',
427            bbox_to_anchor=bbox_to_anchor,
428            ncols=ncols,
429            frameon=False
430        )
431    _seq_names(aln, ax, custom_seq_names, show_seq_names)
432
433    # configure axis
434    ax.set_ylim(0, len(aln.alignment))
435    if show_title:
436        ax.set_title('identity', loc='left')
437    _format_x_axis(aln, ax, show_x_label, show_left=False)
438
439    return ax
440
441
442def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True, show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', cmap: str = 'twilight_r', show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1):
443    """
444    Generates a similarity alignment overview plot. Importantly the similarity values are normalized!
445    :param aln: alignment MSA class or path
446    :param ax: matplotlib axes
447    :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
448    :param show_title: whether to show title
449    :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
450    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
451    :param show_seq_names: whether to show seq names
452    :param custom_seq_names: custom seq names
453    :param reference_color: color of reference sequence
454    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
455    :param show_gaps: whether to show gaps otherwise it will be ignored
456    :param fancy_gaps: show gaps with a small black bar
457    :param show_x_label: whether to show x label
458    :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
459    :param cbar_fraction: fraction of the original ax reserved for the legend
460    """
461    # input check
462    if type(aln) is str:
463        if os.path.exists(aln):
464            aln = explore.MSA(aln)
465        else:
466            raise FileNotFoundError(f'File {aln} does not exist')
467    _validate_input_parameters(aln, ax)
468    if ax is None:
469        ax = plt.gca()
470    # validate colors
471    if not is_color_like(reference_color):
472        raise ValueError(f'{reference_color} for reference is not a color')
473
474    # Both options for gaps work hand in hand
475    if fancy_gaps:
476        show_gaps = True
477
478    # define zoom to plot
479    if aln.zoom is None:
480        zoom = (0, aln.length)
481    else:
482        zoom = aln.zoom
483
484    # get data
485    similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type)  # use normalized values here
486    similarity_aln = similarity_aln.round(2)  # round data for good color mapping
487
488    # determine min max values of the underlying matrix and create cmap
489    min_value, max_value = 0, 1
490    cmap = ScalarMappable(
491        norm=Normalize(
492            vmin=min_value,
493            vmax=max_value
494        ),
495        cmap=plt.get_cmap(cmap)
496    )
497
498    # create similarity values
499    similarity_values = np.arange(start=min_value, stop=max_value, step=0.01)
500    # round it to be absolutely sure that values match with rounded sim alignment
501    similarity_values = similarity_values.round(2)
502
503    # create plot
504    polygons, polygon_colors, patch_list = [], [], []
505
506    for i, seq_name in enumerate(aln.alignment):
507        y_position = len(aln.alignment) - i - 1
508        row = similarity_aln[i]
509
510        # plot a line below everything
511        if fancy_gaps:
512            ax.hlines(
513                y_position + 0.4,
514                xmin=zoom[0] - 0.5,
515                xmax=zoom[1] + 0.5,
516                color='black',
517                linestyle='-',
518                zorder=0,
519                linewidth=0.75
520            )
521
522        # plot the basic shape per sequence with gaps
523        _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name,
524                               cmap.to_rgba(max_value) if seq_name != aln.reference_id else reference_color,
525                               show_gaps)
526
527        # find consecutive stretches
528        stretches = _find_stretches(row)
529        # create polygons per stretch
530        _create_polygons(stretches, similarity_values, zoom, y_position, polygons, cmap, polygon_colors)
531
532        # add sequence text
533        if show_sequence_all or show_similarity_sequence:
534            _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, show_sequence_all, similarity_aln[i], similarity_aln, ax,
535                                zoom, y_position, 1, reference_color, show_gaps, cmap)
536
537    # Create the LineCollection: each segment is drawn in a single call.
538    ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt'))
539    ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors))
540
541    # legend
542    if show_cbar:
543        cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
544        cbar.set_ticks([min_value, max_value])
545        cbar.set_ticklabels(['low', 'high'])
546
547    # format seq names
548    _seq_names(aln, ax, custom_seq_names, show_seq_names)
549
550    # configure axis
551    ax.set_ylim(0, len(aln.alignment))
552    if show_title:
553        ax.set_title('similarity', loc='left')
554    _format_x_axis(aln, ax, show_x_label, show_left=False)
555
556    return ax
557
558
559def _moving_average(arr: ndarray, window_size: int, zoom: tuple | None, aln_length: int) -> tuple[ndarray, ndarray]:
560    """
561    Calculate the moving average of an array.
562    :param arr: array with values
563    :param window_size: size of the moving average
564    :param zoom: zoom of the alignment
565    :param aln_length: length of the alignment
566    :return: new array with moving average
567    """
568    if window_size > 1:
569        i = 0
570        moving_averages, plotting_idx = [], []
571        while i < len(arr) + 1:
572            half_window_size = window_size // 2
573            window_left = arr[i - half_window_size : i] if i > half_window_size else arr[0:i]
574            window_right = arr[i: i + half_window_size] if i < len(arr) - half_window_size else arr[i: len(arr)]
575            moving_averages.append((sum(window_left) + sum(window_right)) / (len(window_left) + len(window_right)))
576            plotting_idx.append(i)
577            i += 1
578
579        return np.array(moving_averages), np.array(plotting_idx) if zoom is None else np.array(plotting_idx) + zoom[0]
580    else:
581        return arr, np.arange(zoom[0], zoom[1]) if zoom is not None else np.arange(aln_length)
582
583
584def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True):
585    """
586    Generate a plot for the various alignment stats.
587    :param aln: alignment MSA class or path
588    :param ax: matplotlib axes
589    :param stat_type: 'entropy', 'gc', 'coverage', 'ts/tv', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
590    :param line_color: color of the line
591    :param line_width: width of the line
592    :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
593    :param show_x_label: whether to show the x-axis label
594    :param show_title: whether to show the title
595    """
596
597    # define possible functions to calc here
598    stat_functions: Dict[str, Callable[[], list | ndarray]] = {
599        'gc': aln.calc_gc,
600        'entropy': aln.calc_entropy,
601        'coverage': aln.calc_coverage,
602        'identity': aln.calc_identity_alignment,
603        'similarity': aln.calc_similarity_alignment,
604        'ts tv score': aln.calc_transition_transversion_score
605    }
606
607    if stat_type not in stat_functions:
608        raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys())))
609
610    # input check
611    if type(aln) is str:
612        if os.path.exists(aln):
613            aln = explore.MSA(aln)
614        else:
615            raise FileNotFoundError(f'File {aln} does not exist')
616    _validate_input_parameters(aln, ax)
617    if ax is None:
618        ax = plt.gca()
619    if not is_color_like(line_color):
620        raise ValueError('line color is not a color')
621
622    # validate rolling average
623    if rolling_average < 1 or rolling_average > aln.length:
624        raise ValueError('rolling_average must be between 1 and length of sequence')
625
626    # generate input data
627    array = stat_functions[stat_type]()
628
629    if stat_type == 'identity':
630        min_value, max_value = -1, 0
631    elif stat_type == 'ts tv score':
632        min_value, max_value = -1, 1
633    else:
634        min_value, max_value = 0, 1
635    if stat_type in ['identity', 'similarity']:
636        # for the mean nan values get handled as the lowest possible number in the matrix
637        array = np.nan_to_num(array, True, min_value)
638        array = np.mean(array, axis=0)
639    data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length)
640
641    # plot the data
642    ax.fill_between(
643        # this add dummy data left and right for better plotting
644        # otherwise only half of the step is shown
645        np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx,
646        np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data,
647        min_value,
648        linewidth = line_width,
649        edgecolor=line_color,
650        step='mid' if rolling_average == 1 else None,
651        facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none'
652    )
653    if stat_type == 'gc':
654        ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1)
655
656    # format axis
657    ax.set_ylim(min_value, max_value*0.1+max_value)
658    ax.set_yticks([min_value, max_value])
659    if stat_type == 'gc':
660        ax.set_yticklabels(['0', '100'])
661    elif stat_type == 'ts tv score':
662        ax.set_yticklabels(['tv', 'ts'])
663    else:
664        ax.set_yticklabels(['low', 'high'])
665
666    # show title
667    if show_title:
668        ax.set_title(
669            f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position',
670            loc='left'
671        )
672
673    _format_x_axis(aln, ax, show_x_label, show_left=True)
674
675    return ax
676
677
678def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list[int, int] = (1, 3), color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int] = (1, 1)):
679    """
680    Plots variants.
681    :param aln: alignment MSA class or path
682    :param ax: matplotlib axes
683    :param lollisize: (stem_size, head_size)
684    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
685    :param show_x_label:  whether to show the x-axis label
686    :param show_legend: whether to show the legend
687    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
688    """
689
690    # validate input
691    if type(aln) is str:
692        if os.path.exists(aln):
693            aln = explore.MSA(aln)
694        else:
695            raise FileNotFoundError(f'File {aln} does not exist')
696    _validate_input_parameters(aln, ax)
697    if ax is None:
698        ax = plt.gca()
699    if not isinstance(lollisize, tuple) or len(lollisize) != 2:
700        raise ValueError('lollisize must be tuple of length 2 (stem, head)')
701    for _size in lollisize:
702        if not isinstance(_size, float | int) or _size <= 0:
703            raise ValueError('lollisize must be floats greater than zero')
704
705    # define colors
706    colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
707    # get snps
708    snps = aln.get_snps()
709    # define where to plot (each ref type gets a separate line)
710    ref_y_positions, y_pos, detected_var = {}, 0, set()
711
712    # iterate over snp dict
713    for pos in snps['POS']:
714        for identifier in snps['POS'][pos]:
715            # fill in y pos dict
716            if identifier == 'ref':
717                if snps['POS'][pos]['ref'] not in ref_y_positions:
718                    ref_y_positions[snps['POS'][pos]['ref']] = y_pos
719                    y_pos += 1.1
720                    continue
721            # plot
722            if identifier == 'ALT':
723                for alt in snps['POS'][pos]['ALT']:
724                    ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos,
725                              ymin=ref_y_positions[snps['POS'][pos]['ref']],
726                              ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
727                              color=colors[alt],
728                              zorder=100,
729                              linewidth=lollisize[0]
730                              )
731                    ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos,
732                            ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
733                            color=colors[alt],
734                            marker='o',
735                            markersize=lollisize[1]
736                            )
737                    detected_var.add(alt)
738
739    # plot hlines
740    for y_char in ref_y_positions:
741        ax.hlines(
742            ref_y_positions[y_char],
743            xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5,
744            xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5,
745            color='black',
746            linestyle='-',
747            zorder=0,
748            linewidth=0.75
749        )
750    # create a custom legend
751    if show_legend:
752        custom_legend = [
753            ax.add_line(
754                plt.Line2D(
755                    [],
756                    [],
757                    color=colors[char],
758                    marker='o',
759                    linestyle='',
760                    markersize=5
761                )
762            ) for char in colors if char in detected_var
763        ]
764        ax.legend(
765            custom_legend,
766            [char for char in colors if char in detected_var],  # ensures correct sorting
767            loc='lower right',
768            title='variant',
769            bbox_to_anchor=bbox_to_anchor,
770            ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var),
771            frameon=False
772        )
773
774    # format axis
775    _format_x_axis(aln, ax, show_x_label, show_left=False)
776    ax.spines['left'].set_visible(False)
777    ax.set_yticks([ref_y_positions[x] for x in ref_y_positions])
778    ax.set_yticklabels(ref_y_positions.keys())
779    ax.set_ylim(0, y_pos)
780    ax.set_ylabel('reference')
781
782    return ax
783
784
785def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1):
786    """
787    Plot conserved ORFs.
788    :param aln: alignment MSA class or path
789    :param ax: matplotlib axes
790    :param min_length: minimum length of orf
791    :param non_overlapping_orfs: whether to consider overlapping orfs
792    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
793    :param direction_marker_size: marker size for direction marker, not shown if marker_size == None
794    :param show_x_label: whether to show the x-axis label
795    :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
796    :param cbar_fraction: fraction of the original ax reserved for the colorbar
797    """
798
799    # normalize colorbar
800    cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap))
801
802    # validate input
803    if type(aln) is str:
804        if os.path.exists(aln):
805            aln = explore.MSA(aln)
806        else:
807            raise FileNotFoundError(f'File {aln} does not exist')
808    _validate_input_parameters(aln, ax)
809    _validate_input_parameters(aln, ax)
810    if ax is None:
811        ax = plt.gca()
812
813    # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only
814    # searched within the zoomed region)
815    aln_temp = deepcopy(aln)
816    aln_temp.zoom = None
817    if non_overlapping_orfs:
818        annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length)
819    else:
820        annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length)
821
822    # filter dict for zoom
823    if aln.zoom is not None:
824        annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])}
825
826    # add track for plotting
827    _add_track_positions(annotation_dict)
828
829    # plot
830    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap)
831
832    # legend
833    if show_cbar:
834        cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
835        cbar.set_label('% identity')
836        cbar.set_ticks([0, 100])
837
838    # format axis
839    _format_x_axis(aln, ax, show_x_label, show_left=False)
840    ax.set_ylim(bottom=0.8)
841    ax.set_yticks([])
842    ax.set_yticklabels([])
843    ax.set_title('conserved orfs', loc='left')
844
845    return ax
846
847
848def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False):
849    """
850    Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
851    :param aln: alignment MSA class
852    :param annotation: annotation class | path to annotation file
853    :param ax: matplotlib axes
854    :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
855    :param color: color for the annotation
856    :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True
857    :param show_x_label: whether to show the x-axis label
858    """
859    # helper function
860    def parse_annotation_from_string(path: str, msa: explore.MSA) -> explore.Annotation:
861        """
862        Parse annotation.
863        :param path: path to annotation
864        :param msa: msa object
865        :return: parsed annotation
866        """
867        if os.path.exists(path):
868            # reset zoom so the annotation is correctly parsed
869            msa_temp = deepcopy(msa)
870            msa_temp.zoom = None
871            return explore.Annotation(msa_temp, path)
872        else:
873            raise FileNotFoundError()
874
875    # validate input
876    if type(aln) is str:
877        if os.path.exists(aln):
878            aln = explore.MSA(aln)
879        else:
880            raise FileNotFoundError(f'File {aln} does not exist')
881    if type(annotation) is str:
882        annotation = parse_annotation_from_string(annotation, aln)
883    _validate_input_parameters(aln, ax, annotation)
884    if ax is None:
885        ax = plt.gca()
886    if not is_color_like(color):
887        raise ValueError(f'{color} for reference is not a color')
888
889    # ignore features to plot for bed files (here it is written into one feature)
890    if annotation.ann_type == 'bed':
891        annotation_dict = annotation.features['region']
892        feature_to_plot = 'bed regions'
893    else:
894        # try to subset the annotation dict
895        try:
896            annotation_dict = annotation.features[feature_to_plot]
897        except KeyError:
898            raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.')
899
900    # plotting and formating
901    _add_track_positions(annotation_dict)
902    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color)
903    _format_x_axis(aln, ax, show_x_label, show_left=False)
904    ax.set_ylim(bottom=0.8)
905    ax.set_yticks([])
906    ax.set_yticklabels([])
907    ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')
908
909    return ax
910
911
912def sequence_logo(aln:explore.MSA | str, ax:plt.Axes | None = None, color_scheme: str = 'standard', plot_type: str = 'stacked', show_x_label:bool = False):
913    """
914    Plot sequence logo or stacked area plot (use the first one with appropriate zoom levels). The
915    logo visualizes the relative frequency of nt or aa characters in the alignment. The char frequency
916    is scaled to the information content at each position. --> identical to how Geneious calculates it.
917
918    :param aln: alignment MSA class or path
919    :param ax: matplotlib axes
920    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
921    :param plot_type: 'logo' for sequence logo, 'stacked' for stacked area plot
922    :param show_x_label: whether to show the x-axis label
923    """
924
925    if type(aln) is str:
926        if os.path.exists(aln):
927            aln = explore.MSA(aln)
928        else:
929            raise FileNotFoundError(f'File {aln} does not exist')
930    _validate_input_parameters(aln, ax)
931    if ax is None:
932        ax = plt.gca()
933    # calc matrix
934    matrix = aln.calc_position_matrix('IC') * aln.calc_position_matrix('PPM')
935    letters_to_plot = list(config.CHAR_COLORS[aln.aln_type]['standard'].keys())[:-1]
936
937    #matrix = np.clip(matrix, 0, None)  # only positive values
938    # plot
939    if plot_type == 'logo':
940        for pos in range(matrix.shape[1]):
941            # sort the positive matrix row values by size
942            items = [(letters_to_plot[i], matrix[i, pos]) for i in range(len(letters_to_plot)) if matrix[i, pos] > 0]
943            items.sort(key=lambda x: x[1])
944            # plot each position
945            y_offset = 0
946            for letter, h in items:
947                tp = TextPath((aln.zoom[0] - 0.325 if aln.zoom is not None else - 0.325, 0), letter, size=1, prop=FontProperties(weight='bold'))
948                bb = tp.get_extents()
949                glyph_height = bb.height if bb.height > 0 else 1e-6  # avoid div by zero
950                scale_to_1 = 1.0 / glyph_height
951
952                transform = (Affine2D()
953                             .scale(1.0, h * scale_to_1)  # scale manually by IC and normalize font
954                             .translate(pos, y_offset)
955                             + ax.transData)
956
957                patch = PathPatch(tp, transform=transform,
958                                  facecolor=config.CHAR_COLORS[aln.aln_type][color_scheme][letter],
959                                  edgecolor='none')
960                ax.add_patch(patch)
961                y_offset += h
962    elif plot_type == 'stacked':
963        y_values = np.zeros(matrix.shape[1])
964        x_values = np.arange(0, matrix.shape[1]) if aln.zoom is None else np.arange(aln.zoom[0], aln.zoom[1])
965        for row in range(matrix.shape[0]):
966            y = matrix[row]
967            ax.fill_between(x_values,
968                            y_values,
969                            y_values + y,
970                            fc=config.CHAR_COLORS[aln.aln_type][color_scheme].get(letters_to_plot[row]),
971                            ec='None',
972                            label=letters_to_plot[row],
973                            step='mid')
974            y_values += y
975
976    # adjust limits & labels
977    _format_x_axis(aln, ax, show_x_label, show_left=True)
978    if aln.aln_type == 'AA':
979        ax.set_ylim(bottom=0, top=5)
980    else:
981        ax.set_ylim(bottom=0, top=2)
982    ax.spines['top'].set_visible(False)
983    ax.spines['right'].set_visible(False)
984    ax.set_ylabel('bits')
985
986    return ax
def identity_alignment( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', show_mask: bool = True, show_gaps: bool = True, fancy_gaps: bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float | int, float | int] | list[float | int, float | int] = (1, 1)):
283def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', show_mask:bool = True, show_gaps:bool = True, fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int]= (1, 1)):
284    """
285    Generates an identity alignment overview plot.
286    :param aln: alignment MSA class or path
287    :param ax: matplotlib axes
288    :param show_title: whether to show title
289    :param show_seq_names: whether to show seq names
290    :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
291    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
292    :param custom_seq_names: custom seq names
293    :param reference_color: color of reference sequence
294    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
295    :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
296    :param fancy_gaps: show gaps with a small black bar
297    :param show_mismatches: whether to show mismatches otherwise it will be shown as match
298    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
299    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity
300    :param show_x_label: whether to show x label
301    :param show_legend: whether to show the legend
302    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
303    """
304
305    # Validate inputs and colors
306    if type(aln) is str:
307        if os.path.exists(aln):
308            aln = explore.MSA(aln)
309        else:
310            raise FileNotFoundError(f'File {aln} does not exist')
311
312    _validate_input_parameters(aln, ax)
313    if ax is None:
314        ax = plt.gca()
315    if not is_color_like(reference_color):
316        raise ValueError(f'{reference_color} for reference is not a color')
317
318    if color_scheme is not None:
319        if aln.aln_type == 'AA' and color_scheme not in ['standard', 'clustal', 'zappo', 'hydrophobicity']:
320            raise ValueError(f'{color_scheme} is not a supported coloring scheme for {aln.aln_type} alignments. Supported are: standard, clustal, zappo, hydrophobicity')
321        elif aln.aln_type in ['DNA', 'RNA'] and color_scheme not in ['standard', 'purine_pyrimidine', 'strong_weak']:
322            raise ValueError(f'{color_scheme} is not a supported coloring scheme for {aln.aln_type} alignments. Supported are: standard, purine_pyrimidine, strong_weak')
323
324    # Both options for gaps work hand in hand
325    if fancy_gaps:
326        show_gaps = True
327
328    # Determine zoom
329    zoom = (0, aln.length) if aln.zoom is None else aln.zoom
330
331    # Set up color mapping and identity values
332    aln_colors = config.IDENTITY_COLORS.copy()
333    identity_values = [-1, -2, -3]  # -1 = mismatch, -2 = mask, -3 ambiguity
334    if color_scheme is not None:
335        colors_to_extend = config.CHAR_COLORS[aln.aln_type][color_scheme]
336        identity_values += [x + 1 for x in range(len(colors_to_extend))] # x+1 is needed to allow correct mapping
337        # use the standard setting for the index (same as in aln.calc_identity_alignment)
338        # and map the corresponding color scheme to it
339        for idx, char in enumerate(config.CHAR_COLORS[aln.aln_type]['standard']):
340            aln_colors[idx + 1] = {'type': char, 'color': colors_to_extend[char]}
341
342    # Compute identity alignment
343    identity_aln = aln.calc_identity_alignment(
344        encode_mask=show_mask,
345        encode_gaps=show_gaps,
346        encode_mismatches=show_mismatches,
347        encode_ambiguities=show_ambiguities,
348        encode_each_mismatch_char=True if color_scheme is not None else False
349    )
350
351    # List to store polygons
352    detected_identity_values = {0}
353    polygons, polygon_colors, patch_list = [], [], []
354
355    for i, seq_name in enumerate(aln.alignment):
356        y_position = len(aln.alignment) - i - 1
357        row = identity_aln[i]
358
359        # plot a line below everything
360        if fancy_gaps:
361            ax.hlines(
362                y_position + 0.4,
363                xmin=zoom[0] - 0.5,
364                xmax=zoom[1] + 0.5,
365                color='black',
366                linestyle='-',
367                zorder=0,
368                linewidth=0.75
369            )
370
371        # plot the basic shape per sequence with gaps
372        _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, aln_colors[0]['color'], show_gaps)
373
374        # find consecutive stretches
375        stretches = _find_stretches(row)
376        # create polygons per stretch
377        _create_polygons(stretches, identity_values, zoom, y_position, polygons, aln_colors, polygon_colors, detected_identity_values)
378
379        # add sequence text
380        if show_identity_sequence or show_sequence_all:
381            _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, show_sequence_all, identity_aln[i], identity_aln, ax, zoom, y_position, 0, reference_color, show_gaps, colorscheme=aln_colors)
382
383    # Create the LineCollection: each segment is drawn in a single call.
384    ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt'))
385    ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors))
386
387    # custom legend
388    if show_legend:
389        if color_scheme is not None and color_scheme != 'standard':
390            for x in aln_colors:
391                for group in config.CHAR_GROUPS[aln.aln_type][color_scheme]:
392                    if aln_colors[x]['type'] in config.CHAR_GROUPS[aln.aln_type][color_scheme][group]:
393                        aln_colors[x]['type'] = group
394                        break
395        # create it
396        handels, labels, detected_groups = [], [], set()
397        for x in aln_colors:
398            if x in detected_identity_values and aln_colors[x]['type'] not in detected_groups:
399                handels.append(
400                    ax.add_line(
401                        plt.Line2D(
402                            [],
403                            [],
404                            color=aln_colors[x]['color'] if color_scheme != 'hydrophobicity' or x == 0 else config.CHAR_COLORS[aln.aln_type]['hydrophobicity'][
405                                config.CHAR_GROUPS[aln.aln_type]['hydrophobicity'][aln_colors[x]['type']][0]
406                            ],
407                            marker='s',
408                            markeredgecolor='grey',
409                            linestyle='',
410                            markersize=10))
411                )
412                labels.append(aln_colors[x]['type'])
413                detected_groups.add(aln_colors[x]['type'])
414
415        # ncols
416        if color_scheme is None or aln.aln_type != 'AA':
417            ncols = len(detected_identity_values)
418        elif color_scheme == 'standard':
419            ncols = (len(detected_identity_values) + 1) / 2
420        else:
421            ncols = (len(detected_groups) + 1) / 2
422
423        # plot it
424        ax.legend(
425            handels,
426            labels,
427            loc='lower right',
428            bbox_to_anchor=bbox_to_anchor,
429            ncols=ncols,
430            frameon=False
431        )
432    _seq_names(aln, ax, custom_seq_names, show_seq_names)
433
434    # configure axis
435    ax.set_ylim(0, len(aln.alignment))
436    if show_title:
437        ax.set_title('identity', loc='left')
438    _format_x_axis(aln, ax, show_x_label, show_left=False)
439
440    return ax

Generates an identity alignment overview plot.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • show_title: whether to show title
  • show_seq_names: whether to show seq names
  • show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
  • show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
  • custom_seq_names: custom seq names
  • reference_color: color of reference sequence
  • show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
  • show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
  • fancy_gaps: show gaps with a small black bar
  • show_mismatches: whether to show mismatches otherwise it will be shown as match
  • show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
  • color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity
  • show_x_label: whether to show x label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
def similarity_alignment( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, matrix_type: str | None = None, show_title: bool = True, show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', cmap: str = 'twilight_r', show_gaps: bool = True, fancy_gaps: bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1):
443def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True, show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', cmap: str = 'twilight_r', show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1):
444    """
445    Generates a similarity alignment overview plot. Importantly the similarity values are normalized!
446    :param aln: alignment MSA class or path
447    :param ax: matplotlib axes
448    :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
449    :param show_title: whether to show title
450    :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
451    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
452    :param show_seq_names: whether to show seq names
453    :param custom_seq_names: custom seq names
454    :param reference_color: color of reference sequence
455    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
456    :param show_gaps: whether to show gaps otherwise it will be ignored
457    :param fancy_gaps: show gaps with a small black bar
458    :param show_x_label: whether to show x label
459    :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
460    :param cbar_fraction: fraction of the original ax reserved for the legend
461    """
462    # input check
463    if type(aln) is str:
464        if os.path.exists(aln):
465            aln = explore.MSA(aln)
466        else:
467            raise FileNotFoundError(f'File {aln} does not exist')
468    _validate_input_parameters(aln, ax)
469    if ax is None:
470        ax = plt.gca()
471    # validate colors
472    if not is_color_like(reference_color):
473        raise ValueError(f'{reference_color} for reference is not a color')
474
475    # Both options for gaps work hand in hand
476    if fancy_gaps:
477        show_gaps = True
478
479    # define zoom to plot
480    if aln.zoom is None:
481        zoom = (0, aln.length)
482    else:
483        zoom = aln.zoom
484
485    # get data
486    similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type)  # use normalized values here
487    similarity_aln = similarity_aln.round(2)  # round data for good color mapping
488
489    # determine min max values of the underlying matrix and create cmap
490    min_value, max_value = 0, 1
491    cmap = ScalarMappable(
492        norm=Normalize(
493            vmin=min_value,
494            vmax=max_value
495        ),
496        cmap=plt.get_cmap(cmap)
497    )
498
499    # create similarity values
500    similarity_values = np.arange(start=min_value, stop=max_value, step=0.01)
501    # round it to be absolutely sure that values match with rounded sim alignment
502    similarity_values = similarity_values.round(2)
503
504    # create plot
505    polygons, polygon_colors, patch_list = [], [], []
506
507    for i, seq_name in enumerate(aln.alignment):
508        y_position = len(aln.alignment) - i - 1
509        row = similarity_aln[i]
510
511        # plot a line below everything
512        if fancy_gaps:
513            ax.hlines(
514                y_position + 0.4,
515                xmin=zoom[0] - 0.5,
516                xmax=zoom[1] + 0.5,
517                color='black',
518                linestyle='-',
519                zorder=0,
520                linewidth=0.75
521            )
522
523        # plot the basic shape per sequence with gaps
524        _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name,
525                               cmap.to_rgba(max_value) if seq_name != aln.reference_id else reference_color,
526                               show_gaps)
527
528        # find consecutive stretches
529        stretches = _find_stretches(row)
530        # create polygons per stretch
531        _create_polygons(stretches, similarity_values, zoom, y_position, polygons, cmap, polygon_colors)
532
533        # add sequence text
534        if show_sequence_all or show_similarity_sequence:
535            _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, show_sequence_all, similarity_aln[i], similarity_aln, ax,
536                                zoom, y_position, 1, reference_color, show_gaps, cmap)
537
538    # Create the LineCollection: each segment is drawn in a single call.
539    ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt'))
540    ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors))
541
542    # legend
543    if show_cbar:
544        cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
545        cbar.set_ticks([min_value, max_value])
546        cbar.set_ticklabels(['low', 'high'])
547
548    # format seq names
549    _seq_names(aln, ax, custom_seq_names, show_seq_names)
550
551    # configure axis
552    ax.set_ylim(0, len(aln.alignment))
553    if show_title:
554        ax.set_title('similarity', loc='left')
555    _format_x_axis(aln, ax, show_x_label, show_left=False)
556
557    return ax

Generates a similarity alignment overview plot. Importantly the similarity values are normalized!

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
  • show_title: whether to show title
  • show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
  • show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
  • show_seq_names: whether to show seq names
  • custom_seq_names: custom seq names
  • reference_color: color of reference sequence
  • cmap: color mapping for % identity - see https: //matplotlib.org/stable/users/explain/colors/colormaps.html
  • show_gaps: whether to show gaps otherwise it will be ignored
  • fancy_gaps: show gaps with a small black bar
  • show_x_label: whether to show x label
  • show_cbar: whether to show the legend - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
  • cbar_fraction: fraction of the original ax reserved for the legend
def stat_plot( aln: msaexplorer.explore.MSA | str, stat_type: str, ax: matplotlib.axes._axes.Axes | None = None, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True):
585def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True):
586    """
587    Generate a plot for the various alignment stats.
588    :param aln: alignment MSA class or path
589    :param ax: matplotlib axes
590    :param stat_type: 'entropy', 'gc', 'coverage', 'ts/tv', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
591    :param line_color: color of the line
592    :param line_width: width of the line
593    :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
594    :param show_x_label: whether to show the x-axis label
595    :param show_title: whether to show the title
596    """
597
598    # define possible functions to calc here
599    stat_functions: Dict[str, Callable[[], list | ndarray]] = {
600        'gc': aln.calc_gc,
601        'entropy': aln.calc_entropy,
602        'coverage': aln.calc_coverage,
603        'identity': aln.calc_identity_alignment,
604        'similarity': aln.calc_similarity_alignment,
605        'ts tv score': aln.calc_transition_transversion_score
606    }
607
608    if stat_type not in stat_functions:
609        raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys())))
610
611    # input check
612    if type(aln) is str:
613        if os.path.exists(aln):
614            aln = explore.MSA(aln)
615        else:
616            raise FileNotFoundError(f'File {aln} does not exist')
617    _validate_input_parameters(aln, ax)
618    if ax is None:
619        ax = plt.gca()
620    if not is_color_like(line_color):
621        raise ValueError('line color is not a color')
622
623    # validate rolling average
624    if rolling_average < 1 or rolling_average > aln.length:
625        raise ValueError('rolling_average must be between 1 and length of sequence')
626
627    # generate input data
628    array = stat_functions[stat_type]()
629
630    if stat_type == 'identity':
631        min_value, max_value = -1, 0
632    elif stat_type == 'ts tv score':
633        min_value, max_value = -1, 1
634    else:
635        min_value, max_value = 0, 1
636    if stat_type in ['identity', 'similarity']:
637        # for the mean nan values get handled as the lowest possible number in the matrix
638        array = np.nan_to_num(array, True, min_value)
639        array = np.mean(array, axis=0)
640    data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length)
641
642    # plot the data
643    ax.fill_between(
644        # this add dummy data left and right for better plotting
645        # otherwise only half of the step is shown
646        np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx,
647        np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data,
648        min_value,
649        linewidth = line_width,
650        edgecolor=line_color,
651        step='mid' if rolling_average == 1 else None,
652        facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none'
653    )
654    if stat_type == 'gc':
655        ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1)
656
657    # format axis
658    ax.set_ylim(min_value, max_value*0.1+max_value)
659    ax.set_yticks([min_value, max_value])
660    if stat_type == 'gc':
661        ax.set_yticklabels(['0', '100'])
662    elif stat_type == 'ts tv score':
663        ax.set_yticklabels(['tv', 'ts'])
664    else:
665        ax.set_yticklabels(['low', 'high'])
666
667    # show title
668    if show_title:
669        ax.set_title(
670            f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position',
671            loc='left'
672        )
673
674    _format_x_axis(aln, ax, show_x_label, show_left=True)
675
676    return ax

Generate a plot for the various alignment stats.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • stat_type: 'entropy', 'gc', 'coverage', 'ts/tv', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
  • line_color: color of the line
  • line_width: width of the line
  • rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
  • show_x_label: whether to show the x-axis label
  • show_title: whether to show the title
def variant_plot( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, lollisize: tuple[int, int] | list[int, int] = (1, 3), color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float | int, float | int] | list[float | int, float | int] = (1, 1)):
679def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list[int, int] = (1, 3), color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int] = (1, 1)):
680    """
681    Plots variants.
682    :param aln: alignment MSA class or path
683    :param ax: matplotlib axes
684    :param lollisize: (stem_size, head_size)
685    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
686    :param show_x_label:  whether to show the x-axis label
687    :param show_legend: whether to show the legend
688    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
689    """
690
691    # validate input
692    if type(aln) is str:
693        if os.path.exists(aln):
694            aln = explore.MSA(aln)
695        else:
696            raise FileNotFoundError(f'File {aln} does not exist')
697    _validate_input_parameters(aln, ax)
698    if ax is None:
699        ax = plt.gca()
700    if not isinstance(lollisize, tuple) or len(lollisize) != 2:
701        raise ValueError('lollisize must be tuple of length 2 (stem, head)')
702    for _size in lollisize:
703        if not isinstance(_size, float | int) or _size <= 0:
704            raise ValueError('lollisize must be floats greater than zero')
705
706    # define colors
707    colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
708    # get snps
709    snps = aln.get_snps()
710    # define where to plot (each ref type gets a separate line)
711    ref_y_positions, y_pos, detected_var = {}, 0, set()
712
713    # iterate over snp dict
714    for pos in snps['POS']:
715        for identifier in snps['POS'][pos]:
716            # fill in y pos dict
717            if identifier == 'ref':
718                if snps['POS'][pos]['ref'] not in ref_y_positions:
719                    ref_y_positions[snps['POS'][pos]['ref']] = y_pos
720                    y_pos += 1.1
721                    continue
722            # plot
723            if identifier == 'ALT':
724                for alt in snps['POS'][pos]['ALT']:
725                    ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos,
726                              ymin=ref_y_positions[snps['POS'][pos]['ref']],
727                              ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
728                              color=colors[alt],
729                              zorder=100,
730                              linewidth=lollisize[0]
731                              )
732                    ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos,
733                            ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
734                            color=colors[alt],
735                            marker='o',
736                            markersize=lollisize[1]
737                            )
738                    detected_var.add(alt)
739
740    # plot hlines
741    for y_char in ref_y_positions:
742        ax.hlines(
743            ref_y_positions[y_char],
744            xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5,
745            xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5,
746            color='black',
747            linestyle='-',
748            zorder=0,
749            linewidth=0.75
750        )
751    # create a custom legend
752    if show_legend:
753        custom_legend = [
754            ax.add_line(
755                plt.Line2D(
756                    [],
757                    [],
758                    color=colors[char],
759                    marker='o',
760                    linestyle='',
761                    markersize=5
762                )
763            ) for char in colors if char in detected_var
764        ]
765        ax.legend(
766            custom_legend,
767            [char for char in colors if char in detected_var],  # ensures correct sorting
768            loc='lower right',
769            title='variant',
770            bbox_to_anchor=bbox_to_anchor,
771            ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var),
772            frameon=False
773        )
774
775    # format axis
776    _format_x_axis(aln, ax, show_x_label, show_left=False)
777    ax.spines['left'].set_visible(False)
778    ax.set_yticks([ref_y_positions[x] for x in ref_y_positions])
779    ax.set_yticklabels(ref_y_positions.keys())
780    ax.set_ylim(0, y_pos)
781    ax.set_ylabel('reference')
782
783    return ax

Plots variants.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • lollisize: (stem_size, head_size)
  • color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
  • show_x_label: whether to show the x-axis label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
def orf_plot( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1):
786def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1):
787    """
788    Plot conserved ORFs.
789    :param aln: alignment MSA class or path
790    :param ax: matplotlib axes
791    :param min_length: minimum length of orf
792    :param non_overlapping_orfs: whether to consider overlapping orfs
793    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
794    :param direction_marker_size: marker size for direction marker, not shown if marker_size == None
795    :param show_x_label: whether to show the x-axis label
796    :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
797    :param cbar_fraction: fraction of the original ax reserved for the colorbar
798    """
799
800    # normalize colorbar
801    cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap))
802
803    # validate input
804    if type(aln) is str:
805        if os.path.exists(aln):
806            aln = explore.MSA(aln)
807        else:
808            raise FileNotFoundError(f'File {aln} does not exist')
809    _validate_input_parameters(aln, ax)
810    _validate_input_parameters(aln, ax)
811    if ax is None:
812        ax = plt.gca()
813
814    # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only
815    # searched within the zoomed region)
816    aln_temp = deepcopy(aln)
817    aln_temp.zoom = None
818    if non_overlapping_orfs:
819        annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length)
820    else:
821        annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length)
822
823    # filter dict for zoom
824    if aln.zoom is not None:
825        annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])}
826
827    # add track for plotting
828    _add_track_positions(annotation_dict)
829
830    # plot
831    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap)
832
833    # legend
834    if show_cbar:
835        cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
836        cbar.set_label('% identity')
837        cbar.set_ticks([0, 100])
838
839    # format axis
840    _format_x_axis(aln, ax, show_x_label, show_left=False)
841    ax.set_ylim(bottom=0.8)
842    ax.set_yticks([])
843    ax.set_yticklabels([])
844    ax.set_title('conserved orfs', loc='left')
845
846    return ax

Plot conserved ORFs.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • min_length: minimum length of orf
  • non_overlapping_orfs: whether to consider overlapping orfs
  • cmap: color mapping for % identity - see https: //matplotlib.org/stable/users/explain/colors/colormaps.html
  • direction_marker_size: marker size for direction marker, not shown if marker_size == None
  • show_x_label: whether to show the x-axis label
  • show_cbar: whether to show the colorbar - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
  • cbar_fraction: fraction of the original ax reserved for the colorbar
def annotation_plot( aln: msaexplorer.explore.MSA | str, annotation: msaexplorer.explore.Annotation | str, feature_to_plot: str, ax: matplotlib.axes._axes.Axes | None = None, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False):
849def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False):
850    """
851    Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
852    :param aln: alignment MSA class
853    :param annotation: annotation class | path to annotation file
854    :param ax: matplotlib axes
855    :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
856    :param color: color for the annotation
857    :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True
858    :param show_x_label: whether to show the x-axis label
859    """
860    # helper function
861    def parse_annotation_from_string(path: str, msa: explore.MSA) -> explore.Annotation:
862        """
863        Parse annotation.
864        :param path: path to annotation
865        :param msa: msa object
866        :return: parsed annotation
867        """
868        if os.path.exists(path):
869            # reset zoom so the annotation is correctly parsed
870            msa_temp = deepcopy(msa)
871            msa_temp.zoom = None
872            return explore.Annotation(msa_temp, path)
873        else:
874            raise FileNotFoundError()
875
876    # validate input
877    if type(aln) is str:
878        if os.path.exists(aln):
879            aln = explore.MSA(aln)
880        else:
881            raise FileNotFoundError(f'File {aln} does not exist')
882    if type(annotation) is str:
883        annotation = parse_annotation_from_string(annotation, aln)
884    _validate_input_parameters(aln, ax, annotation)
885    if ax is None:
886        ax = plt.gca()
887    if not is_color_like(color):
888        raise ValueError(f'{color} for reference is not a color')
889
890    # ignore features to plot for bed files (here it is written into one feature)
891    if annotation.ann_type == 'bed':
892        annotation_dict = annotation.features['region']
893        feature_to_plot = 'bed regions'
894    else:
895        # try to subset the annotation dict
896        try:
897            annotation_dict = annotation.features[feature_to_plot]
898        except KeyError:
899            raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.')
900
901    # plotting and formating
902    _add_track_positions(annotation_dict)
903    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color)
904    _format_x_axis(aln, ax, show_x_label, show_left=False)
905    ax.set_ylim(bottom=0.8)
906    ax.set_yticks([])
907    ax.set_yticklabels([])
908    ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')
909
910    return ax

Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.

Parameters
  • aln: alignment MSA class
  • annotation: annotation class | path to annotation file
  • ax: matplotlib axes
  • feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
  • color: color for the annotation
  • direction_marker_size: marker size for direction marker, only relevant if show_direction is True
  • show_x_label: whether to show the x-axis label