msaexplorer.draw

The draw module

The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a matplotlib axes has to be passed to the plotting function.

Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments. The functions will raise the appropriate exception in such a case.

Functions

   1r"""
   2# The draw module
   3
   4The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a
   5`matplotlib axes` has to be passed to the plotting function.
   6
   7Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments.
   8The functions will raise the appropriate exception in such a case.
   9
  10## Functions
  11
  12"""
  13# built-in
  14from itertools import chain
  15from typing import Callable, Dict
  16from copy import deepcopy
  17import os
  18
  19# MSAexplorer
  20from msaexplorer import explore, config
  21from msaexplorer._data_classes import AlignmentStats
  22from msaexplorer._helpers import _validate_color, _get_contrast_text_color
  23
  24# libs
  25import numpy as np
  26from numpy import ndarray
  27import matplotlib.pyplot as plt
  28import matplotlib.patches as patches
  29from matplotlib.cm import ScalarMappable
  30from matplotlib.colors import Normalize, to_rgba, LinearSegmentedColormap
  31from matplotlib.collections import PatchCollection, PolyCollection
  32from matplotlib.text import TextPath
  33from matplotlib.patches import PathPatch
  34from matplotlib.font_manager import FontProperties
  35from matplotlib.transforms import Affine2D
  36
  37
  38def _validate_input_parameters(aln: explore.MSA | str, ax: plt.Axes, annotation: explore.Annotation | str | None = None) \
  39        -> tuple[explore.MSA, plt.Axes, explore.Annotation] | tuple[explore.MSA, plt.Axes]:
  40    """
  41    Validate MSA class and axis.
  42    """
  43    # check if alignment is path
  44    if type(aln) is str:
  45        if os.path.exists(aln):
  46            aln = explore.MSA(aln)
  47        else:
  48            raise FileNotFoundError(f'File {aln} does not exist')
  49    # check if alignment is correct type
  50    if not isinstance(aln, explore.MSA):
  51        raise ValueError('alignment has to be an MSA class. use explore.MSA() to read in alignment')
  52    # check if a axis was created
  53    # if ax not provided generate one from scratch
  54    if ax is None:
  55        ax = plt.gca()
  56    elif not isinstance(ax, plt.Axes):
  57            raise ValueError('ax has to be an matplotlib axis')
  58    # check if annotation is correct type
  59    if annotation is not None:
  60        if type(annotation) is str:
  61            if os.path.exists(annotation):
  62                # reset zoom so the annotation is correctly parsed
  63                msa_temp = deepcopy(aln)
  64                msa_temp.zoom = None
  65                annotation = explore.Annotation(msa_temp, annotation)
  66            else:
  67                raise FileNotFoundError()
  68        if not isinstance(annotation, explore.Annotation):
  69            raise ValueError('annotation has to be an annotation class. use explore.Annotation() to read in annotation')
  70
  71    if annotation is None:
  72        return aln, ax
  73    else:
  74        return aln, ax, annotation
  75
  76
  77def _validate_color_scheme(scheme: str | None, aln: explore.MSA):
  78    """
  79    validates colorscheme
  80    """
  81    if scheme is not None:
  82        if scheme not in config.CHAR_COLORS[aln.aln_type].keys():
  83            raise ValueError(f'Scheme not supported. Supported: {config.CHAR_COLORS[aln.aln_type].keys()}')
  84
  85
  86def _create_color_dictionary(aln: explore.MSA, color_scheme: str, identical_char_color: str | None = None, different_char_color: str | None = None, mask_color: str | None = None, ambiguity_color: str | None = None):
  87    """
  88    create the colorscheme dictionary -> this functions adds the respective colors to a dictionary. Some have fixed values and colors of defined
  89    schemes are always the idx + 1
  90    """
  91    # basic color mapping
  92    aln_colors = {}
  93
  94    if identical_char_color is not None:
  95        aln_colors[0] = {'type': 'identical', 'color': identical_char_color}
  96    if different_char_color is not None:
  97        aln_colors[-1] = {'type': 'different', 'color': different_char_color}
  98    if mask_color is not None:
  99        aln_colors[-2] = {'type': 'mask', 'color': mask_color}
 100    if ambiguity_color is not None:
 101        aln_colors[-3] = {'type': 'ambiguity', 'color': ambiguity_color}
 102
 103    # use the standard setting for the index (same as in aln.calc_identity_alignment)
 104    # and map the corresponding color scheme to it
 105    if color_scheme is not None:
 106        for idx, char in enumerate(config.CHAR_COLORS[aln.aln_type]['standard']):
 107            aln_colors[idx + 1] = {'type': char, 'color': config.CHAR_COLORS[aln.aln_type][color_scheme][char]}
 108
 109    return aln_colors
 110
 111
 112def _format_x_axis(aln: explore.MSA, ax: plt.Axes, show_x_label: bool, show_left: bool):
 113    """
 114    General x axis formatting.
 115    """
 116    ax.set_xlim(
 117        (aln.zoom[0] - 0.5, aln.zoom[0] + aln.length - 0.5) if aln.zoom is not None else (-0.5, aln.length - 0.5)
 118    )
 119    ax.spines['top'].set_visible(False)
 120    ax.spines['right'].set_visible(False)
 121    if show_x_label:
 122        ax.set_xlabel('alignment position')
 123    if not show_left:
 124        ax.spines['left'].set_visible(False)
 125
 126
 127def _seq_names(aln: explore.MSA, ax: plt.Axes, custom_seq_names: tuple, show_seq_names: bool, include_consensus: bool = False):
 128    """
 129    Validate custom names and set show names to True. Format axis accordingly.
 130    """
 131    if custom_seq_names:
 132        show_seq_names = True
 133        if not isinstance(custom_seq_names, tuple):
 134            raise ValueError('configure your custom names list: custom_names=(name1, name2...)')
 135        if len(custom_seq_names) != len(aln):
 136            raise ValueError('length of sequences not equal to number of custom names')
 137    if show_seq_names:
 138        ax.yaxis.set_ticks_position('none')
 139        ax.set_yticks(np.arange(len(aln)))
 140        if custom_seq_names:
 141            names = custom_seq_names[::-1]
 142        else:
 143            names = [x.split(' ')[0] for x in aln.sequence_ids[::-1]]
 144        if include_consensus:
 145            names = names + ['consensus']
 146            y_ticks = np.arange(len(aln) + 1)
 147        else:
 148            y_ticks = np.arange(len(aln))
 149        ax.set_yticks(y_ticks)
 150        ax.set_yticklabels(names)
 151    else:
 152        ax.set_yticks([])
 153
 154
 155def _create_legend(color_scheme: str, aln_colors: dict, aln: explore.MSA, detected_identity_values: set, ax: plt.Axes, bbox_to_anchor: tuple | list):
 156    """
 157    create the legend for the alignment and identity alignment plot
 158    """
 159    if color_scheme is not None and color_scheme != 'standard':
 160        for x in aln_colors:
 161            for group in config.CHAR_GROUPS[aln.aln_type][color_scheme]:
 162                if aln_colors[x]['type'] in config.CHAR_GROUPS[aln.aln_type][color_scheme][group]:
 163                    aln_colors[x]['type'] = group
 164                    break
 165    # create it
 166    handels, labels, detected_groups = [], [], set()
 167    for x in aln_colors:
 168        if x in detected_identity_values and aln_colors[x]['type'] not in detected_groups:
 169            handels.append(
 170                ax.add_line(
 171                    plt.Line2D(
 172                        [],
 173                        [],
 174                        color=aln_colors[x]['color'] if color_scheme != 'hydrophobicity' or x == 0 else
 175                        config.CHAR_COLORS[aln.aln_type]['hydrophobicity'][
 176                            config.CHAR_GROUPS[aln.aln_type]['hydrophobicity'][aln_colors[x]['type']][0]
 177                        ],
 178                        marker='s',
 179                        markeredgecolor='grey',
 180                        linestyle='',
 181                        markersize=10))
 182            )
 183            labels.append(aln_colors[x]['type'])
 184            detected_groups.add(aln_colors[x]['type'])
 185
 186    # ncols
 187    if color_scheme is None or aln.aln_type != 'AA':
 188        ncols = len(detected_identity_values)
 189    elif color_scheme == 'standard':
 190        ncols = (len(detected_identity_values) + 1) / 2
 191    else:
 192        ncols = (len(detected_groups) + 1) / 2
 193
 194    # plot it
 195    ax.legend(
 196        handels,
 197        labels,
 198        loc='lower right',
 199        bbox_to_anchor=bbox_to_anchor,
 200        ncols=ncols,
 201        frameon=False
 202    )
 203
 204
 205def _create_alignment(aln: explore.MSA, ax: plt.Axes, matrix: ndarray, aln_colors: dict | ScalarMappable, fancy_gaps: bool, create_identity_patch: bool,
 206                      show_gaps: bool, show_different_sequence: bool, show_sequence_all: bool, reference_color: str | None, values_to_plot: list,
 207                      identical_value: int | float = 0):
 208    """
 209    create the polygon patch collection for an alignment
 210    """
 211
 212    # helper functions
 213    def _create_identity_patch(row, aln: explore.MSA, col: list, zoom: tuple[int, int], y_position: float | int,
 214                               reference_color: str | None, seq_name: str, identity_color: str | ndarray,
 215                               show_gaps: bool):
 216        """
 217        Creates the initial patch and adds it to the collection.
 218        """
 219
 220        color = reference_color if seq_name == aln.reference_id and reference_color is not None else identity_color
 221
 222        if show_gaps:
 223            # plot a rectangle for parts that do not have gaps
 224            for stretch in _find_stretches(row, True):
 225                col.append(
 226                    patches.Rectangle(
 227                        (stretch[0] + zoom[0] - 0.5, y_position), stretch[1] - stretch[0] + 1, 0.8,
 228                        facecolor=color,
 229                        edgecolor=color,
 230                        linewidth=0.5
 231                    )
 232                )
 233        # just plot a rectangle
 234        else:
 235            col.append(
 236                patches.Rectangle(
 237                    (zoom[0] - 0.5, y_position), zoom[1] - zoom[0], 0.8,
 238                    facecolor=color,
 239                    edgecolor=color,
 240                    linewidth=0.5
 241                )
 242            )
 243
 244        return col
 245
 246    def _find_stretches(row, non_nan_only=False) -> list[tuple[int, int, int]] | list[tuple[int, int]]:
 247        """
 248        Finds consecutive stretches of values in an array, with an option to exclude NaN stretches and return start, end, value at start
 249        """
 250        if row.size == 0:
 251            return []
 252
 253        if non_nan_only:
 254            # Create a boolean mask for non-NaN values
 255            non_nan_mask = ~np.isnan(row)
 256            # Find changes in the mask
 257            changes = np.diff(non_nan_mask.astype(int)) != 0
 258            change_idx = np.nonzero(changes)[0]
 259            starts = np.concatenate(([0], change_idx + 1))
 260            ends = np.concatenate((change_idx, [len(row) - 1]))
 261
 262            # Return only stretches that start with non-NaN values
 263            return [(start, end) for start, end in zip(starts, ends) if non_nan_mask[start]]
 264
 265        else:
 266            # Find change points: where adjacent cells differ.
 267            changes = np.diff(row) != 0
 268            change_idx = np.nonzero(changes)[0]
 269            starts = np.concatenate(([0], change_idx + 1))
 270            ends = np.concatenate((change_idx, [len(row) - 1]))
 271
 272            return [(start, end, row[start]) for start, end in zip(starts, ends)]
 273
 274    def _create_polygons(stretches: list, values_to_plot: list | ndarray, zoom: tuple, y_position: int | float,
 275                         polygons: list, aln_colors: dict | ScalarMappable, polygon_colors: list,
 276                         detected_identity_values: set | None = None):
 277        """
 278        create the individual polygons for the heatmap (do not plot each cell but pre-compute if cells are the same and adjacent to each other)
 279        """
 280
 281        for start, end, value in stretches:
 282            # check if this is a value for which we might want to create a polygon
 283            # e.g. we might not want to create a polygon for identical values
 284            if value not in values_to_plot:
 285                continue
 286            # add values to this set - > this is important for correct legend creation
 287            if detected_identity_values is not None:
 288                detected_identity_values.add(value)
 289            width = end + 1 - start
 290            # Calculate x coordinates adjusted for zoom and centering
 291            x0 = start + zoom[0] - 0.5
 292            x1 = x0 + width
 293            # Define the rectangle corners
 294            rect_coords = [
 295                (x0, y_position),
 296                (x1, y_position),
 297                (x1, y_position + 0.8),
 298                (x0, y_position + 0.8),
 299                (x0, y_position)
 300            ]
 301            polygons.append(rect_coords)
 302            if type(aln_colors) != ScalarMappable:
 303                polygon_colors.append(aln_colors[value]['color'])
 304            else:
 305                polygon_colors.append(aln_colors.to_rgba(value))
 306
 307        return detected_identity_values, polygons, polygon_colors
 308
 309    def _plot_sequence_text(aln: explore.MSA, seq_name: str, ref_name: str | None, always_text: bool, values: ndarray,
 310                            matrix: ndarray, ax: plt.Axes, zoom: tuple, y_position: int | float, value_to_skip: int | None,
 311                            ref_color: str, show_gaps: bool, aln_colors: dict | ScalarMappable = None):
 312        """
 313        Plot sequence text - however this will be done even if there is not enough space.
 314        """
 315        x_text = 0
 316        if seq_name == ref_name and ref_color is not None:
 317            different_cols = np.any((matrix != value_to_skip) & ~np.isnan(matrix), axis=0)
 318        else:
 319            different_cols = [False] * aln.length
 320
 321        for idx, (character, value) in enumerate(zip(aln[seq_name], values)):
 322            if value != value_to_skip and character != '-' or seq_name == ref_name and character != '-' or character == '-' and not show_gaps or always_text and character != '-':
 323                if seq_name == ref_name and ref_color is not None:
 324                    text_color = _get_contrast_text_color(to_rgba(ref_color))
 325                elif type(aln_colors) is ScalarMappable:
 326                    text_color = _get_contrast_text_color(aln_colors.to_rgba(value))
 327                else:
 328                    text_color = _get_contrast_text_color(to_rgba(aln_colors[value]['color']))
 329
 330                ax.text(
 331                    x=x_text + zoom[0] if zoom is not None else x_text,
 332                    y=y_position + 0.4,
 333                    s=character,
 334                    fontweight='bold' if different_cols[idx] else 'normal',
 335                    ha='center',
 336                    va='center_baseline',
 337                    c=text_color if value != value_to_skip or seq_name == ref_name and ref_color is not None else 'dimgrey'
 338                )
 339            x_text += 1
 340
 341    # List to store polygons
 342    detected_identity_values = {0}
 343    polygons, polygon_colors, patch_list = [], [], []
 344    # determine zoom
 345    zoom = (0, aln.length) if aln.zoom is None else aln.zoom
 346    for i, seq_name in enumerate(aln):
 347        # define initial y position
 348        y_position = len(aln) - i - 1.4
 349        # now plot relevant stuff for the current row
 350        row = matrix[i]
 351        # plot a line below everything for fancy gaps
 352        if fancy_gaps:
 353            ax.hlines(
 354                y_position + 0.4,
 355                xmin=zoom[0] - 0.5,
 356                xmax=zoom[1] + 0.5,
 357                color='black',
 358                linestyle='-',
 359                zorder=0,
 360                linewidth=0.75
 361            )
 362        # plot the basic shape per sequence with gaps
 363        if create_identity_patch:
 364            if type(aln_colors) == dict:
 365                identity_color = aln_colors[0]['color']
 366            else:
 367                identity_color = aln_colors.to_rgba(1)  # for similarity alignment
 368            patch_list = _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, identity_color, show_gaps)
 369        # find consecutive stretches
 370        stretches = _find_stretches(row)
 371        # create polygons per stretch
 372        detected_identity_values, polygons, polygon_colors = _create_polygons(
 373            stretches=stretches, values_to_plot=values_to_plot, zoom=zoom, y_position=y_position, polygons=polygons,
 374            aln_colors=aln_colors, polygon_colors=polygon_colors, detected_identity_values=detected_identity_values
 375        )
 376        # add sequence text
 377        if show_different_sequence or show_sequence_all:
 378            _plot_sequence_text(
 379                aln=aln, seq_name=seq_name, ref_name=aln.reference_id, always_text=show_sequence_all,
 380                values=matrix[i], matrix=matrix, ax=ax, zoom=zoom, y_position=y_position, value_to_skip=identical_value, ref_color=reference_color,
 381                show_gaps=show_gaps, aln_colors=aln_colors
 382            )
 383    # Create the LineCollection: each segment is drawn in a single call.
 384    ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt'))
 385    # NOTE: linewidth are important so in large overview plot the differences are still visible
 386    ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors))
 387
 388
 389    return detected_identity_values
 390
 391
 392def alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False,
 393              custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
 394              show_mask:bool = True, fancy_gaps:bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard',
 395              show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1),
 396              show_consensus: bool = False) -> plt.Axes:
 397    """
 398
 399    Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as 
 400    the identity alignments and similarity alignment function as each square for each character is individually plotted.
 401
 402    :param aln: alignment MSA class or path
 403    :param ax: matplotlib axes
 404    :param show_seq_names: whether to show seq names
 405    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
 406    :param custom_seq_names: custom seq names
 407    :param mask_color: color for masked nucleotides/aminoacids
 408    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
 409    :param basic_color: color that will be used for all normal chars if the colorscheme is None
 410    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
 411    :param fancy_gaps: show gaps with a small black bar
 412    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
 413    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
 414    :param show_x_label: whether to show x label
 415    :param show_legend: whether to show the legend
 416    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
 417    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
 418    :return  matplotlib axis
 419    """
 420    # Validate aln, ax inputs
 421    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
 422    # Validate colors
 423    for c in [mask_color, ambiguity_color, basic_color]:
 424        _validate_color(c)
 425    # Validate color scheme
 426    _validate_color_scheme(scheme=color_scheme, aln=aln)
 427    # create color mapping
 428    aln_colors = _create_color_dictionary(
 429        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color,
 430        different_char_color=None, mask_color=mask_color, ambiguity_color=ambiguity_color
 431    )
 432    # Compute identity alignment
 433    numerical_alignment = aln.calc_numerical_alignment(encode_ambiguities=show_ambiguities, encode_mask=show_mask)
 434    if color_scheme is None:
 435        numerical_alignment[np.where(numerical_alignment > 0)] = 0
 436    # create the alignment
 437    detected_identity_values = _create_alignment(
 438        aln=aln, ax=ax, matrix=numerical_alignment, fancy_gaps=fancy_gaps, create_identity_patch=True if color_scheme is None else False, show_gaps=True,
 439        show_different_sequence=False, show_sequence_all=show_sequence_all, reference_color=None, aln_colors=aln_colors,
 440        values_to_plot = list(aln_colors.keys())
 441    )
 442    # custom legend
 443    if show_legend:
 444        aln_colors.pop(0)  # otherwise legend is wrong (here there is no check if a position is identical or not)
 445        _create_legend(
 446            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln,
 447            detected_identity_values=detected_identity_values,
 448            ax=ax, bbox_to_anchor=bbox_to_anchor
 449        )
 450    if show_consensus:
 451        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=show_sequence_all,
 452            color_scheme='standard', basic_color=basic_color, mask_color=mask_color, ambiguity_color=ambiguity_color
 453        )
 454        ax.set_ylim(-0.5, len(aln) + 1)
 455    else:
 456        ax.set_ylim(-0.5, len(aln))
 457
 458    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, include_consensus=show_consensus)
 459    # configure axis
 460    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
 461
 462    return ax
 463
 464
 465
 466def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False,
 467                       show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (),
 468                       reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru',
 469                       mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask:bool = True, show_gaps:bool = True,
 470                       fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False,
 471                       color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False,
 472                       bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1), show_consensus: bool = False) -> plt.Axes:
 473    """
 474    Generates an identity alignment plot.
 475    :param aln: alignment MSA class or path
 476    :param ax: matplotlib axes
 477    :param show_title: whether to show title
 478    :param show_seq_names: whether to show seq names
 479    :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
 480    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
 481    :param custom_seq_names: custom seq names
 482    :param reference_color: color of reference sequence
 483    :param basic_color: color for identical nucleotides/aminoacids
 484    :param different_char_color: color for different nucleotides/aminoacids
 485    :param mask_color: color for masked nucleotides/aminoacids
 486    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
 487    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
 488    :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
 489    :param fancy_gaps: show gaps with a small black bar
 490    :param show_mismatches: whether to show mismatches otherwise it will be shown as match
 491    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
 492    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
 493    :param show_x_label: whether to show x label
 494    :param show_legend: whether to show the legend
 495    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
 496    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
 497
 498    :return  matplotlib axis
 499    """
 500
 501    # Both options for gaps work hand in hand
 502    if fancy_gaps:
 503        show_gaps = True
 504    # Validate aln, ax inputs
 505    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
 506    # Validate colors
 507    for c in [reference_color, basic_color, different_char_color, mask_color, ambiguity_color]:
 508        _validate_color(c)
 509    # Validate color scheme
 510    _validate_color_scheme(scheme=color_scheme, aln=aln)
 511    # create color mapping
 512    aln_colors = _create_color_dictionary(
 513        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, different_char_color=different_char_color,
 514        mask_color=mask_color, ambiguity_color=ambiguity_color
 515    )
 516    # Compute identity alignment
 517    identity_aln = aln.calc_identity_alignment(
 518        encode_mask=show_mask, encode_gaps=show_gaps, encode_mismatches=show_mismatches,encode_ambiguities=show_ambiguities,
 519        encode_each_mismatch_char=True if color_scheme is not None else False
 520    )
 521    # create the alignment
 522    detected_identity_values = _create_alignment(
 523        aln=aln, ax=ax, matrix=identity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
 524        show_different_sequence=show_identity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
 525        aln_colors=aln_colors, values_to_plot=list(aln_colors.keys())[1:]
 526    )
 527    # custom legend
 528    if show_legend:
 529        _create_legend(
 530            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, detected_identity_values=detected_identity_values,
 531            ax=ax, bbox_to_anchor=bbox_to_anchor
 532        )
 533    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names)
 534    # configure axis
 535    if show_consensus:
 536        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_identity_sequence]),
 537                       color_scheme='standard', basic_color=basic_color, mask_color=mask_color,
 538                       ambiguity_color=ambiguity_color
 539                       )
 540        ax.set_ylim(-0.5, len(aln) + 1)
 541    else:
 542        ax.set_ylim(-0.5, len(aln))
 543
 544    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
 545               include_consensus=show_consensus)
 546    if show_title:
 547        ax.set_title('identity', loc='left')
 548    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
 549
 550    return ax
 551
 552
 553def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True,
 554                         show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False,
 555                         custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey',
 556                         show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False,
 557                         cbar_fraction: float = 0.1, show_consensus: bool = False)  -> plt.Axes:
 558    """
 559    Generates a similarity alignment plot. Importantly the similarity values are normalized!
 560    :param aln: alignment MSA class or path
 561    :param ax: matplotlib axes
 562    :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
 563    :param show_title: whether to show title
 564    :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
 565    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
 566    :param show_seq_names: whether to show seq names
 567    :param custom_seq_names: custom seq names
 568    :param reference_color: color of reference sequence
 569    :param different_char_color: color for the lowest similarity
 570    :param basic_color: basic color for the highest similarity
 571    :param show_gaps: whether to show gaps otherwise it will be ignored
 572    :param fancy_gaps: show gaps with a small black bar
 573    :param show_x_label: whether to show x label
 574    :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
 575    :param cbar_fraction: fraction of the original ax reserved for the legend
 576    :param show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters)
 577    :return  matplotlib axis
 578    """
 579    # Both options for gaps work hand in hand
 580    if fancy_gaps:
 581        show_gaps = True
 582    # Validate aln, ax inputs
 583    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
 584    # Validate colors
 585    for c in [reference_color, different_char_color, basic_color]:
 586        _validate_color(c)
 587    # Compute similarity alignment
 588    similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type)  # use normalized values here
 589    similarity_aln = similarity_aln.round(2)  # round data for good color mapping
 590    # create cmap
 591    cmap = LinearSegmentedColormap.from_list(
 592        "extended",
 593        [
 594            (0.0, different_char_color),
 595            (1.0, basic_color)
 596        ],
 597    )
 598    cmap = ScalarMappable(norm=Normalize(vmin=0, vmax=1), cmap=cmap)
 599    # create similarity values
 600    similarity_values = np.arange(start=0, stop=1, step=0.01)
 601    # round it to be absolutely sure that values match with rounded sim alignment
 602    similarity_values = list(similarity_values.round(2))
 603    # create the alignment
 604    _create_alignment(
 605        aln=aln, ax=ax, matrix=similarity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
 606        show_different_sequence=show_similarity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
 607        aln_colors=cmap, values_to_plot=similarity_values, identical_value=1
 608    )
 609    # legend
 610    if show_cbar:
 611        cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
 612        cbar.set_ticks([0, 1])
 613        cbar.set_ticklabels(['low', 'high'])
 614
 615    # format seq names
 616    _seq_names(aln, ax, custom_seq_names, show_seq_names)
 617
 618    # configure axis
 619    if show_consensus:
 620        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_similarity_sequence]),
 621                       color_scheme='standard', basic_color=basic_color)
 622        ax.set_ylim(-0.5, len(aln) + 1)
 623    else:
 624        ax.set_ylim(-0.5, len(aln))
 625
 626    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
 627               include_consensus=show_consensus)
 628    if show_title:
 629        ax.set_title('similarity', loc='left')
 630    _format_x_axis(aln, ax, show_x_label, show_left=False)
 631
 632    return ax
 633
 634
 635def _moving_average(arr: ndarray, window_size: int, zoom: tuple | None, aln_length: int) -> tuple[ndarray, ndarray]:
 636    """
 637    Calculate the moving average of an array.
 638    :param arr: array with values
 639    :param window_size: size of the moving average
 640    :param zoom: zoom of the alignment
 641    :param aln_length: length of the alignment
 642    :return: new array with moving average
 643    """
 644    if window_size > 1:
 645        i = 0
 646        moving_averages, plotting_idx = [], []
 647        while i < len(arr) + 1:
 648            half_window_size = window_size // 2
 649            window_left = arr[i - half_window_size : i] if i > half_window_size else arr[0:i]
 650            window_right = arr[i: i + half_window_size] if i < len(arr) - half_window_size else arr[i: len(arr)]
 651            moving_averages.append((sum(window_left) + sum(window_right)) / (len(window_left) + len(window_right)))
 652            plotting_idx.append(i)
 653            i += 1
 654
 655        return np.array(moving_averages), np.array(plotting_idx) if zoom is None else np.array(plotting_idx) + zoom[0]
 656    else:
 657        return arr, np.arange(zoom[0], zoom[1]) if zoom is not None else np.arange(aln_length)
 658
 659
 660def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood',
 661              line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> plt.Axes:
 662    """
 663    Generate a plot for the various alignment stats.
 664    :param aln: alignment MSA class or path
 665    :param ax: matplotlib axes
 666    :param stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', gap frequency, 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
 667    :param line_color: color of the line
 668    :param line_width: width of the line
 669    :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
 670    :param show_x_label: whether to show the x-axis label
 671    :param show_title: whether to show the title
 672    :return matplotlib axes
 673    """
 674
 675    # input check
 676    aln, ax = _validate_input_parameters(aln, ax)
 677
 678    # define possible functions to calc here
 679    stat_functions: Dict[str, Callable[[], AlignmentStats | ndarray]] = {
 680        'gc': aln.calc_gc,
 681        'entropy': aln.calc_entropy,
 682        'coverage': aln.calc_coverage,
 683        'identity': aln.calc_identity_alignment,
 684        'similarity': aln.calc_similarity_alignment,
 685        'ts tv score': aln.calc_transition_transversion_score,
 686        'gap frequency': aln.calc_gap_frequency
 687    }
 688
 689    if stat_type not in stat_functions:
 690        raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys())))
 691
 692    _validate_color(line_color)
 693    if rolling_average < 1 or rolling_average > aln.length:
 694        raise ValueError('rolling_average must be between 1 and length of sequence')
 695
 696    # generate input data
 697    array = stat_functions[stat_type]()
 698    if isinstance(array, AlignmentStats):
 699        array = array.values
 700    # define possible spans for values
 701    if stat_type == 'identity':
 702        min_value, max_value = -1, 0
 703    elif stat_type == 'ts tv score':
 704        min_value, max_value = -1, 1
 705    else:
 706        min_value, max_value = 0, 1
 707    if stat_type in ['identity', 'similarity']:
 708        # for the mean nan values get handled as the lowest possible number in the matrix
 709        array = np.nan_to_num(array, True, min_value)
 710        array = np.mean(array, axis=0)
 711    data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length)
 712
 713    # plot the data
 714    ax.fill_between(
 715        # this add dummy data left and right for better plotting
 716        # otherwise only half of the step is shown
 717        np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx,
 718        np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data,
 719        min_value,
 720        linewidth = line_width,
 721        edgecolor=line_color,
 722        step='mid' if rolling_average == 1 else None,
 723        facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none'
 724    )
 725    if stat_type == 'gc':
 726        ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1)
 727
 728    # format axis
 729    ax.set_ylim(min_value, max_value*0.1+max_value)
 730    ax.set_yticks([min_value, max_value])
 731    if stat_type == 'gc':
 732        ax.set_yticklabels(['0', '100'])
 733    elif stat_type == 'ts tv score':
 734        ax.set_yticklabels(['tv', 'ts'])
 735    else:
 736        ax.set_yticklabels(['low', 'high'])
 737
 738    # show title
 739    if show_title:
 740        ax.set_title(
 741            f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position',
 742            loc='left'
 743        )
 744
 745    _format_x_axis(aln, ax, show_x_label, show_left=True)
 746
 747    return ax
 748
 749
 750def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3),
 751                 color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True,
 752                 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1)) -> plt.Axes:
 753    """
 754    Plots variants.
 755    :param aln: alignment MSA class or path
 756    :param ax: matplotlib axes
 757    :param lollisize: (stem_size, head_size)
 758    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
 759    :param show_x_label:  whether to show the x-axis label
 760    :param show_legend: whether to show the legend
 761    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
 762    :return matplotlib axes
 763    """
 764
 765    # validate input
 766    aln, ax = _validate_input_parameters(aln, ax)
 767    _validate_color_scheme(color_scheme, aln)
 768    if not isinstance(lollisize, tuple) or len(lollisize) != 2:
 769        raise ValueError('lollisize must be tuple of length 2 (stem, head)')
 770    for _size in lollisize:
 771        if not isinstance(_size, float | int) or _size <= 0:
 772            raise ValueError('lollisize must be floats greater than zero')
 773
 774    # define colors
 775    colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
 776    # get snps
 777    snps = aln.get_snps()
 778    # define where to plot (each ref type gets a separate line)
 779    ref_y_positions, y_pos, detected_var = {}, 0, set()
 780
 781    # iterate over SNPs
 782    for pos, snp_pos in snps.positions.items():
 783        if snp_pos.ref not in ref_y_positions:
 784            ref_y_positions[snp_pos.ref] = y_pos
 785            y_pos += 1.1
 786
 787        for alt, (af, _) in snp_pos.alt.items():
 788            x_pos = pos + aln.zoom[0] if aln.zoom is not None else pos
 789            y_ref = ref_y_positions[snp_pos.ref]
 790            ax.vlines(
 791                x=x_pos,
 792                ymin=y_ref,
 793                ymax=y_ref + af,
 794                color=colors[alt],
 795                zorder=100,
 796                linewidth=lollisize[0]
 797            )
 798            ax.plot(
 799                x_pos,
 800                y_ref + af,
 801                color=colors[alt],
 802                marker='o',
 803                markersize=lollisize[1]
 804            )
 805            detected_var.add(alt)
 806
 807    # plot hlines
 808    for y_char in ref_y_positions:
 809        ax.hlines(
 810            ref_y_positions[y_char],
 811            xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5,
 812            xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5,
 813            color='black',
 814            linestyle='-',
 815            zorder=0,
 816            linewidth=0.75
 817        )
 818    # create a custom legend
 819    if show_legend:
 820        custom_legend = [
 821            ax.add_line(
 822                plt.Line2D(
 823                    [],
 824                    [],
 825                    color=colors[char],
 826                    marker='o',
 827                    linestyle='',
 828                    markersize=5
 829                )
 830            ) for char in colors if char in detected_var
 831        ]
 832        ax.legend(
 833            custom_legend,
 834            [char for char in colors if char in detected_var],  # ensures correct sorting
 835            loc='lower right',
 836            title='variant',
 837            bbox_to_anchor=bbox_to_anchor,
 838            ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var),
 839            frameon=False
 840        )
 841
 842    # format axis
 843    _format_x_axis(aln, ax, show_x_label, show_left=False)
 844    ax.spines['left'].set_visible(False)
 845    ax.set_yticks([ref_y_positions[x] for x in ref_y_positions])
 846    ax.set_yticklabels(ref_y_positions.keys())
 847    ax.set_ylim(0, y_pos)
 848    ax.set_ylabel('reference')
 849
 850    return ax
 851
 852
 853def _plot_annotation(annotation_dict: dict, ax: plt.Axes, direction_marker_size: int | None, color: str | ScalarMappable):
 854    """
 855    Plot annotation rectangles
 856    """
 857    for annotation in annotation_dict:
 858        for locations in annotation_dict[annotation]['location']:
 859            x_value = locations[0]
 860            length = locations[1] - locations[0]
 861            ax.add_patch(
 862                patches.FancyBboxPatch(
 863                    (x_value, annotation_dict[annotation]['track'] + 1),
 864                    length,
 865                    0.8,
 866                    boxstyle="Round, pad=0",
 867                    ec="black",
 868                    fc=color.to_rgba(annotation_dict[annotation]['conservation']) if isinstance(color, ScalarMappable) else color,
 869                )
 870            )
 871            if direction_marker_size is not None:
 872                if annotation_dict[annotation]['strand'] == '-':
 873                    marker = '<'
 874                else:
 875                    marker = '>'
 876                ax.plot(x_value + length/2, annotation_dict[annotation]['track'] + 1.4, marker=marker, markersize=direction_marker_size, color='white', markeredgecolor='black')
 877
 878        # plot linked annotations (such as splicing)
 879        if len(annotation_dict[annotation]['location']) > 1:
 880            y_value = annotation_dict[annotation]['track'] + 1.4
 881            start = None
 882            for locations in annotation_dict[annotation]['location']:
 883                if start is None:
 884                    start = locations[1]
 885                    continue
 886                ax.plot([start, locations[0]], [y_value, y_value], '--', linewidth=2, color='black')
 887                start = locations[1]
 888
 889
 890def _add_track_positions(annotation_dic):
 891    """
 892    define the position for annotations square so that overlapping annotations do not overlap in the plot
 893    """
 894    # create a dict and sort
 895    annotation_dic = dict(sorted(annotation_dic.items(), key=lambda x: x[1]['location'][0][0]))
 896
 897    # remember for each track the largest stop
 898    track_stops = [0]
 899
 900    for ann in annotation_dic:
 901        flattened_locations = list(chain.from_iterable(annotation_dic[ann]['location']))  # flatten list
 902        track = 0
 903        # check if a start of a gene is smaller than the stop of the current track
 904        # -> move to new track
 905        while flattened_locations[0] < track_stops[track]:
 906            track += 1
 907            # if all prior tracks are potentially causing an overlap
 908            # create a new track and break
 909            if len(track_stops) <= track:
 910                track_stops.append(0)
 911                break
 912        # in the current track remember the stop of the current gene
 913        track_stops[track] = flattened_locations[-1]
 914        # and indicate the track in the dict
 915        annotation_dic[ann]['track'] = track
 916
 917    return annotation_dic
 918
 919
 920def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True,
 921             cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False,
 922             cbar_fraction: float = 0.1) -> plt.Axes:
 923    """
 924    Plot conserved ORFs.
 925    :param aln: alignment MSA class or path
 926    :param ax: matplotlib axes
 927    :param min_length: minimum length of orf
 928    :param non_overlapping_orfs: whether to consider overlapping orfs
 929    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
 930    :param direction_marker_size: marker size for direction marker, not shown if marker_size == None
 931    :param show_x_label: whether to show the x-axis label
 932    :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
 933    :param cbar_fraction: fraction of the original ax reserved for the colorbar
 934    :return matplotlib axes
 935    """
 936
 937    # normalize colorbar
 938    cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap))
 939    # validate input
 940    aln, ax = _validate_input_parameters(aln, ax)
 941
 942    # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only
 943    # searched within the zoomed region)
 944    aln_temp = deepcopy(aln)
 945    aln_temp.zoom = None
 946    if non_overlapping_orfs:
 947        orf_collection = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length)
 948    else:
 949        orf_collection = aln_temp.get_conserved_orfs(min_length=min_length)
 950
 951    # Normalize ORF dataclasses to the annotation dict shape consumed by plotting helpers.
 952    annotation_dict = {}
 953    for orf_id, orf_data in orf_collection.items():
 954        annotation_dict[orf_id] = {
 955            'location': orf_data.location,
 956            'strand': orf_data.strand,
 957            'conservation': orf_data.conservation,
 958        }
 959    # filter dict for zoom
 960    if aln.zoom is not None:
 961        annotation_dict = {
 962            key: val
 963            for key, val in annotation_dict.items()
 964            if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])
 965        }
 966    # add track for plotting
 967    _add_track_positions(annotation_dict)
 968    # plot
 969    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap)
 970
 971    # legend
 972    if show_cbar:
 973        cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
 974        cbar.set_label('% identity')
 975        cbar.set_ticks([0, 100])
 976
 977    # format axis
 978    _format_x_axis(aln, ax, show_x_label, show_left=False)
 979    ax.set_ylim(bottom=0.8)
 980    ax.set_yticks([])
 981    ax.set_yticklabels([])
 982    ax.set_title('conserved orfs', loc='left')
 983
 984    return ax
 985
 986
 987def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None,
 988                    color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> plt.Axes:
 989    """
 990    Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
 991    :param aln: alignment MSA class
 992    :param annotation: annotation class | path to annotation file
 993    :param ax: matplotlib axes
 994    :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
 995    :param color: color for the annotation
 996    :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True
 997    :param show_x_label: whether to show the x-axis label
 998    :return matplotlib axes
 999    """
1000    # validate input
1001    aln, ax, annotation = _validate_input_parameters(aln, ax, annotation)
1002    _validate_color(color)
1003
1004    # ignore features to plot for bed files (here it is written into one feature)
1005    if annotation.ann_type == 'bed':
1006        annotation_dict = annotation.features['region']
1007        feature_to_plot = 'bed regions'
1008    else:
1009        # try to subset the annotation dict
1010        try:
1011            annotation_dict = annotation.features[feature_to_plot]
1012        except KeyError:
1013            raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.')
1014
1015    # plotting and formating
1016    _add_track_positions(annotation_dict)
1017    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color)
1018    _format_x_axis(aln, ax, show_x_label, show_left=False)
1019    ax.set_ylim(bottom=0.8)
1020    ax.set_yticks([])
1021    ax.set_yticklabels([])
1022    ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')
1023
1024    return ax
1025
1026
1027def sequence_logo(aln:explore.MSA | str, ax:plt.Axes | None = None, color_scheme: str = 'standard', plot_type: str = 'stacked',
1028                  show_x_label:bool = False) -> plt.Axes:
1029    """
1030    Plot sequence logo or stacked area plot (use the first one with appropriate zoom levels). The
1031    logo visualizes the relative frequency of nt or aa characters in the alignment. The char frequency
1032    is scaled to the information content at each position. --> identical to how Geneious calculates it.
1033
1034    :param aln: alignment MSA class or path
1035    :param ax: matplotlib axes
1036    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
1037    :param plot_type: 'logo' for sequence logo, 'stacked' for stacked area plot
1038    :param show_x_label: whether to show the x-axis label
1039    :return matplotlib axes
1040    """
1041
1042    aln, ax = _validate_input_parameters(aln, ax)
1043    # calc matrix
1044    matrix = aln.calc_position_matrix('IC') * aln.calc_position_matrix('PPM')
1045    letters_to_plot = list(config.CHAR_COLORS[aln.aln_type]['standard'].keys())
1046
1047    # plot
1048    if plot_type == 'logo':
1049        for pos in range(matrix.shape[1]):
1050            # sort the positive matrix row values by size
1051            items = [(letters_to_plot[i], matrix[i, pos]) for i in range(len(letters_to_plot)) if matrix[i, pos] > 0]
1052            items.sort(key=lambda x: x[1])
1053            # plot each position
1054            y_offset = 0
1055            for letter, h in items:
1056                tp = TextPath((aln.zoom[0] - 0.325 if aln.zoom is not None else - 0.325, 0), letter, size=1, prop=FontProperties(weight='bold'))
1057                bb = tp.get_extents()
1058                glyph_height = bb.height if bb.height > 0 else 1e-6  # avoid div by zero
1059                scale_to_1 = 1.0 / glyph_height
1060
1061                transform = (Affine2D()
1062                             .scale(1.0, h * scale_to_1)  # scale manually by IC and normalize font
1063                             .translate(pos, y_offset)
1064                             + ax.transData)
1065
1066                patch = PathPatch(tp, transform=transform,
1067                                  facecolor=config.CHAR_COLORS[aln.aln_type][color_scheme][letter],
1068                                  edgecolor='none')
1069                ax.add_patch(patch)
1070                y_offset += h
1071    elif plot_type == 'stacked':
1072        y_values = np.zeros(matrix.shape[1])
1073        x_values = np.arange(0, matrix.shape[1]) if aln.zoom is None else np.arange(aln.zoom[0], aln.zoom[1])
1074        for row in range(matrix.shape[0]):
1075            y = matrix[row]
1076            ax.fill_between(x_values,
1077                            y_values,
1078                            y_values + y,
1079                            fc=config.CHAR_COLORS[aln.aln_type][color_scheme].get(letters_to_plot[row]),
1080                            ec='None',
1081                            label=letters_to_plot[row],
1082                            step='mid')
1083
1084    # adjust limits & labels
1085    _format_x_axis(aln, ax, show_x_label, show_left=True)
1086    if aln.aln_type == 'AA':
1087        ax.set_ylim(bottom=0, top=5)
1088    else:
1089        ax.set_ylim(bottom=0, top=2)
1090    ax.spines['top'].set_visible(False)
1091    ax.spines['right'].set_visible(False)
1092    ax.set_ylabel('bits')
1093
1094    return ax
1095
1096
1097def consensus_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False,
1098                   color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
1099                   show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> plt.Axes:
1100    """
1101    Plot a consensus sequence as a single-row colored sequence.
1102
1103    :param aln: alignment MSA class or path
1104    :param ax: matplotlib axes
1105    :param threshold: consensus threshold (0-1) or None for majority rule
1106    :param use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus
1107    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None
1108    :param mask_color: color used for mask characters (N/X)
1109    :param ambiguity_color: color used for ambiguous characters
1110    :param show_x_label: whether to show the x-axis label
1111    :param show_name: whether to show the 'consensus' label at y-axis
1112    :param show_sequence: whether to show the sequence at the y-axis
1113
1114    :return: matplotlib axes
1115    """
1116
1117    # validate input
1118    aln, ax = _validate_input_parameters(aln, ax)
1119    # validate colors
1120    for c in [mask_color, ambiguity_color]:
1121        _validate_color(c)
1122    # validate color scheme
1123    _validate_color_scheme(scheme=color_scheme, aln=aln)
1124
1125    # get consensus
1126    consensus = aln.get_consensus(threshold=threshold, use_ambig_nt=use_ambig_nt)
1127
1128    # prepare color mapping
1129    if color_scheme is not None:
1130        # get mapping from config
1131        char_colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
1132    else:
1133        char_colors = None
1134
1135    # Build polygons and colors for a single PolyCollection
1136    polygons = []
1137    poly_colors = []
1138    text_positions = []
1139    text_chars = []
1140    text_colors = []
1141
1142    zoom_offset = aln.zoom[0] if aln.zoom is not None else 0
1143
1144    y_position = len(aln) - 0.4
1145
1146    for pos, char in enumerate(consensus):
1147        x = pos + zoom_offset
1148        # determine color
1149        if char_colors is not None and char in char_colors:
1150            color = char_colors[char]
1151        else:
1152            # ambiguous nucleotide/aminoacid codes
1153            if char in config.AMBIG_CHARS[aln.aln_type]:
1154                color = ambiguity_color
1155            elif char in ['N', 'X']:
1156                color = mask_color
1157            else:
1158                color = basic_color
1159
1160        rect = [
1161            (x - 0.5, y_position),
1162            (x + 0.5, y_position),
1163            (x + 0.5, y_position+0.8),
1164            (x - 0.5, y_position+0.8),
1165        ]
1166        polygons.append(rect)
1167        poly_colors.append(color)
1168
1169        if show_sequence:
1170            text_positions.append(x)
1171            text_chars.append(char)
1172            # determine readable text color
1173            text_colors.append(_get_contrast_text_color(to_rgba(color)))
1174
1175    # add single PolyCollection
1176    if polygons:
1177        pc = PolyCollection(polygons, facecolors=poly_colors, edgecolors=poly_colors, linewidths=0.5)
1178        ax.add_collection(pc)
1179
1180    # add texts if requested
1181    if show_sequence:
1182        for x, ch, tc in zip(text_positions, text_chars, text_colors):
1183            ax.text(x, y_position+0.4, ch, ha='center', va='center_baseline', c=tc)
1184
1185    # format axis
1186    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
1187    ax.set_ylim(y_position-0.1, y_position + 0.9)
1188    if show_name:
1189        ax.yaxis.set_ticks_position('none')
1190        ax.set_yticks([y_position + 0.4])
1191        ax.set_yticklabels(['consensus'])
1192    else:
1193        ax.set_yticks([])
1194
1195    return ax
1196
1197def simplot(aln: explore.MSA | str, ref: str | None, ax: plt.Axes | None = None, colors: str | list | None = None,
1198            window_size: int = 200, step_size: int = 20, distance_calculation: str = 'ged', line_width: int | float = 1,
1199            show_legend: bool = False, show_reference: bool = True, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1),
1200            show_x_label: bool = False) -> plt.Axes:
1201    """
1202    Calculate binned pairwise distances (similarity) between sequences and a reference sequence
1203    over a stepwise sliding window in the zoomed region of the alignment. This is inspired by simplot that helps to identify
1204    recombination sites. For proper recombination analyses use simplot or simplot++ (https://github.com/Stephane-S/Simplot_PlusPlus).
1205    Each sequence is plotted as a line displaying the similarity to the reference. The reference sequence can be either
1206    set to None (compared to consensus) or to a specific reference id.
1207
1208    :param aln: alignment MSA class or path
1209    :param ax: matplotlib axes
1210    :param ref: reference sequence id or None. For 'None' all computations are compared to a majority consensus
1211    :param colors: color for each sequence. can be a single named color or a list of named colors or a plt.colormap or None (auto coloring)
1212    :param window_size: window size for sliding window
1213    :param step_size: step size for sliding window
1214    :param distance_calculation: distance calculation method. Supported: ghd (global hamming distance), ged (gap excluded distance) and for nt additionally: jc69(Jukes-Cantor 1969) and k2p (Kimura 2-Parameter / K80). For more information see: explore.MSA.calc_pairwise_identity_matrix()
1215    :param line_width: width of the plotted lines
1216    :param show_legend: whether to show the legend
1217    :param show_reference: whether to show the reference id in the right lower corner
1218    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
1219    :param show_x_label: whether to show the x-axis label
1220
1221    :return: matplotlib axes
1222    """
1223
1224    # validate inputs
1225    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
1226    if not isinstance(window_size, int) or window_size <= 0:
1227        raise ValueError('window_size has to be a positive integer')
1228    if window_size > aln.length:
1229        raise ValueError('window_size can not be larger than the (zoomed) alignment length')
1230    if not isinstance(step_size, int) or step_size <= 0:
1231        raise ValueError('step_size has to be a positive integer')
1232    if step_size > aln.length:
1233        raise ValueError('step_size can not be larger than the (zoomed) alignment length')
1234    if ref is not None and ref not in aln:
1235        raise ValueError(f'Reference {ref} not in alignment')
1236    if distance_calculation not in ['ghd', 'ged', 'jc69', 'k2p']:
1237        raise ValueError(f'Distance calculation method {distance_calculation} not supported. Supported: ghd, ged, jc69, k2p')
1238    if distance_calculation in ['jc69', 'k2p'] and aln.aln_type == 'AA':
1239        raise ValueError(f'Distance calculation method {distance_calculation} only supported for nucleotide alignments')
1240
1241    # get the sequence ids to plot
1242    sequence_ids = [seq_id for seq_id in aln if seq_id != ref]
1243
1244    # validate colors
1245    if colors is not None:
1246        # named color or colormap
1247        if isinstance(colors, str):
1248            # first try potential colormap
1249            try:
1250                cmap = plt.get_cmap(colors)
1251                cmap_colors = cmap(np.linspace(0, 1, len(sequence_ids)))
1252                color_map = {seq_id: color for seq_id, color in zip(sequence_ids, cmap_colors)}
1253            # single color
1254            except ValueError:
1255                _validate_color(colors)
1256                color_map = {seq_id: colors for seq_id in sequence_ids}
1257        # list of colors
1258        elif isinstance(colors, list):
1259            if len(colors) != len(sequence_ids):
1260                raise ValueError('colors list length has to match the number of plotted sequences')
1261            for color in colors:
1262                _validate_color(color)
1263            color_map = {seq_id: color for seq_id, color in zip(sequence_ids, colors)}
1264        else:
1265            raise ValueError('colors has to be either a single named color, a list of named colors, a plt colormap or None (auto)')
1266    else:
1267        color_map = None
1268
1269    # define region and window
1270    region_start = aln.zoom[0] if aln.zoom is not None else 0
1271    half_window_size = window_size // 2
1272
1273    # copy alignment and set reference
1274    aln_tmp = deepcopy(aln)
1275    aln_tmp.reference_id = ref
1276    # reset zoom (later the alignment dictionary will be replaced for a slice of the alignment)
1277    aln_tmp._zoom = None
1278
1279    # remember x positions and distances
1280    x_positions = []
1281    distance_traces = {seq_id: [] for seq_id in sequence_ids}
1282
1283    for point_to_plot in range(0, aln.length + 1, step_size):
1284        # define windows
1285        left_side = point_to_plot - half_window_size if point_to_plot - half_window_size > 0 else 0
1286        right_side = point_to_plot + half_window_size if point_to_plot + half_window_size < aln.length else aln.length
1287        # slice alignment and replace the original alignment
1288        aln_tmp._alignment = {
1289            seq_id: seq[left_side:right_side]
1290            for seq_id, seq in aln.alignment.items()
1291        }
1292        window_result = aln_tmp.calc_pairwise_distance_to_reference(distance_type=distance_calculation)
1293        value_map = {seq_id: value for seq_id, value in zip(window_result.sequence_ids, window_result.distances)}
1294
1295        x_positions.append(region_start + point_to_plot)
1296        for seq_id in sequence_ids:
1297            distance_traces[seq_id].append(value_map[seq_id])
1298
1299    for seq_id in sequence_ids:
1300        if color_map is not None:
1301            ax.plot(x_positions, distance_traces[seq_id],
1302                    color=color_map[seq_id],
1303                    linewidth=line_width, label=seq_id)
1304        else:
1305            ax.plot(x_positions, distance_traces[seq_id],
1306                    linewidth=line_width, label=seq_id)
1307    # Format axis
1308    ax.set_ylabel('similarity (%)')
1309    ax.set_ylim(0, 102)
1310    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=True)
1311    # add legend for all sequences on top
1312    if show_legend:
1313        leg1 = ax.legend(frameon=False, loc='lower right', bbox_to_anchor=bbox_to_anchor, ncols=3)
1314        ax.add_artist(leg1)
1315    # add legend for query
1316    if show_reference:
1317        ref_label = ref if ref is not None else 'consensus'
1318        ref_handle = plt.Line2D([0], [0], linewidth=0, label=f'reference: {ref_label}')
1319        leg2 = ax.legend(handles=[ref_handle], frameon=False, bbox_to_anchor=(1, 0), loc='lower right')
1320        ax.add_artist(leg2)
1321
1322    return ax
def alignment( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', show_mask: bool = True, fancy_gaps: bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard', show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float | int, float | int] | list = (1, 1), show_consensus: bool = False) -> matplotlib.axes._axes.Axes:
393def alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False,
394              custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
395              show_mask:bool = True, fancy_gaps:bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard',
396              show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1),
397              show_consensus: bool = False) -> plt.Axes:
398    """
399
400    Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as 
401    the identity alignments and similarity alignment function as each square for each character is individually plotted.
402
403    :param aln: alignment MSA class or path
404    :param ax: matplotlib axes
405    :param show_seq_names: whether to show seq names
406    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
407    :param custom_seq_names: custom seq names
408    :param mask_color: color for masked nucleotides/aminoacids
409    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
410    :param basic_color: color that will be used for all normal chars if the colorscheme is None
411    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
412    :param fancy_gaps: show gaps with a small black bar
413    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
414    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
415    :param show_x_label: whether to show x label
416    :param show_legend: whether to show the legend
417    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
418    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
419    :return  matplotlib axis
420    """
421    # Validate aln, ax inputs
422    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
423    # Validate colors
424    for c in [mask_color, ambiguity_color, basic_color]:
425        _validate_color(c)
426    # Validate color scheme
427    _validate_color_scheme(scheme=color_scheme, aln=aln)
428    # create color mapping
429    aln_colors = _create_color_dictionary(
430        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color,
431        different_char_color=None, mask_color=mask_color, ambiguity_color=ambiguity_color
432    )
433    # Compute identity alignment
434    numerical_alignment = aln.calc_numerical_alignment(encode_ambiguities=show_ambiguities, encode_mask=show_mask)
435    if color_scheme is None:
436        numerical_alignment[np.where(numerical_alignment > 0)] = 0
437    # create the alignment
438    detected_identity_values = _create_alignment(
439        aln=aln, ax=ax, matrix=numerical_alignment, fancy_gaps=fancy_gaps, create_identity_patch=True if color_scheme is None else False, show_gaps=True,
440        show_different_sequence=False, show_sequence_all=show_sequence_all, reference_color=None, aln_colors=aln_colors,
441        values_to_plot = list(aln_colors.keys())
442    )
443    # custom legend
444    if show_legend:
445        aln_colors.pop(0)  # otherwise legend is wrong (here there is no check if a position is identical or not)
446        _create_legend(
447            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln,
448            detected_identity_values=detected_identity_values,
449            ax=ax, bbox_to_anchor=bbox_to_anchor
450        )
451    if show_consensus:
452        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=show_sequence_all,
453            color_scheme='standard', basic_color=basic_color, mask_color=mask_color, ambiguity_color=ambiguity_color
454        )
455        ax.set_ylim(-0.5, len(aln) + 1)
456    else:
457        ax.set_ylim(-0.5, len(aln))
458
459    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, include_consensus=show_consensus)
460    # configure axis
461    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
462
463    return ax

Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as the identity alignments and similarity alignment function as each square for each character is individually plotted.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • show_seq_names: whether to show seq names
  • show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
  • custom_seq_names: custom seq names
  • mask_color: color for masked nucleotides/aminoacids
  • ambiguity_color: color for ambiguous nucleotides/aminoacids
  • basic_color: color that will be used for all normal chars if the colorscheme is None
  • show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
  • fancy_gaps: show gaps with a small black bar
  • show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
  • color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
  • show_x_label: whether to show x label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
  • show_consensus: whether to show the majority consensus sequence (standard-color scheme) :return matplotlib axis
def identity_alignment( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask: bool = True, show_gaps: bool = True, fancy_gaps: bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float | int, float | int] | list = (1, 1), show_consensus: bool = False) -> matplotlib.axes._axes.Axes:
467def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False,
468                       show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (),
469                       reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru',
470                       mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask:bool = True, show_gaps:bool = True,
471                       fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False,
472                       color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False,
473                       bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1), show_consensus: bool = False) -> plt.Axes:
474    """
475    Generates an identity alignment plot.
476    :param aln: alignment MSA class or path
477    :param ax: matplotlib axes
478    :param show_title: whether to show title
479    :param show_seq_names: whether to show seq names
480    :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
481    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
482    :param custom_seq_names: custom seq names
483    :param reference_color: color of reference sequence
484    :param basic_color: color for identical nucleotides/aminoacids
485    :param different_char_color: color for different nucleotides/aminoacids
486    :param mask_color: color for masked nucleotides/aminoacids
487    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
488    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
489    :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
490    :param fancy_gaps: show gaps with a small black bar
491    :param show_mismatches: whether to show mismatches otherwise it will be shown as match
492    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
493    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
494    :param show_x_label: whether to show x label
495    :param show_legend: whether to show the legend
496    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
497    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
498
499    :return  matplotlib axis
500    """
501
502    # Both options for gaps work hand in hand
503    if fancy_gaps:
504        show_gaps = True
505    # Validate aln, ax inputs
506    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
507    # Validate colors
508    for c in [reference_color, basic_color, different_char_color, mask_color, ambiguity_color]:
509        _validate_color(c)
510    # Validate color scheme
511    _validate_color_scheme(scheme=color_scheme, aln=aln)
512    # create color mapping
513    aln_colors = _create_color_dictionary(
514        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, different_char_color=different_char_color,
515        mask_color=mask_color, ambiguity_color=ambiguity_color
516    )
517    # Compute identity alignment
518    identity_aln = aln.calc_identity_alignment(
519        encode_mask=show_mask, encode_gaps=show_gaps, encode_mismatches=show_mismatches,encode_ambiguities=show_ambiguities,
520        encode_each_mismatch_char=True if color_scheme is not None else False
521    )
522    # create the alignment
523    detected_identity_values = _create_alignment(
524        aln=aln, ax=ax, matrix=identity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
525        show_different_sequence=show_identity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
526        aln_colors=aln_colors, values_to_plot=list(aln_colors.keys())[1:]
527    )
528    # custom legend
529    if show_legend:
530        _create_legend(
531            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, detected_identity_values=detected_identity_values,
532            ax=ax, bbox_to_anchor=bbox_to_anchor
533        )
534    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names)
535    # configure axis
536    if show_consensus:
537        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_identity_sequence]),
538                       color_scheme='standard', basic_color=basic_color, mask_color=mask_color,
539                       ambiguity_color=ambiguity_color
540                       )
541        ax.set_ylim(-0.5, len(aln) + 1)
542    else:
543        ax.set_ylim(-0.5, len(aln))
544
545    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
546               include_consensus=show_consensus)
547    if show_title:
548        ax.set_title('identity', loc='left')
549    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
550
551    return ax

Generates an identity alignment plot.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • show_title: whether to show title
  • show_seq_names: whether to show seq names
  • show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
  • show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
  • custom_seq_names: custom seq names
  • reference_color: color of reference sequence
  • basic_color: color for identical nucleotides/aminoacids
  • different_char_color: color for different nucleotides/aminoacids
  • mask_color: color for masked nucleotides/aminoacids
  • ambiguity_color: color for ambiguous nucleotides/aminoacids
  • show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
  • show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
  • fancy_gaps: show gaps with a small black bar
  • show_mismatches: whether to show mismatches otherwise it will be shown as match
  • show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
  • color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
  • show_x_label: whether to show x label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
  • show_consensus: whether to show the majority consensus sequence (standard-color scheme)

:return matplotlib axis

def similarity_alignment( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, matrix_type: str | None = None, show_title: bool = True, show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey', show_gaps: bool = True, fancy_gaps: bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1, show_consensus: bool = False) -> matplotlib.axes._axes.Axes:
554def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True,
555                         show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False,
556                         custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey',
557                         show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False,
558                         cbar_fraction: float = 0.1, show_consensus: bool = False)  -> plt.Axes:
559    """
560    Generates a similarity alignment plot. Importantly the similarity values are normalized!
561    :param aln: alignment MSA class or path
562    :param ax: matplotlib axes
563    :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
564    :param show_title: whether to show title
565    :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
566    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
567    :param show_seq_names: whether to show seq names
568    :param custom_seq_names: custom seq names
569    :param reference_color: color of reference sequence
570    :param different_char_color: color for the lowest similarity
571    :param basic_color: basic color for the highest similarity
572    :param show_gaps: whether to show gaps otherwise it will be ignored
573    :param fancy_gaps: show gaps with a small black bar
574    :param show_x_label: whether to show x label
575    :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
576    :param cbar_fraction: fraction of the original ax reserved for the legend
577    :param show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters)
578    :return  matplotlib axis
579    """
580    # Both options for gaps work hand in hand
581    if fancy_gaps:
582        show_gaps = True
583    # Validate aln, ax inputs
584    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
585    # Validate colors
586    for c in [reference_color, different_char_color, basic_color]:
587        _validate_color(c)
588    # Compute similarity alignment
589    similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type)  # use normalized values here
590    similarity_aln = similarity_aln.round(2)  # round data for good color mapping
591    # create cmap
592    cmap = LinearSegmentedColormap.from_list(
593        "extended",
594        [
595            (0.0, different_char_color),
596            (1.0, basic_color)
597        ],
598    )
599    cmap = ScalarMappable(norm=Normalize(vmin=0, vmax=1), cmap=cmap)
600    # create similarity values
601    similarity_values = np.arange(start=0, stop=1, step=0.01)
602    # round it to be absolutely sure that values match with rounded sim alignment
603    similarity_values = list(similarity_values.round(2))
604    # create the alignment
605    _create_alignment(
606        aln=aln, ax=ax, matrix=similarity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
607        show_different_sequence=show_similarity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
608        aln_colors=cmap, values_to_plot=similarity_values, identical_value=1
609    )
610    # legend
611    if show_cbar:
612        cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
613        cbar.set_ticks([0, 1])
614        cbar.set_ticklabels(['low', 'high'])
615
616    # format seq names
617    _seq_names(aln, ax, custom_seq_names, show_seq_names)
618
619    # configure axis
620    if show_consensus:
621        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_similarity_sequence]),
622                       color_scheme='standard', basic_color=basic_color)
623        ax.set_ylim(-0.5, len(aln) + 1)
624    else:
625        ax.set_ylim(-0.5, len(aln))
626
627    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
628               include_consensus=show_consensus)
629    if show_title:
630        ax.set_title('similarity', loc='left')
631    _format_x_axis(aln, ax, show_x_label, show_left=False)
632
633    return ax

Generates a similarity alignment plot. Importantly the similarity values are normalized!

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
  • show_title: whether to show title
  • show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
  • show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
  • show_seq_names: whether to show seq names
  • custom_seq_names: custom seq names
  • reference_color: color of reference sequence
  • different_char_color: color for the lowest similarity
  • basic_color: basic color for the highest similarity
  • show_gaps: whether to show gaps otherwise it will be ignored
  • fancy_gaps: show gaps with a small black bar
  • show_x_label: whether to show x label
  • show_cbar: whether to show the legend - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
  • cbar_fraction: fraction of the original ax reserved for the legend
  • show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters) :return matplotlib axis
def stat_plot( aln: msaexplorer.explore.MSA | str, stat_type: str, ax: matplotlib.axes._axes.Axes | None = None, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> matplotlib.axes._axes.Axes:
661def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood',
662              line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> plt.Axes:
663    """
664    Generate a plot for the various alignment stats.
665    :param aln: alignment MSA class or path
666    :param ax: matplotlib axes
667    :param stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', gap frequency, 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
668    :param line_color: color of the line
669    :param line_width: width of the line
670    :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
671    :param show_x_label: whether to show the x-axis label
672    :param show_title: whether to show the title
673    :return matplotlib axes
674    """
675
676    # input check
677    aln, ax = _validate_input_parameters(aln, ax)
678
679    # define possible functions to calc here
680    stat_functions: Dict[str, Callable[[], AlignmentStats | ndarray]] = {
681        'gc': aln.calc_gc,
682        'entropy': aln.calc_entropy,
683        'coverage': aln.calc_coverage,
684        'identity': aln.calc_identity_alignment,
685        'similarity': aln.calc_similarity_alignment,
686        'ts tv score': aln.calc_transition_transversion_score,
687        'gap frequency': aln.calc_gap_frequency
688    }
689
690    if stat_type not in stat_functions:
691        raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys())))
692
693    _validate_color(line_color)
694    if rolling_average < 1 or rolling_average > aln.length:
695        raise ValueError('rolling_average must be between 1 and length of sequence')
696
697    # generate input data
698    array = stat_functions[stat_type]()
699    if isinstance(array, AlignmentStats):
700        array = array.values
701    # define possible spans for values
702    if stat_type == 'identity':
703        min_value, max_value = -1, 0
704    elif stat_type == 'ts tv score':
705        min_value, max_value = -1, 1
706    else:
707        min_value, max_value = 0, 1
708    if stat_type in ['identity', 'similarity']:
709        # for the mean nan values get handled as the lowest possible number in the matrix
710        array = np.nan_to_num(array, True, min_value)
711        array = np.mean(array, axis=0)
712    data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length)
713
714    # plot the data
715    ax.fill_between(
716        # this add dummy data left and right for better plotting
717        # otherwise only half of the step is shown
718        np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx,
719        np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data,
720        min_value,
721        linewidth = line_width,
722        edgecolor=line_color,
723        step='mid' if rolling_average == 1 else None,
724        facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none'
725    )
726    if stat_type == 'gc':
727        ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1)
728
729    # format axis
730    ax.set_ylim(min_value, max_value*0.1+max_value)
731    ax.set_yticks([min_value, max_value])
732    if stat_type == 'gc':
733        ax.set_yticklabels(['0', '100'])
734    elif stat_type == 'ts tv score':
735        ax.set_yticklabels(['tv', 'ts'])
736    else:
737        ax.set_yticklabels(['low', 'high'])
738
739    # show title
740    if show_title:
741        ax.set_title(
742            f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position',
743            loc='left'
744        )
745
746    _format_x_axis(aln, ax, show_x_label, show_left=True)
747
748    return ax

Generate a plot for the various alignment stats.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', gap frequency, 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
  • line_color: color of the line
  • line_width: width of the line
  • rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
  • show_x_label: whether to show the x-axis label
  • show_title: whether to show the title :return matplotlib axes
def variant_plot( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3), color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float | int, float | int] | list = (1, 1)) -> matplotlib.axes._axes.Axes:
751def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3),
752                 color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True,
753                 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1)) -> plt.Axes:
754    """
755    Plots variants.
756    :param aln: alignment MSA class or path
757    :param ax: matplotlib axes
758    :param lollisize: (stem_size, head_size)
759    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
760    :param show_x_label:  whether to show the x-axis label
761    :param show_legend: whether to show the legend
762    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
763    :return matplotlib axes
764    """
765
766    # validate input
767    aln, ax = _validate_input_parameters(aln, ax)
768    _validate_color_scheme(color_scheme, aln)
769    if not isinstance(lollisize, tuple) or len(lollisize) != 2:
770        raise ValueError('lollisize must be tuple of length 2 (stem, head)')
771    for _size in lollisize:
772        if not isinstance(_size, float | int) or _size <= 0:
773            raise ValueError('lollisize must be floats greater than zero')
774
775    # define colors
776    colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
777    # get snps
778    snps = aln.get_snps()
779    # define where to plot (each ref type gets a separate line)
780    ref_y_positions, y_pos, detected_var = {}, 0, set()
781
782    # iterate over SNPs
783    for pos, snp_pos in snps.positions.items():
784        if snp_pos.ref not in ref_y_positions:
785            ref_y_positions[snp_pos.ref] = y_pos
786            y_pos += 1.1
787
788        for alt, (af, _) in snp_pos.alt.items():
789            x_pos = pos + aln.zoom[0] if aln.zoom is not None else pos
790            y_ref = ref_y_positions[snp_pos.ref]
791            ax.vlines(
792                x=x_pos,
793                ymin=y_ref,
794                ymax=y_ref + af,
795                color=colors[alt],
796                zorder=100,
797                linewidth=lollisize[0]
798            )
799            ax.plot(
800                x_pos,
801                y_ref + af,
802                color=colors[alt],
803                marker='o',
804                markersize=lollisize[1]
805            )
806            detected_var.add(alt)
807
808    # plot hlines
809    for y_char in ref_y_positions:
810        ax.hlines(
811            ref_y_positions[y_char],
812            xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5,
813            xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5,
814            color='black',
815            linestyle='-',
816            zorder=0,
817            linewidth=0.75
818        )
819    # create a custom legend
820    if show_legend:
821        custom_legend = [
822            ax.add_line(
823                plt.Line2D(
824                    [],
825                    [],
826                    color=colors[char],
827                    marker='o',
828                    linestyle='',
829                    markersize=5
830                )
831            ) for char in colors if char in detected_var
832        ]
833        ax.legend(
834            custom_legend,
835            [char for char in colors if char in detected_var],  # ensures correct sorting
836            loc='lower right',
837            title='variant',
838            bbox_to_anchor=bbox_to_anchor,
839            ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var),
840            frameon=False
841        )
842
843    # format axis
844    _format_x_axis(aln, ax, show_x_label, show_left=False)
845    ax.spines['left'].set_visible(False)
846    ax.set_yticks([ref_y_positions[x] for x in ref_y_positions])
847    ax.set_yticklabels(ref_y_positions.keys())
848    ax.set_ylim(0, y_pos)
849    ax.set_ylabel('reference')
850
851    return ax

Plots variants.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • lollisize: (stem_size, head_size)
  • color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
  • show_x_label: whether to show the x-axis label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html :return matplotlib axes
def orf_plot( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1) -> matplotlib.axes._axes.Axes:
921def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True,
922             cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False,
923             cbar_fraction: float = 0.1) -> plt.Axes:
924    """
925    Plot conserved ORFs.
926    :param aln: alignment MSA class or path
927    :param ax: matplotlib axes
928    :param min_length: minimum length of orf
929    :param non_overlapping_orfs: whether to consider overlapping orfs
930    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
931    :param direction_marker_size: marker size for direction marker, not shown if marker_size == None
932    :param show_x_label: whether to show the x-axis label
933    :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
934    :param cbar_fraction: fraction of the original ax reserved for the colorbar
935    :return matplotlib axes
936    """
937
938    # normalize colorbar
939    cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap))
940    # validate input
941    aln, ax = _validate_input_parameters(aln, ax)
942
943    # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only
944    # searched within the zoomed region)
945    aln_temp = deepcopy(aln)
946    aln_temp.zoom = None
947    if non_overlapping_orfs:
948        orf_collection = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length)
949    else:
950        orf_collection = aln_temp.get_conserved_orfs(min_length=min_length)
951
952    # Normalize ORF dataclasses to the annotation dict shape consumed by plotting helpers.
953    annotation_dict = {}
954    for orf_id, orf_data in orf_collection.items():
955        annotation_dict[orf_id] = {
956            'location': orf_data.location,
957            'strand': orf_data.strand,
958            'conservation': orf_data.conservation,
959        }
960    # filter dict for zoom
961    if aln.zoom is not None:
962        annotation_dict = {
963            key: val
964            for key, val in annotation_dict.items()
965            if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])
966        }
967    # add track for plotting
968    _add_track_positions(annotation_dict)
969    # plot
970    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap)
971
972    # legend
973    if show_cbar:
974        cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
975        cbar.set_label('% identity')
976        cbar.set_ticks([0, 100])
977
978    # format axis
979    _format_x_axis(aln, ax, show_x_label, show_left=False)
980    ax.set_ylim(bottom=0.8)
981    ax.set_yticks([])
982    ax.set_yticklabels([])
983    ax.set_title('conserved orfs', loc='left')
984
985    return ax

Plot conserved ORFs.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • min_length: minimum length of orf
  • non_overlapping_orfs: whether to consider overlapping orfs
  • cmap: color mapping for % identity - see https: //matplotlib.org/stable/users/explain/colors/colormaps.html
  • direction_marker_size: marker size for direction marker, not shown if marker_size == None
  • show_x_label: whether to show the x-axis label
  • show_cbar: whether to show the colorbar - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
  • cbar_fraction: fraction of the original ax reserved for the colorbar :return matplotlib axes
def annotation_plot( aln: msaexplorer.explore.MSA | str, annotation: msaexplorer.explore.Annotation | str, feature_to_plot: str, ax: matplotlib.axes._axes.Axes | None = None, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> matplotlib.axes._axes.Axes:
 988def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None,
 989                    color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> plt.Axes:
 990    """
 991    Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
 992    :param aln: alignment MSA class
 993    :param annotation: annotation class | path to annotation file
 994    :param ax: matplotlib axes
 995    :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
 996    :param color: color for the annotation
 997    :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True
 998    :param show_x_label: whether to show the x-axis label
 999    :return matplotlib axes
1000    """
1001    # validate input
1002    aln, ax, annotation = _validate_input_parameters(aln, ax, annotation)
1003    _validate_color(color)
1004
1005    # ignore features to plot for bed files (here it is written into one feature)
1006    if annotation.ann_type == 'bed':
1007        annotation_dict = annotation.features['region']
1008        feature_to_plot = 'bed regions'
1009    else:
1010        # try to subset the annotation dict
1011        try:
1012            annotation_dict = annotation.features[feature_to_plot]
1013        except KeyError:
1014            raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.')
1015
1016    # plotting and formating
1017    _add_track_positions(annotation_dict)
1018    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color)
1019    _format_x_axis(aln, ax, show_x_label, show_left=False)
1020    ax.set_ylim(bottom=0.8)
1021    ax.set_yticks([])
1022    ax.set_yticklabels([])
1023    ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')
1024
1025    return ax

Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.

Parameters
  • aln: alignment MSA class
  • annotation: annotation class | path to annotation file
  • ax: matplotlib axes
  • feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
  • color: color for the annotation
  • direction_marker_size: marker size for direction marker, only relevant if show_direction is True
  • show_x_label: whether to show the x-axis label :return matplotlib axes
def consensus_plot( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False, color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> matplotlib.axes._axes.Axes:
1098def consensus_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False,
1099                   color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
1100                   show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> plt.Axes:
1101    """
1102    Plot a consensus sequence as a single-row colored sequence.
1103
1104    :param aln: alignment MSA class or path
1105    :param ax: matplotlib axes
1106    :param threshold: consensus threshold (0-1) or None for majority rule
1107    :param use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus
1108    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None
1109    :param mask_color: color used for mask characters (N/X)
1110    :param ambiguity_color: color used for ambiguous characters
1111    :param show_x_label: whether to show the x-axis label
1112    :param show_name: whether to show the 'consensus' label at y-axis
1113    :param show_sequence: whether to show the sequence at the y-axis
1114
1115    :return: matplotlib axes
1116    """
1117
1118    # validate input
1119    aln, ax = _validate_input_parameters(aln, ax)
1120    # validate colors
1121    for c in [mask_color, ambiguity_color]:
1122        _validate_color(c)
1123    # validate color scheme
1124    _validate_color_scheme(scheme=color_scheme, aln=aln)
1125
1126    # get consensus
1127    consensus = aln.get_consensus(threshold=threshold, use_ambig_nt=use_ambig_nt)
1128
1129    # prepare color mapping
1130    if color_scheme is not None:
1131        # get mapping from config
1132        char_colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
1133    else:
1134        char_colors = None
1135
1136    # Build polygons and colors for a single PolyCollection
1137    polygons = []
1138    poly_colors = []
1139    text_positions = []
1140    text_chars = []
1141    text_colors = []
1142
1143    zoom_offset = aln.zoom[0] if aln.zoom is not None else 0
1144
1145    y_position = len(aln) - 0.4
1146
1147    for pos, char in enumerate(consensus):
1148        x = pos + zoom_offset
1149        # determine color
1150        if char_colors is not None and char in char_colors:
1151            color = char_colors[char]
1152        else:
1153            # ambiguous nucleotide/aminoacid codes
1154            if char in config.AMBIG_CHARS[aln.aln_type]:
1155                color = ambiguity_color
1156            elif char in ['N', 'X']:
1157                color = mask_color
1158            else:
1159                color = basic_color
1160
1161        rect = [
1162            (x - 0.5, y_position),
1163            (x + 0.5, y_position),
1164            (x + 0.5, y_position+0.8),
1165            (x - 0.5, y_position+0.8),
1166        ]
1167        polygons.append(rect)
1168        poly_colors.append(color)
1169
1170        if show_sequence:
1171            text_positions.append(x)
1172            text_chars.append(char)
1173            # determine readable text color
1174            text_colors.append(_get_contrast_text_color(to_rgba(color)))
1175
1176    # add single PolyCollection
1177    if polygons:
1178        pc = PolyCollection(polygons, facecolors=poly_colors, edgecolors=poly_colors, linewidths=0.5)
1179        ax.add_collection(pc)
1180
1181    # add texts if requested
1182    if show_sequence:
1183        for x, ch, tc in zip(text_positions, text_chars, text_colors):
1184            ax.text(x, y_position+0.4, ch, ha='center', va='center_baseline', c=tc)
1185
1186    # format axis
1187    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
1188    ax.set_ylim(y_position-0.1, y_position + 0.9)
1189    if show_name:
1190        ax.yaxis.set_ticks_position('none')
1191        ax.set_yticks([y_position + 0.4])
1192        ax.set_yticklabels(['consensus'])
1193    else:
1194        ax.set_yticks([])
1195
1196    return ax

Plot a consensus sequence as a single-row colored sequence.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • threshold: consensus threshold (0-1) or None for majority rule
  • use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus
  • color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None
  • mask_color: color used for mask characters (N/X)
  • ambiguity_color: color used for ambiguous characters
  • show_x_label: whether to show the x-axis label
  • show_name: whether to show the 'consensus' label at y-axis
  • show_sequence: whether to show the sequence at the y-axis
Returns

matplotlib axes

def simplot( aln: msaexplorer.explore.MSA | str, ref: str | None, ax: matplotlib.axes._axes.Axes | None = None, colors: str | list | None = None, window_size: int = 200, step_size: int = 20, distance_calculation: str = 'ged', line_width: int | float = 1, show_legend: bool = False, show_reference: bool = True, bbox_to_anchor: tuple[float | int, float | int] | list = (1, 1), show_x_label: bool = False) -> matplotlib.axes._axes.Axes:
1198def simplot(aln: explore.MSA | str, ref: str | None, ax: plt.Axes | None = None, colors: str | list | None = None,
1199            window_size: int = 200, step_size: int = 20, distance_calculation: str = 'ged', line_width: int | float = 1,
1200            show_legend: bool = False, show_reference: bool = True, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1),
1201            show_x_label: bool = False) -> plt.Axes:
1202    """
1203    Calculate binned pairwise distances (similarity) between sequences and a reference sequence
1204    over a stepwise sliding window in the zoomed region of the alignment. This is inspired by simplot that helps to identify
1205    recombination sites. For proper recombination analyses use simplot or simplot++ (https://github.com/Stephane-S/Simplot_PlusPlus).
1206    Each sequence is plotted as a line displaying the similarity to the reference. The reference sequence can be either
1207    set to None (compared to consensus) or to a specific reference id.
1208
1209    :param aln: alignment MSA class or path
1210    :param ax: matplotlib axes
1211    :param ref: reference sequence id or None. For 'None' all computations are compared to a majority consensus
1212    :param colors: color for each sequence. can be a single named color or a list of named colors or a plt.colormap or None (auto coloring)
1213    :param window_size: window size for sliding window
1214    :param step_size: step size for sliding window
1215    :param distance_calculation: distance calculation method. Supported: ghd (global hamming distance), ged (gap excluded distance) and for nt additionally: jc69(Jukes-Cantor 1969) and k2p (Kimura 2-Parameter / K80). For more information see: explore.MSA.calc_pairwise_identity_matrix()
1216    :param line_width: width of the plotted lines
1217    :param show_legend: whether to show the legend
1218    :param show_reference: whether to show the reference id in the right lower corner
1219    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
1220    :param show_x_label: whether to show the x-axis label
1221
1222    :return: matplotlib axes
1223    """
1224
1225    # validate inputs
1226    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
1227    if not isinstance(window_size, int) or window_size <= 0:
1228        raise ValueError('window_size has to be a positive integer')
1229    if window_size > aln.length:
1230        raise ValueError('window_size can not be larger than the (zoomed) alignment length')
1231    if not isinstance(step_size, int) or step_size <= 0:
1232        raise ValueError('step_size has to be a positive integer')
1233    if step_size > aln.length:
1234        raise ValueError('step_size can not be larger than the (zoomed) alignment length')
1235    if ref is not None and ref not in aln:
1236        raise ValueError(f'Reference {ref} not in alignment')
1237    if distance_calculation not in ['ghd', 'ged', 'jc69', 'k2p']:
1238        raise ValueError(f'Distance calculation method {distance_calculation} not supported. Supported: ghd, ged, jc69, k2p')
1239    if distance_calculation in ['jc69', 'k2p'] and aln.aln_type == 'AA':
1240        raise ValueError(f'Distance calculation method {distance_calculation} only supported for nucleotide alignments')
1241
1242    # get the sequence ids to plot
1243    sequence_ids = [seq_id for seq_id in aln if seq_id != ref]
1244
1245    # validate colors
1246    if colors is not None:
1247        # named color or colormap
1248        if isinstance(colors, str):
1249            # first try potential colormap
1250            try:
1251                cmap = plt.get_cmap(colors)
1252                cmap_colors = cmap(np.linspace(0, 1, len(sequence_ids)))
1253                color_map = {seq_id: color for seq_id, color in zip(sequence_ids, cmap_colors)}
1254            # single color
1255            except ValueError:
1256                _validate_color(colors)
1257                color_map = {seq_id: colors for seq_id in sequence_ids}
1258        # list of colors
1259        elif isinstance(colors, list):
1260            if len(colors) != len(sequence_ids):
1261                raise ValueError('colors list length has to match the number of plotted sequences')
1262            for color in colors:
1263                _validate_color(color)
1264            color_map = {seq_id: color for seq_id, color in zip(sequence_ids, colors)}
1265        else:
1266            raise ValueError('colors has to be either a single named color, a list of named colors, a plt colormap or None (auto)')
1267    else:
1268        color_map = None
1269
1270    # define region and window
1271    region_start = aln.zoom[0] if aln.zoom is not None else 0
1272    half_window_size = window_size // 2
1273
1274    # copy alignment and set reference
1275    aln_tmp = deepcopy(aln)
1276    aln_tmp.reference_id = ref
1277    # reset zoom (later the alignment dictionary will be replaced for a slice of the alignment)
1278    aln_tmp._zoom = None
1279
1280    # remember x positions and distances
1281    x_positions = []
1282    distance_traces = {seq_id: [] for seq_id in sequence_ids}
1283
1284    for point_to_plot in range(0, aln.length + 1, step_size):
1285        # define windows
1286        left_side = point_to_plot - half_window_size if point_to_plot - half_window_size > 0 else 0
1287        right_side = point_to_plot + half_window_size if point_to_plot + half_window_size < aln.length else aln.length
1288        # slice alignment and replace the original alignment
1289        aln_tmp._alignment = {
1290            seq_id: seq[left_side:right_side]
1291            for seq_id, seq in aln.alignment.items()
1292        }
1293        window_result = aln_tmp.calc_pairwise_distance_to_reference(distance_type=distance_calculation)
1294        value_map = {seq_id: value for seq_id, value in zip(window_result.sequence_ids, window_result.distances)}
1295
1296        x_positions.append(region_start + point_to_plot)
1297        for seq_id in sequence_ids:
1298            distance_traces[seq_id].append(value_map[seq_id])
1299
1300    for seq_id in sequence_ids:
1301        if color_map is not None:
1302            ax.plot(x_positions, distance_traces[seq_id],
1303                    color=color_map[seq_id],
1304                    linewidth=line_width, label=seq_id)
1305        else:
1306            ax.plot(x_positions, distance_traces[seq_id],
1307                    linewidth=line_width, label=seq_id)
1308    # Format axis
1309    ax.set_ylabel('similarity (%)')
1310    ax.set_ylim(0, 102)
1311    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=True)
1312    # add legend for all sequences on top
1313    if show_legend:
1314        leg1 = ax.legend(frameon=False, loc='lower right', bbox_to_anchor=bbox_to_anchor, ncols=3)
1315        ax.add_artist(leg1)
1316    # add legend for query
1317    if show_reference:
1318        ref_label = ref if ref is not None else 'consensus'
1319        ref_handle = plt.Line2D([0], [0], linewidth=0, label=f'reference: {ref_label}')
1320        leg2 = ax.legend(handles=[ref_handle], frameon=False, bbox_to_anchor=(1, 0), loc='lower right')
1321        ax.add_artist(leg2)
1322
1323    return ax

Calculate binned pairwise distances (similarity) between sequences and a reference sequence over a stepwise sliding window in the zoomed region of the alignment. This is inspired by simplot that helps to identify recombination sites. For proper recombination analyses use simplot or simplot++ (https://github.com/Stephane-S/Simplot_PlusPlus). Each sequence is plotted as a line displaying the similarity to the reference. The reference sequence can be either set to None (compared to consensus) or to a specific reference id.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • ref: reference sequence id or None. For 'None' all computations are compared to a majority consensus
  • colors: color for each sequence. can be a single named color or a list of named colors or a plt.colormap or None (auto coloring)
  • window_size: window size for sliding window
  • step_size: step size for sliding window
  • distance_calculation: distance calculation method. Supported: ghd (global hamming distance), ged (gap excluded distance) and for nt additionally: jc69(Jukes-Cantor 1969) and k2p (Kimura 2-Parameter / K80). For more information see: explore.MSA.calc_pairwise_identity_matrix()
  • line_width: width of the plotted lines
  • show_legend: whether to show the legend
  • show_reference: whether to show the reference id in the right lower corner
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
  • show_x_label: whether to show the x-axis label
Returns

matplotlib axes