msaexplorer.draw

The draw module

The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a matplotlib axes has to be passed to the plotting function.

Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments. The functions will raise the appropriate exception in such a case.

Functions

   1r"""
   2# The draw module
   3
   4The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a
   5`matplotlib axes` has to be passed to the plotting function.
   6
   7Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments.
   8The functions will raise the appropriate exception in such a case.
   9
  10## Functions
  11
  12"""
  13import pathlib
  14# built-in
  15from itertools import chain
  16from typing import Callable, Dict
  17from copy import deepcopy
  18import os
  19
  20import matplotlib
  21from numpy import ndarray
  22
  23# MSAexplorer
  24from msaexplorer import explore, config
  25
  26# libs
  27import numpy as np
  28import matplotlib.pyplot as plt
  29import matplotlib.patches as patches
  30from matplotlib.cm import ScalarMappable
  31from matplotlib.colors import is_color_like, Normalize, to_rgba, LinearSegmentedColormap
  32from matplotlib.collections import PatchCollection, PolyCollection
  33from matplotlib.text import TextPath
  34from matplotlib.patches import PathPatch
  35from matplotlib.font_manager import FontProperties
  36from matplotlib.transforms import Affine2D
  37
  38
  39def _validate_input_parameters(aln: explore.MSA | str, ax: plt.Axes, annotation: explore.Annotation | str | None = None) \
  40        -> tuple[explore.MSA, plt.Axes, explore.Annotation] | tuple[explore.MSA, plt.Axes]:
  41    """
  42    Validate MSA class and axis.
  43    """
  44    # check if alignment is path
  45    if type(aln) is str:
  46        if os.path.exists(aln):
  47            aln = explore.MSA(aln)
  48        else:
  49            raise FileNotFoundError(f'File {aln} does not exist')
  50    # check if alignment is correct type
  51    if not isinstance(aln, explore.MSA):
  52        raise ValueError('alignment has to be an MSA class. use explore.MSA() to read in alignment')
  53    # check if a axis was created
  54    # if ax not provided generate one from scratch
  55    if ax is None:
  56        ax = plt.gca()
  57    elif not isinstance(ax, plt.Axes):
  58            raise ValueError('ax has to be an matplotlib axis')
  59    # check if annotation is correct type
  60    if annotation is not None:
  61        if type(annotation) is str:
  62            if os.path.exists(annotation):
  63                # reset zoom so the annotation is correctly parsed
  64                msa_temp = deepcopy(aln)
  65                msa_temp.zoom = None
  66                annotation = explore.Annotation(msa_temp, annotation)
  67            else:
  68                raise FileNotFoundError()
  69        if not isinstance(annotation, explore.Annotation):
  70            raise ValueError('annotation has to be an annotation class. use explore.Annotation() to read in annotation')
  71
  72    if annotation is None:
  73        return aln, ax
  74    else:
  75        return aln, ax, annotation
  76
  77
  78def _validate_color(c):
  79    """
  80    validate color and raise error
  81    """
  82    if not is_color_like(c):
  83        raise ValueError(f'{c} is not a color')
  84
  85
  86def _validate_color_scheme(scheme: str | None, aln: explore.MSA):
  87    """
  88    validates colorscheme
  89    """
  90    if scheme is not None:
  91        if scheme not in config.CHAR_COLORS[aln.aln_type].keys():
  92            raise ValueError(f'Scheme not supported. Supported: {config.CHAR_COLORS[aln.aln_type].keys()}')
  93
  94
  95def _create_color_dictionary(aln: explore.MSA, color_scheme: str, identical_char_color: str | None = None, different_char_color: str | None = None, mask_color: str | None = None, ambiguity_color: str | None = None):
  96    """
  97    create the colorscheme dictionary -> this functions adds the respective colors to a dictionary. Some have fixed values and colors of defined
  98    schemes are always the idx + 1
  99    """
 100    # basic color mapping
 101    aln_colors = {}
 102
 103    if identical_char_color is not None:
 104        aln_colors[0] = {'type': 'identical', 'color': identical_char_color}
 105    if different_char_color is not None:
 106        aln_colors[-1] = {'type': 'different', 'color': different_char_color}
 107    if mask_color is not None:
 108        aln_colors[-2] = {'type': 'mask', 'color': mask_color}
 109    if ambiguity_color is not None:
 110        aln_colors[-3] = {'type': 'ambiguity', 'color': ambiguity_color}
 111
 112    # use the standard setting for the index (same as in aln.calc_identity_alignment)
 113    # and map the corresponding color scheme to it
 114    if color_scheme is not None:
 115        for idx, char in enumerate(config.CHAR_COLORS[aln.aln_type]['standard']):
 116            aln_colors[idx + 1] = {'type': char, 'color': config.CHAR_COLORS[aln.aln_type][color_scheme][char]}
 117
 118    return aln_colors
 119
 120
 121def _format_x_axis(aln: explore.MSA, ax: plt.Axes, show_x_label: bool, show_left: bool):
 122    """
 123    General x axis formatting.
 124    """
 125    ax.set_xlim(
 126        (aln.zoom[0] - 0.5, aln.zoom[0] + aln.length - 0.5) if aln.zoom is not None else (-0.5, aln.length - 0.5)
 127    )
 128    ax.spines['top'].set_visible(False)
 129    ax.spines['right'].set_visible(False)
 130    if show_x_label:
 131        ax.set_xlabel('alignment position')
 132    if not show_left:
 133        ax.spines['left'].set_visible(False)
 134
 135
 136def _seq_names(aln: explore.MSA, ax: plt.Axes, custom_seq_names: tuple, show_seq_names: bool, include_consensus: bool = False):
 137    """
 138    Validate custom names and set show names to True. Format axis accordingly.
 139    """
 140    if custom_seq_names:
 141        show_seq_names = True
 142        if not isinstance(custom_seq_names, tuple):
 143            raise ValueError('configure your custom names list: custom_names=(name1, name2...)')
 144        if len(custom_seq_names) != len(aln.alignment.keys()):
 145            raise ValueError('length of sequences not equal to number of custom names')
 146    if show_seq_names:
 147        ax.yaxis.set_ticks_position('none')
 148        ax.set_yticks(np.arange(len(aln.alignment)))
 149        if custom_seq_names:
 150            names = custom_seq_names[::-1]
 151        else:
 152            names = [x.split(' ')[0] for x in list(aln.alignment.keys())[::-1]]
 153        if include_consensus:
 154            names = names + ['consensus']
 155            y_ticks = np.arange(len(aln.alignment) + 1)
 156        else:
 157            y_ticks = np.arange(len(aln.alignment))
 158        ax.set_yticks(y_ticks)
 159        ax.set_yticklabels(names)
 160    else:
 161        ax.set_yticks([])
 162
 163
 164def _create_legend(color_scheme: str, aln_colors: dict, aln: explore.MSA, detected_identity_values: set, ax: plt.Axes, bbox_to_anchor: tuple | list):
 165    """
 166    create the legend for the alignment and identity alignment plot
 167    """
 168    if color_scheme is not None and color_scheme != 'standard':
 169        for x in aln_colors:
 170            for group in config.CHAR_GROUPS[aln.aln_type][color_scheme]:
 171                if aln_colors[x]['type'] in config.CHAR_GROUPS[aln.aln_type][color_scheme][group]:
 172                    aln_colors[x]['type'] = group
 173                    break
 174    # create it
 175    handels, labels, detected_groups = [], [], set()
 176    for x in aln_colors:
 177        if x in detected_identity_values and aln_colors[x]['type'] not in detected_groups:
 178            handels.append(
 179                ax.add_line(
 180                    plt.Line2D(
 181                        [],
 182                        [],
 183                        color=aln_colors[x]['color'] if color_scheme != 'hydrophobicity' or x == 0 else
 184                        config.CHAR_COLORS[aln.aln_type]['hydrophobicity'][
 185                            config.CHAR_GROUPS[aln.aln_type]['hydrophobicity'][aln_colors[x]['type']][0]
 186                        ],
 187                        marker='s',
 188                        markeredgecolor='grey',
 189                        linestyle='',
 190                        markersize=10))
 191            )
 192            labels.append(aln_colors[x]['type'])
 193            detected_groups.add(aln_colors[x]['type'])
 194
 195    # ncols
 196    if color_scheme is None or aln.aln_type != 'AA':
 197        ncols = len(detected_identity_values)
 198    elif color_scheme == 'standard':
 199        ncols = (len(detected_identity_values) + 1) / 2
 200    else:
 201        ncols = (len(detected_groups) + 1) / 2
 202
 203    # plot it
 204    ax.legend(
 205        handels,
 206        labels,
 207        loc='lower right',
 208        bbox_to_anchor=bbox_to_anchor,
 209        ncols=ncols,
 210        frameon=False
 211    )
 212
 213
 214def _get_contrast_text_color(rgba_color):
 215        """
 216        compute the brightness of a color
 217        """
 218        r, g, b, a = rgba_color
 219        brightness = (r * 299 + g * 587 + b * 114) / 1000
 220
 221        return 'white' if brightness < 0.5 else 'black'
 222
 223
 224def _create_alignment(aln: explore.MSA, ax: plt.Axes, matrix: ndarray, aln_colors: dict | ScalarMappable, fancy_gaps: bool, create_identity_patch: bool,
 225                      show_gaps: bool, show_different_sequence: bool, show_sequence_all: bool, reference_color: str | None, values_to_plot: list,
 226                      identical_value: int | float = 0):
 227    """
 228    create the polygon patch collection for an alignment
 229    """
 230
 231    # helper functions
 232    def _create_identity_patch(row, aln: explore.MSA, col: list, zoom: tuple[int, int], y_position: float | int,
 233                               reference_color: str | None, seq_name: str, identity_color: str | ndarray,
 234                               show_gaps: bool):
 235        """
 236        Creates the initial patch and adds it to the collection.
 237        """
 238
 239        color = reference_color if seq_name == aln.reference_id and reference_color is not None else identity_color
 240
 241        if show_gaps:
 242            # plot a rectangle for parts that do not have gaps
 243            for stretch in _find_stretches(row, True):
 244                col.append(
 245                    patches.Rectangle(
 246                        (stretch[0] + zoom[0] - 0.5, y_position), stretch[1] - stretch[0] + 1, 0.8,
 247                        facecolor=color,
 248                        edgecolor=color,
 249                        linewidth=0.5
 250                    )
 251                )
 252        # just plot a rectangle
 253        else:
 254            col.append(
 255                patches.Rectangle(
 256                    (zoom[0] - 0.5, y_position), zoom[1] - zoom[0], 0.8,
 257                    facecolor=color,
 258                    edgecolor=color,
 259                    linewidth=0.5
 260                )
 261            )
 262
 263        return col
 264
 265    def _find_stretches(row, non_nan_only=False) -> list[tuple[int, int, int]] | list[tuple[int, int]]:
 266        """
 267        Finds consecutive stretches of values in an array, with an option to exclude NaN stretches and return start, end, value at start
 268        """
 269        if row.size == 0:
 270            return []
 271
 272        if non_nan_only:
 273            # Create a boolean mask for non-NaN values
 274            non_nan_mask = ~np.isnan(row)
 275            # Find changes in the mask
 276            changes = np.diff(non_nan_mask.astype(int)) != 0
 277            change_idx = np.nonzero(changes)[0]
 278            starts = np.concatenate(([0], change_idx + 1))
 279            ends = np.concatenate((change_idx, [len(row) - 1]))
 280
 281            # Return only stretches that start with non-NaN values
 282            return [(start, end) for start, end in zip(starts, ends) if non_nan_mask[start]]
 283
 284        else:
 285            # Find change points: where adjacent cells differ.
 286            changes = np.diff(row) != 0
 287            change_idx = np.nonzero(changes)[0]
 288            starts = np.concatenate(([0], change_idx + 1))
 289            ends = np.concatenate((change_idx, [len(row) - 1]))
 290
 291            return [(start, end, row[start]) for start, end in zip(starts, ends)]
 292
 293    def _create_polygons(stretches: list, values_to_plot: list | ndarray, zoom: tuple, y_position: int | float,
 294                         polygons: list, aln_colors: dict | ScalarMappable, polygon_colors: list,
 295                         detected_identity_values: set | None = None):
 296        """
 297        create the individual polygons for the heatmap (do not plot each cell but pre-compute if cells are the same and adjacent to each other)
 298        """
 299
 300        for start, end, value in stretches:
 301            # check if this is a value for which we might want to create a polygon
 302            # e.g. we might not want to create a polygon for identical values
 303            if value not in values_to_plot:
 304                continue
 305            # add values to this set - > this is important for correct legend creation
 306            if detected_identity_values is not None:
 307                detected_identity_values.add(value)
 308            width = end + 1 - start
 309            # Calculate x coordinates adjusted for zoom and centering
 310            x0 = start + zoom[0] - 0.5
 311            x1 = x0 + width
 312            # Define the rectangle corners
 313            rect_coords = [
 314                (x0, y_position),
 315                (x1, y_position),
 316                (x1, y_position + 0.8),
 317                (x0, y_position + 0.8),
 318                (x0, y_position)
 319            ]
 320            polygons.append(rect_coords)
 321            if type(aln_colors) != ScalarMappable:
 322                polygon_colors.append(aln_colors[value]['color'])
 323            else:
 324                polygon_colors.append(aln_colors.to_rgba(value))
 325
 326        return detected_identity_values, polygons, polygon_colors
 327
 328    def _plot_sequence_text(aln: explore.MSA, seq_name: str, ref_name: str | None, always_text: bool, values: ndarray,
 329                            matrix: ndarray, ax: plt.Axes, zoom: tuple, y_position: int | float, value_to_skip: int | None,
 330                            ref_color: str, show_gaps: bool, aln_colors: dict | ScalarMappable = None):
 331        """
 332        Plot sequence text - however this will be done even if there is not enough space.
 333        """
 334        x_text = 0
 335        if seq_name == ref_name and ref_color is not None:
 336            different_cols = np.any((matrix != value_to_skip) & ~np.isnan(matrix), axis=0)
 337        else:
 338            different_cols = [False] * aln.length
 339
 340        for idx, (character, value) in enumerate(zip(aln.alignment[seq_name], values)):
 341            if value != value_to_skip and character != '-' or seq_name == ref_name and character != '-' or character == '-' and not show_gaps or always_text and character != '-':
 342                if seq_name == ref_name and ref_color is not None:
 343                    text_color = _get_contrast_text_color(to_rgba(ref_color))
 344                elif type(aln_colors) is ScalarMappable:
 345                    text_color = _get_contrast_text_color(aln_colors.to_rgba(value))
 346                else:
 347                    text_color = _get_contrast_text_color(to_rgba(aln_colors[value]['color']))
 348
 349                ax.text(
 350                    x=x_text + zoom[0] if zoom is not None else x_text,
 351                    y=y_position + 0.4,
 352                    s=character,
 353                    fontweight='bold' if different_cols[idx] else 'normal',
 354                    ha='center',
 355                    va='center_baseline',
 356                    c=text_color if value != value_to_skip or seq_name == ref_name and ref_color is not None else 'dimgrey'
 357                )
 358            x_text += 1
 359
 360    # List to store polygons
 361    detected_identity_values = {0}
 362    polygons, polygon_colors, patch_list = [], [], []
 363    # determine zoom
 364    zoom = (0, aln.length) if aln.zoom is None else aln.zoom
 365    for i, seq_name in enumerate(aln.alignment):
 366        # define initial y position
 367        y_position = len(aln.alignment) - i - 1.4
 368        # now plot relevant stuff for the current row
 369        row = matrix[i]
 370        # plot a line below everything for fancy gaps
 371        if fancy_gaps:
 372            ax.hlines(
 373                y_position + 0.4,
 374                xmin=zoom[0] - 0.5,
 375                xmax=zoom[1] + 0.5,
 376                color='black',
 377                linestyle='-',
 378                zorder=0,
 379                linewidth=0.75
 380            )
 381        # plot the basic shape per sequence with gaps
 382        if create_identity_patch:
 383            if type(aln_colors) == dict:
 384                identity_color = aln_colors[0]['color']
 385            else:
 386                identity_color = aln_colors.to_rgba(1)  # for similarity alignment
 387            patch_list = _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, identity_color, show_gaps)
 388        # find consecutive stretches
 389        stretches = _find_stretches(row)
 390        # create polygons per stretch
 391        detected_identity_values, polygons, polygon_colors = _create_polygons(
 392            stretches=stretches, values_to_plot=values_to_plot, zoom=zoom, y_position=y_position, polygons=polygons,
 393            aln_colors=aln_colors, polygon_colors=polygon_colors, detected_identity_values=detected_identity_values
 394        )
 395        # add sequence text
 396        if show_different_sequence or show_sequence_all:
 397            _plot_sequence_text(
 398                aln=aln, seq_name=list(aln.alignment.keys())[i], ref_name=aln.reference_id, always_text=show_sequence_all,
 399                values=matrix[i], matrix=matrix, ax=ax, zoom=zoom, y_position=y_position, value_to_skip=identical_value, ref_color=reference_color,
 400                show_gaps=show_gaps, aln_colors=aln_colors
 401            )
 402    # Create the LineCollection: each segment is drawn in a single call.
 403    ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt'))
 404    # NOTE: linewidth are important so in large overview plot the differences are still visible
 405    ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors))
 406
 407
 408    return detected_identity_values
 409
 410
 411def alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False,
 412              custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
 413              show_mask:bool = True, fancy_gaps:bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard',
 414              show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1),
 415              show_consensus: bool = False) -> plt.Axes:
 416    """
 417
 418    Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as 
 419    the identity alignments and similarity alignment function as each square for each character is individually plotted.
 420
 421    :param aln: alignment MSA class or path
 422    :param ax: matplotlib axes
 423    :param show_seq_names: whether to show seq names
 424    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
 425    :param custom_seq_names: custom seq names
 426    :param mask_color: color for masked nucleotides/aminoacids
 427    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
 428    :param basic_color: color that will be used for all normal chars if the colorscheme is None
 429    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
 430    :param fancy_gaps: show gaps with a small black bar
 431    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
 432    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
 433    :param show_x_label: whether to show x label
 434    :param show_legend: whether to show the legend
 435    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
 436    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
 437    :return  matplotlib axis
 438    """
 439    # Validate aln, ax inputs
 440    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
 441    # Validate colors
 442    for c in [mask_color, ambiguity_color, basic_color]:
 443        _validate_color(c)
 444    # Validate color scheme
 445    _validate_color_scheme(scheme=color_scheme, aln=aln)
 446    # create color mapping
 447    aln_colors = _create_color_dictionary(
 448        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color,
 449        different_char_color=None, mask_color=mask_color, ambiguity_color=ambiguity_color
 450    )
 451    # Compute identity alignment
 452    numerical_alignment = aln.calc_numerical_alignment(encode_ambiguities=show_ambiguities, encode_mask=show_mask)
 453    if color_scheme is None:
 454        numerical_alignment[np.where(numerical_alignment > 0)] = 0
 455    # create the alignment
 456    detected_identity_values = _create_alignment(
 457        aln=aln, ax=ax, matrix=numerical_alignment, fancy_gaps=fancy_gaps, create_identity_patch=True if color_scheme is None else False, show_gaps=True,
 458        show_different_sequence=False, show_sequence_all=show_sequence_all, reference_color=None, aln_colors=aln_colors,
 459        values_to_plot = list(aln_colors.keys())
 460    )
 461    # custom legend
 462    if show_legend:
 463        aln_colors.pop(0)  # otherwise legend is wrong (here there is no check if a position is identical or not)
 464        _create_legend(
 465            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln,
 466            detected_identity_values=detected_identity_values,
 467            ax=ax, bbox_to_anchor=bbox_to_anchor
 468        )
 469    if show_consensus:
 470        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=show_sequence_all,
 471            color_scheme='standard', basic_color=basic_color, mask_color=mask_color, ambiguity_color=ambiguity_color
 472        )
 473        ax.set_ylim(-0.5, len(aln.alignment) + 1)
 474    else:
 475        ax.set_ylim(-0.5, len(aln.alignment))
 476
 477    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, include_consensus=show_consensus)
 478    # configure axis
 479    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
 480
 481    return ax
 482
 483
 484
 485def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False,
 486                       show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (),
 487                       reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru',
 488                       mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask:bool = True, show_gaps:bool = True,
 489                       fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False,
 490                       color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False,
 491                       bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1), show_consensus: bool = False) -> plt.Axes:
 492    """
 493    Generates an identity alignment plot.
 494    :param aln: alignment MSA class or path
 495    :param ax: matplotlib axes
 496    :param show_title: whether to show title
 497    :param show_seq_names: whether to show seq names
 498    :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
 499    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
 500    :param custom_seq_names: custom seq names
 501    :param reference_color: color of reference sequence
 502    :param basic_color: color for identical nucleotides/aminoacids
 503    :param different_char_color: color for different nucleotides/aminoacids
 504    :param mask_color: color for masked nucleotides/aminoacids
 505    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
 506    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
 507    :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
 508    :param fancy_gaps: show gaps with a small black bar
 509    :param show_mismatches: whether to show mismatches otherwise it will be shown as match
 510    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
 511    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
 512    :param show_x_label: whether to show x label
 513    :param show_legend: whether to show the legend
 514    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
 515    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
 516
 517    :return  matplotlib axis
 518    """
 519
 520    # Both options for gaps work hand in hand
 521    if fancy_gaps:
 522        show_gaps = True
 523    # Validate aln, ax inputs
 524    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
 525    # Validate colors
 526    for c in [reference_color, basic_color, different_char_color, mask_color, ambiguity_color]:
 527        _validate_color(c)
 528    # Validate color scheme
 529    _validate_color_scheme(scheme=color_scheme, aln=aln)
 530    # create color mapping
 531    aln_colors = _create_color_dictionary(
 532        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, different_char_color=different_char_color,
 533        mask_color=mask_color, ambiguity_color=ambiguity_color
 534    )
 535    # Compute identity alignment
 536    identity_aln = aln.calc_identity_alignment(
 537        encode_mask=show_mask, encode_gaps=show_gaps, encode_mismatches=show_mismatches,encode_ambiguities=show_ambiguities,
 538        encode_each_mismatch_char=True if color_scheme is not None else False
 539    )
 540    # create the alignment
 541    detected_identity_values = _create_alignment(
 542        aln=aln, ax=ax, matrix=identity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
 543        show_different_sequence=show_identity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
 544        aln_colors=aln_colors, values_to_plot=list(aln_colors.keys())[1:]
 545    )
 546    # custom legend
 547    if show_legend:
 548        _create_legend(
 549            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, detected_identity_values=detected_identity_values,
 550            ax=ax, bbox_to_anchor=bbox_to_anchor
 551        )
 552    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names)
 553    # configure axis
 554    if show_consensus:
 555        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_identity_sequence]),
 556                       color_scheme='standard', basic_color=basic_color, mask_color=mask_color,
 557                       ambiguity_color=ambiguity_color
 558                       )
 559        ax.set_ylim(-0.5, len(aln.alignment) + 1)
 560    else:
 561        ax.set_ylim(-0.5, len(aln.alignment))
 562
 563    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
 564               include_consensus=show_consensus)
 565    if show_title:
 566        ax.set_title('identity', loc='left')
 567    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
 568
 569    return ax
 570
 571
 572def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True,
 573                         show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False,
 574                         custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey',
 575                         show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False,
 576                         cbar_fraction: float = 0.1, show_consensus: bool = False)  -> plt.Axes:
 577    """
 578    Generates a similarity alignment plot. Importantly the similarity values are normalized!
 579    :param aln: alignment MSA class or path
 580    :param ax: matplotlib axes
 581    :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
 582    :param show_title: whether to show title
 583    :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
 584    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
 585    :param show_seq_names: whether to show seq names
 586    :param custom_seq_names: custom seq names
 587    :param reference_color: color of reference sequence
 588    :param different_char_color: color for the lowest similarity
 589    :param basic_color: basic color for the highest similarity
 590    :param show_gaps: whether to show gaps otherwise it will be ignored
 591    :param fancy_gaps: show gaps with a small black bar
 592    :param show_x_label: whether to show x label
 593    :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
 594    :param cbar_fraction: fraction of the original ax reserved for the legend
 595    :param show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters)
 596    :return  matplotlib axis
 597    """
 598    # Both options for gaps work hand in hand
 599    if fancy_gaps:
 600        show_gaps = True
 601    # Validate aln, ax inputs
 602    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
 603    # Validate colors
 604    for c in [reference_color, different_char_color, basic_color]:
 605        _validate_color(c)
 606    # Compute similarity alignment
 607    similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type)  # use normalized values here
 608    similarity_aln = similarity_aln.round(2)  # round data for good color mapping
 609    # create cmap
 610    cmap = LinearSegmentedColormap.from_list(
 611        "extended",
 612        [
 613            (0.0, different_char_color),
 614            (1.0, basic_color)
 615        ],
 616    )
 617    cmap = ScalarMappable(norm=Normalize(vmin=0, vmax=1), cmap=cmap)
 618    # create similarity values
 619    similarity_values = np.arange(start=0, stop=1, step=0.01)
 620    # round it to be absolutely sure that values match with rounded sim alignment
 621    similarity_values = list(similarity_values.round(2))
 622    # create the alignment
 623    _create_alignment(
 624        aln=aln, ax=ax, matrix=similarity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
 625        show_different_sequence=show_similarity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
 626        aln_colors=cmap, values_to_plot=similarity_values, identical_value=1
 627    )
 628    # legend
 629    if show_cbar:
 630        cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
 631        cbar.set_ticks([0, 1])
 632        cbar.set_ticklabels(['low', 'high'])
 633
 634    # format seq names
 635    _seq_names(aln, ax, custom_seq_names, show_seq_names)
 636
 637    # configure axis
 638    if show_consensus:
 639        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_similarity_sequence]),
 640                       color_scheme='standard', basic_color=basic_color)
 641        ax.set_ylim(-0.5, len(aln.alignment) + 1)
 642    else:
 643        ax.set_ylim(-0.5, len(aln.alignment))
 644
 645    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
 646               include_consensus=show_consensus)
 647    if show_title:
 648        ax.set_title('similarity', loc='left')
 649    _format_x_axis(aln, ax, show_x_label, show_left=False)
 650
 651    return ax
 652
 653
 654def _moving_average(arr: ndarray, window_size: int, zoom: tuple | None, aln_length: int) -> tuple[ndarray, ndarray]:
 655    """
 656    Calculate the moving average of an array.
 657    :param arr: array with values
 658    :param window_size: size of the moving average
 659    :param zoom: zoom of the alignment
 660    :param aln_length: length of the alignment
 661    :return: new array with moving average
 662    """
 663    if window_size > 1:
 664        i = 0
 665        moving_averages, plotting_idx = [], []
 666        while i < len(arr) + 1:
 667            half_window_size = window_size // 2
 668            window_left = arr[i - half_window_size : i] if i > half_window_size else arr[0:i]
 669            window_right = arr[i: i + half_window_size] if i < len(arr) - half_window_size else arr[i: len(arr)]
 670            moving_averages.append((sum(window_left) + sum(window_right)) / (len(window_left) + len(window_right)))
 671            plotting_idx.append(i)
 672            i += 1
 673
 674        return np.array(moving_averages), np.array(plotting_idx) if zoom is None else np.array(plotting_idx) + zoom[0]
 675    else:
 676        return arr, np.arange(zoom[0], zoom[1]) if zoom is not None else np.arange(aln_length)
 677
 678
 679def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood',
 680              line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> plt.Axes:
 681    """
 682    Generate a plot for the various alignment stats.
 683    :param aln: alignment MSA class or path
 684    :param ax: matplotlib axes
 685    :param stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
 686    :param line_color: color of the line
 687    :param line_width: width of the line
 688    :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
 689    :param show_x_label: whether to show the x-axis label
 690    :param show_title: whether to show the title
 691    :return matplotlib axes
 692    """
 693
 694    # input check
 695    aln, ax = _validate_input_parameters(aln, ax)
 696
 697    # define possible functions to calc here
 698    stat_functions: Dict[str, Callable[[], list | ndarray]] = {
 699        'gc': aln.calc_gc,
 700        'entropy': aln.calc_entropy,
 701        'coverage': aln.calc_coverage,
 702        'identity': aln.calc_identity_alignment,
 703        'similarity': aln.calc_similarity_alignment,
 704        'ts tv score': aln.calc_transition_transversion_score
 705    }
 706
 707    if stat_type not in stat_functions:
 708        raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys())))
 709
 710    _validate_color(line_color)
 711    if rolling_average < 1 or rolling_average > aln.length:
 712        raise ValueError('rolling_average must be between 1 and length of sequence')
 713
 714    # generate input data
 715    array = stat_functions[stat_type]()
 716
 717    if stat_type == 'identity':
 718        min_value, max_value = -1, 0
 719    elif stat_type == 'ts tv score':
 720        min_value, max_value = -1, 1
 721    else:
 722        min_value, max_value = 0, 1
 723    if stat_type in ['identity', 'similarity']:
 724        # for the mean nan values get handled as the lowest possible number in the matrix
 725        array = np.nan_to_num(array, True, min_value)
 726        array = np.mean(array, axis=0)
 727    data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length)
 728
 729    # plot the data
 730    ax.fill_between(
 731        # this add dummy data left and right for better plotting
 732        # otherwise only half of the step is shown
 733        np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx,
 734        np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data,
 735        min_value,
 736        linewidth = line_width,
 737        edgecolor=line_color,
 738        step='mid' if rolling_average == 1 else None,
 739        facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none'
 740    )
 741    if stat_type == 'gc':
 742        ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1)
 743
 744    # format axis
 745    ax.set_ylim(min_value, max_value*0.1+max_value)
 746    ax.set_yticks([min_value, max_value])
 747    if stat_type == 'gc':
 748        ax.set_yticklabels(['0', '100'])
 749    elif stat_type == 'ts tv score':
 750        ax.set_yticklabels(['tv', 'ts'])
 751    else:
 752        ax.set_yticklabels(['low', 'high'])
 753
 754    # show title
 755    if show_title:
 756        ax.set_title(
 757            f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position',
 758            loc='left'
 759        )
 760
 761    _format_x_axis(aln, ax, show_x_label, show_left=True)
 762
 763    return ax
 764
 765
 766def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3),
 767                 color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True,
 768                 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1)) -> plt.Axes:
 769    """
 770    Plots variants.
 771    :param aln: alignment MSA class or path
 772    :param ax: matplotlib axes
 773    :param lollisize: (stem_size, head_size)
 774    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
 775    :param show_x_label:  whether to show the x-axis label
 776    :param show_legend: whether to show the legend
 777    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
 778    :return matplotlib axes
 779    """
 780
 781    # validate input
 782    aln, ax = _validate_input_parameters(aln, ax)
 783    _validate_color_scheme(color_scheme, aln)
 784    if not isinstance(lollisize, tuple) or len(lollisize) != 2:
 785        raise ValueError('lollisize must be tuple of length 2 (stem, head)')
 786    for _size in lollisize:
 787        if not isinstance(_size, float | int) or _size <= 0:
 788            raise ValueError('lollisize must be floats greater than zero')
 789
 790    # define colors
 791    colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
 792    # get snps
 793    snps = aln.get_snps()
 794    # define where to plot (each ref type gets a separate line)
 795    ref_y_positions, y_pos, detected_var = {}, 0, set()
 796
 797    # iterate over snp dict
 798    for pos in snps['POS']:
 799        for identifier in snps['POS'][pos]:
 800            # fill in y pos dict
 801            if identifier == 'ref':
 802                if snps['POS'][pos]['ref'] not in ref_y_positions:
 803                    ref_y_positions[snps['POS'][pos]['ref']] = y_pos
 804                    y_pos += 1.1
 805                    continue
 806            # plot
 807            if identifier == 'ALT':
 808                for alt in snps['POS'][pos]['ALT']:
 809                    ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos,
 810                              ymin=ref_y_positions[snps['POS'][pos]['ref']],
 811                              ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
 812                              color=colors[alt],
 813                              zorder=100,
 814                              linewidth=lollisize[0]
 815                              )
 816                    ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos,
 817                            ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
 818                            color=colors[alt],
 819                            marker='o',
 820                            markersize=lollisize[1]
 821                            )
 822                    detected_var.add(alt)
 823
 824    # plot hlines
 825    for y_char in ref_y_positions:
 826        ax.hlines(
 827            ref_y_positions[y_char],
 828            xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5,
 829            xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5,
 830            color='black',
 831            linestyle='-',
 832            zorder=0,
 833            linewidth=0.75
 834        )
 835    # create a custom legend
 836    if show_legend:
 837        custom_legend = [
 838            ax.add_line(
 839                plt.Line2D(
 840                    [],
 841                    [],
 842                    color=colors[char],
 843                    marker='o',
 844                    linestyle='',
 845                    markersize=5
 846                )
 847            ) for char in colors if char in detected_var
 848        ]
 849        ax.legend(
 850            custom_legend,
 851            [char for char in colors if char in detected_var],  # ensures correct sorting
 852            loc='lower right',
 853            title='variant',
 854            bbox_to_anchor=bbox_to_anchor,
 855            ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var),
 856            frameon=False
 857        )
 858
 859    # format axis
 860    _format_x_axis(aln, ax, show_x_label, show_left=False)
 861    ax.spines['left'].set_visible(False)
 862    ax.set_yticks([ref_y_positions[x] for x in ref_y_positions])
 863    ax.set_yticklabels(ref_y_positions.keys())
 864    ax.set_ylim(0, y_pos)
 865    ax.set_ylabel('reference')
 866
 867    return ax
 868
 869
 870
 871def _plot_annotation(annotation_dict: dict, ax: plt.Axes, direction_marker_size: int | None, color: str | ScalarMappable):
 872    """
 873    Plot annotation rectangles
 874    """
 875    for annotation in annotation_dict:
 876        for locations in annotation_dict[annotation]['location']:
 877            x_value = locations[0]
 878            length = locations[1] - locations[0]
 879            ax.add_patch(
 880                patches.FancyBboxPatch(
 881                    (x_value, annotation_dict[annotation]['track'] + 1),
 882                    length,
 883                    0.8,
 884                    boxstyle="Round, pad=0",
 885                    ec="black",
 886                    fc=color.to_rgba(annotation_dict[annotation]['conservation']) if isinstance(color, ScalarMappable) else color,
 887                )
 888            )
 889            if direction_marker_size is not None:
 890                if annotation_dict[annotation]['strand'] == '-':
 891                    marker = '<'
 892                else:
 893                    marker = '>'
 894                ax.plot(x_value + length/2, annotation_dict[annotation]['track'] + 1.4, marker=marker, markersize=direction_marker_size, color='white', markeredgecolor='black')
 895
 896        # plot linked annotations (such as splicing)
 897        if len(annotation_dict[annotation]['location']) > 1:
 898            y_value = annotation_dict[annotation]['track'] + 1.4
 899            start = None
 900            for locations in annotation_dict[annotation]['location']:
 901                if start is None:
 902                    start = locations[1]
 903                    continue
 904                ax.plot([start, locations[0]], [y_value, y_value], '--', linewidth=2, color='black')
 905                start = locations[1]
 906
 907
 908def _add_track_positions(annotation_dic):
 909    """
 910    define the position for annotations square so that overlapping annotations do not overlap in the plot
 911    """
 912    # create a dict and sort
 913    annotation_dic = dict(sorted(annotation_dic.items(), key=lambda x: x[1]['location'][0][0]))
 914
 915    # remember for each track the largest stop
 916    track_stops = [0]
 917
 918    for ann in annotation_dic:
 919        flattened_locations = list(chain.from_iterable(annotation_dic[ann]['location']))  # flatten list
 920        track = 0
 921        # check if a start of a gene is smaller than the stop of the current track
 922        # -> move to new track
 923        while flattened_locations[0] < track_stops[track]:
 924            track += 1
 925            # if all prior tracks are potentially causing an overlap
 926            # create a new track and break
 927            if len(track_stops) <= track:
 928                track_stops.append(0)
 929                break
 930        # in the current track remember the stop of the current gene
 931        track_stops[track] = flattened_locations[-1]
 932        # and indicate the track in the dict
 933        annotation_dic[ann]['track'] = track
 934
 935    return annotation_dic
 936
 937
 938def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True,
 939             cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False,
 940             cbar_fraction: float = 0.1) -> plt.Axes:
 941    """
 942    Plot conserved ORFs.
 943    :param aln: alignment MSA class or path
 944    :param ax: matplotlib axes
 945    :param min_length: minimum length of orf
 946    :param non_overlapping_orfs: whether to consider overlapping orfs
 947    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
 948    :param direction_marker_size: marker size for direction marker, not shown if marker_size == None
 949    :param show_x_label: whether to show the x-axis label
 950    :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
 951    :param cbar_fraction: fraction of the original ax reserved for the colorbar
 952    :return matplotlib axes
 953    """
 954
 955    # normalize colorbar
 956    cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap))
 957    # validate input
 958    aln, ax = _validate_input_parameters(aln, ax)
 959
 960    # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only
 961    # searched within the zoomed region)
 962    aln_temp = deepcopy(aln)
 963    aln_temp.zoom = None
 964    if non_overlapping_orfs:
 965        annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length)
 966    else:
 967        annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length)
 968    # filter dict for zoom
 969    if aln.zoom is not None:
 970        annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])}
 971    # add track for plotting
 972    _add_track_positions(annotation_dict)
 973    # plot
 974    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap)
 975
 976    # legend
 977    if show_cbar:
 978        cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
 979        cbar.set_label('% identity')
 980        cbar.set_ticks([0, 100])
 981
 982    # format axis
 983    _format_x_axis(aln, ax, show_x_label, show_left=False)
 984    ax.set_ylim(bottom=0.8)
 985    ax.set_yticks([])
 986    ax.set_yticklabels([])
 987    ax.set_title('conserved orfs', loc='left')
 988
 989    return ax
 990
 991
 992def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None,
 993                    color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> plt.Axes:
 994    """
 995    Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
 996    :param aln: alignment MSA class
 997    :param annotation: annotation class | path to annotation file
 998    :param ax: matplotlib axes
 999    :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
1000    :param color: color for the annotation
1001    :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True
1002    :param show_x_label: whether to show the x-axis label
1003    :return matplotlib axes
1004    """
1005    # validate input
1006    aln, ax, annotation = _validate_input_parameters(aln, ax, annotation)
1007    _validate_color(color)
1008
1009    # ignore features to plot for bed files (here it is written into one feature)
1010    if annotation.ann_type == 'bed':
1011        annotation_dict = annotation.features['region']
1012        feature_to_plot = 'bed regions'
1013    else:
1014        # try to subset the annotation dict
1015        try:
1016            annotation_dict = annotation.features[feature_to_plot]
1017        except KeyError:
1018            raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.')
1019
1020    # plotting and formating
1021    _add_track_positions(annotation_dict)
1022    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color)
1023    _format_x_axis(aln, ax, show_x_label, show_left=False)
1024    ax.set_ylim(bottom=0.8)
1025    ax.set_yticks([])
1026    ax.set_yticklabels([])
1027    ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')
1028
1029    return ax
1030
1031
1032def sequence_logo(aln:explore.MSA | str, ax:plt.Axes | None = None, color_scheme: str = 'standard', plot_type: str = 'stacked',
1033                  show_x_label:bool = False) -> plt.Axes:
1034    """
1035    Plot sequence logo or stacked area plot (use the first one with appropriate zoom levels). The
1036    logo visualizes the relative frequency of nt or aa characters in the alignment. The char frequency
1037    is scaled to the information content at each position. --> identical to how Geneious calculates it.
1038
1039    :param aln: alignment MSA class or path
1040    :param ax: matplotlib axes
1041    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
1042    :param plot_type: 'logo' for sequence logo, 'stacked' for stacked area plot
1043    :param show_x_label: whether to show the x-axis label
1044    :return matplotlib axes
1045    """
1046
1047    aln, ax = _validate_input_parameters(aln, ax)
1048    # calc matrix
1049    matrix = aln.calc_position_matrix('IC') * aln.calc_position_matrix('PPM')
1050    letters_to_plot = list(config.CHAR_COLORS[aln.aln_type]['standard'].keys())
1051
1052    # plot
1053    if plot_type == 'logo':
1054        for pos in range(matrix.shape[1]):
1055            # sort the positive matrix row values by size
1056            items = [(letters_to_plot[i], matrix[i, pos]) for i in range(len(letters_to_plot)) if matrix[i, pos] > 0]
1057            items.sort(key=lambda x: x[1])
1058            # plot each position
1059            y_offset = 0
1060            for letter, h in items:
1061                tp = TextPath((aln.zoom[0] - 0.325 if aln.zoom is not None else - 0.325, 0), letter, size=1, prop=FontProperties(weight='bold'))
1062                bb = tp.get_extents()
1063                glyph_height = bb.height if bb.height > 0 else 1e-6  # avoid div by zero
1064                scale_to_1 = 1.0 / glyph_height
1065
1066                transform = (Affine2D()
1067                             .scale(1.0, h * scale_to_1)  # scale manually by IC and normalize font
1068                             .translate(pos, y_offset)
1069                             + ax.transData)
1070
1071                patch = PathPatch(tp, transform=transform,
1072                                  facecolor=config.CHAR_COLORS[aln.aln_type][color_scheme][letter],
1073                                  edgecolor='none')
1074                ax.add_patch(patch)
1075                y_offset += h
1076    elif plot_type == 'stacked':
1077        y_values = np.zeros(matrix.shape[1])
1078        x_values = np.arange(0, matrix.shape[1]) if aln.zoom is None else np.arange(aln.zoom[0], aln.zoom[1])
1079        for row in range(matrix.shape[0]):
1080            y = matrix[row]
1081            ax.fill_between(x_values,
1082                            y_values,
1083                            y_values + y,
1084                            fc=config.CHAR_COLORS[aln.aln_type][color_scheme].get(letters_to_plot[row]),
1085                            ec='None',
1086                            label=letters_to_plot[row],
1087                            step='mid')
1088
1089    # adjust limits & labels
1090    _format_x_axis(aln, ax, show_x_label, show_left=True)
1091    if aln.aln_type == 'AA':
1092        ax.set_ylim(bottom=0, top=5)
1093    else:
1094        ax.set_ylim(bottom=0, top=2)
1095    ax.spines['top'].set_visible(False)
1096    ax.spines['right'].set_visible(False)
1097    ax.set_ylabel('bits')
1098
1099    return ax
1100
1101
1102def consensus_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False,
1103                   color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
1104                   show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> plt.Axes:
1105    """
1106    Plot a consensus sequence as a single-row colored sequence.
1107
1108    :param aln: alignment MSA class or path
1109    :param ax: matplotlib axes
1110    :param threshold: consensus threshold (0-1) or None for majority rule
1111    :param use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus
1112    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None
1113    :param mask_color: color used for mask characters (N/X)
1114    :param ambiguity_color: color used for ambiguous characters
1115    :param show_x_label: whether to show the x-axis label
1116    :param show_name: whether to show the 'consensus' label at y-axis
1117    :param show_sequence: whether to show the sequence at the y-axis
1118
1119    :return: matplotlib axes
1120    """
1121
1122    # validate input
1123    aln, ax = _validate_input_parameters(aln, ax)
1124    # validate colors
1125    for c in [mask_color, ambiguity_color]:
1126        _validate_color(c)
1127    # validate color scheme
1128    _validate_color_scheme(scheme=color_scheme, aln=aln)
1129
1130    # get consensus
1131    consensus = aln.get_consensus(threshold=threshold, use_ambig_nt=use_ambig_nt)
1132
1133    # prepare color mapping
1134    if color_scheme is not None:
1135        # get mapping from config
1136        char_colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
1137    else:
1138        char_colors = None
1139
1140    # Build polygons and colors for a single PolyCollection
1141    polygons = []
1142    poly_colors = []
1143    text_positions = []
1144    text_chars = []
1145    text_colors = []
1146
1147    zoom_offset = aln.zoom[0] if aln.zoom is not None else 0
1148
1149    y_position = len(aln.alignment) - 0.4
1150
1151    for pos, char in enumerate(consensus):
1152        x = pos + zoom_offset
1153        # determine color
1154        if char_colors is not None and char in char_colors:
1155            color = char_colors[char]
1156        else:
1157            # ambiguous nucleotide/aminoacid codes
1158            if char in config.AMBIG_CHARS[aln.aln_type]:
1159                color = ambiguity_color
1160            elif char in ['N', 'X']:
1161                color = mask_color
1162            else:
1163                color = basic_color
1164
1165        rect = [
1166            (x - 0.5, y_position),
1167            (x + 0.5, y_position),
1168            (x + 0.5, y_position+0.8),
1169            (x - 0.5, y_position+0.8),
1170        ]
1171        polygons.append(rect)
1172        poly_colors.append(color)
1173
1174        if show_sequence:
1175            text_positions.append(x)
1176            text_chars.append(char)
1177            # determine readable text color
1178            text_colors.append(_get_contrast_text_color(to_rgba(color)))
1179
1180    # add single PolyCollection
1181    if polygons:
1182        pc = PolyCollection(polygons, facecolors=poly_colors, edgecolors=poly_colors, linewidths=0.5)
1183        ax.add_collection(pc)
1184
1185    # add texts if requested
1186    if show_sequence:
1187        for x, ch, tc in zip(text_positions, text_chars, text_colors):
1188            ax.text(x, y_position+0.4, ch, ha='center', va='center_baseline', c=tc)
1189
1190    # format axis
1191    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
1192    ax.set_ylim(y_position-0.1, y_position + 0.9)
1193    if show_name:
1194        ax.yaxis.set_ticks_position('none')
1195        ax.set_yticks([y_position + 0.4])
1196        ax.set_yticklabels(['consensus'])
1197    else:
1198        ax.set_yticks([])
1199
1200    return ax
def alignment( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', show_mask: bool = True, fancy_gaps: bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard', show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float | int, float | int] | list = (1, 1), show_consensus: bool = False) -> matplotlib.axes._axes.Axes:
412def alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False,
413              custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
414              show_mask:bool = True, fancy_gaps:bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard',
415              show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1),
416              show_consensus: bool = False) -> plt.Axes:
417    """
418
419    Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as 
420    the identity alignments and similarity alignment function as each square for each character is individually plotted.
421
422    :param aln: alignment MSA class or path
423    :param ax: matplotlib axes
424    :param show_seq_names: whether to show seq names
425    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
426    :param custom_seq_names: custom seq names
427    :param mask_color: color for masked nucleotides/aminoacids
428    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
429    :param basic_color: color that will be used for all normal chars if the colorscheme is None
430    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
431    :param fancy_gaps: show gaps with a small black bar
432    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
433    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
434    :param show_x_label: whether to show x label
435    :param show_legend: whether to show the legend
436    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
437    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
438    :return  matplotlib axis
439    """
440    # Validate aln, ax inputs
441    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
442    # Validate colors
443    for c in [mask_color, ambiguity_color, basic_color]:
444        _validate_color(c)
445    # Validate color scheme
446    _validate_color_scheme(scheme=color_scheme, aln=aln)
447    # create color mapping
448    aln_colors = _create_color_dictionary(
449        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color,
450        different_char_color=None, mask_color=mask_color, ambiguity_color=ambiguity_color
451    )
452    # Compute identity alignment
453    numerical_alignment = aln.calc_numerical_alignment(encode_ambiguities=show_ambiguities, encode_mask=show_mask)
454    if color_scheme is None:
455        numerical_alignment[np.where(numerical_alignment > 0)] = 0
456    # create the alignment
457    detected_identity_values = _create_alignment(
458        aln=aln, ax=ax, matrix=numerical_alignment, fancy_gaps=fancy_gaps, create_identity_patch=True if color_scheme is None else False, show_gaps=True,
459        show_different_sequence=False, show_sequence_all=show_sequence_all, reference_color=None, aln_colors=aln_colors,
460        values_to_plot = list(aln_colors.keys())
461    )
462    # custom legend
463    if show_legend:
464        aln_colors.pop(0)  # otherwise legend is wrong (here there is no check if a position is identical or not)
465        _create_legend(
466            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln,
467            detected_identity_values=detected_identity_values,
468            ax=ax, bbox_to_anchor=bbox_to_anchor
469        )
470    if show_consensus:
471        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=show_sequence_all,
472            color_scheme='standard', basic_color=basic_color, mask_color=mask_color, ambiguity_color=ambiguity_color
473        )
474        ax.set_ylim(-0.5, len(aln.alignment) + 1)
475    else:
476        ax.set_ylim(-0.5, len(aln.alignment))
477
478    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, include_consensus=show_consensus)
479    # configure axis
480    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
481
482    return ax

Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as the identity alignments and similarity alignment function as each square for each character is individually plotted.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • show_seq_names: whether to show seq names
  • show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
  • custom_seq_names: custom seq names
  • mask_color: color for masked nucleotides/aminoacids
  • ambiguity_color: color for ambiguous nucleotides/aminoacids
  • basic_color: color that will be used for all normal chars if the colorscheme is None
  • show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
  • fancy_gaps: show gaps with a small black bar
  • show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
  • color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
  • show_x_label: whether to show x label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
  • show_consensus: whether to show the majority consensus sequence (standard-color scheme) :return matplotlib axis
def identity_alignment( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask: bool = True, show_gaps: bool = True, fancy_gaps: bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float | int, float | int] | list = (1, 1), show_consensus: bool = False) -> matplotlib.axes._axes.Axes:
486def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False,
487                       show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (),
488                       reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru',
489                       mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask:bool = True, show_gaps:bool = True,
490                       fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False,
491                       color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False,
492                       bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1), show_consensus: bool = False) -> plt.Axes:
493    """
494    Generates an identity alignment plot.
495    :param aln: alignment MSA class or path
496    :param ax: matplotlib axes
497    :param show_title: whether to show title
498    :param show_seq_names: whether to show seq names
499    :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
500    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
501    :param custom_seq_names: custom seq names
502    :param reference_color: color of reference sequence
503    :param basic_color: color for identical nucleotides/aminoacids
504    :param different_char_color: color for different nucleotides/aminoacids
505    :param mask_color: color for masked nucleotides/aminoacids
506    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
507    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
508    :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
509    :param fancy_gaps: show gaps with a small black bar
510    :param show_mismatches: whether to show mismatches otherwise it will be shown as match
511    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
512    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
513    :param show_x_label: whether to show x label
514    :param show_legend: whether to show the legend
515    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
516    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
517
518    :return  matplotlib axis
519    """
520
521    # Both options for gaps work hand in hand
522    if fancy_gaps:
523        show_gaps = True
524    # Validate aln, ax inputs
525    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
526    # Validate colors
527    for c in [reference_color, basic_color, different_char_color, mask_color, ambiguity_color]:
528        _validate_color(c)
529    # Validate color scheme
530    _validate_color_scheme(scheme=color_scheme, aln=aln)
531    # create color mapping
532    aln_colors = _create_color_dictionary(
533        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, different_char_color=different_char_color,
534        mask_color=mask_color, ambiguity_color=ambiguity_color
535    )
536    # Compute identity alignment
537    identity_aln = aln.calc_identity_alignment(
538        encode_mask=show_mask, encode_gaps=show_gaps, encode_mismatches=show_mismatches,encode_ambiguities=show_ambiguities,
539        encode_each_mismatch_char=True if color_scheme is not None else False
540    )
541    # create the alignment
542    detected_identity_values = _create_alignment(
543        aln=aln, ax=ax, matrix=identity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
544        show_different_sequence=show_identity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
545        aln_colors=aln_colors, values_to_plot=list(aln_colors.keys())[1:]
546    )
547    # custom legend
548    if show_legend:
549        _create_legend(
550            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, detected_identity_values=detected_identity_values,
551            ax=ax, bbox_to_anchor=bbox_to_anchor
552        )
553    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names)
554    # configure axis
555    if show_consensus:
556        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_identity_sequence]),
557                       color_scheme='standard', basic_color=basic_color, mask_color=mask_color,
558                       ambiguity_color=ambiguity_color
559                       )
560        ax.set_ylim(-0.5, len(aln.alignment) + 1)
561    else:
562        ax.set_ylim(-0.5, len(aln.alignment))
563
564    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
565               include_consensus=show_consensus)
566    if show_title:
567        ax.set_title('identity', loc='left')
568    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
569
570    return ax

Generates an identity alignment plot.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • show_title: whether to show title
  • show_seq_names: whether to show seq names
  • show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
  • show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
  • custom_seq_names: custom seq names
  • reference_color: color of reference sequence
  • basic_color: color for identical nucleotides/aminoacids
  • different_char_color: color for different nucleotides/aminoacids
  • mask_color: color for masked nucleotides/aminoacids
  • ambiguity_color: color for ambiguous nucleotides/aminoacids
  • show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
  • show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
  • fancy_gaps: show gaps with a small black bar
  • show_mismatches: whether to show mismatches otherwise it will be shown as match
  • show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
  • color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
  • show_x_label: whether to show x label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
  • show_consensus: whether to show the majority consensus sequence (standard-color scheme)

:return matplotlib axis

def similarity_alignment( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, matrix_type: str | None = None, show_title: bool = True, show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey', show_gaps: bool = True, fancy_gaps: bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1, show_consensus: bool = False) -> matplotlib.axes._axes.Axes:
573def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True,
574                         show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False,
575                         custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey',
576                         show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False,
577                         cbar_fraction: float = 0.1, show_consensus: bool = False)  -> plt.Axes:
578    """
579    Generates a similarity alignment plot. Importantly the similarity values are normalized!
580    :param aln: alignment MSA class or path
581    :param ax: matplotlib axes
582    :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
583    :param show_title: whether to show title
584    :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
585    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
586    :param show_seq_names: whether to show seq names
587    :param custom_seq_names: custom seq names
588    :param reference_color: color of reference sequence
589    :param different_char_color: color for the lowest similarity
590    :param basic_color: basic color for the highest similarity
591    :param show_gaps: whether to show gaps otherwise it will be ignored
592    :param fancy_gaps: show gaps with a small black bar
593    :param show_x_label: whether to show x label
594    :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
595    :param cbar_fraction: fraction of the original ax reserved for the legend
596    :param show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters)
597    :return  matplotlib axis
598    """
599    # Both options for gaps work hand in hand
600    if fancy_gaps:
601        show_gaps = True
602    # Validate aln, ax inputs
603    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
604    # Validate colors
605    for c in [reference_color, different_char_color, basic_color]:
606        _validate_color(c)
607    # Compute similarity alignment
608    similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type)  # use normalized values here
609    similarity_aln = similarity_aln.round(2)  # round data for good color mapping
610    # create cmap
611    cmap = LinearSegmentedColormap.from_list(
612        "extended",
613        [
614            (0.0, different_char_color),
615            (1.0, basic_color)
616        ],
617    )
618    cmap = ScalarMappable(norm=Normalize(vmin=0, vmax=1), cmap=cmap)
619    # create similarity values
620    similarity_values = np.arange(start=0, stop=1, step=0.01)
621    # round it to be absolutely sure that values match with rounded sim alignment
622    similarity_values = list(similarity_values.round(2))
623    # create the alignment
624    _create_alignment(
625        aln=aln, ax=ax, matrix=similarity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
626        show_different_sequence=show_similarity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
627        aln_colors=cmap, values_to_plot=similarity_values, identical_value=1
628    )
629    # legend
630    if show_cbar:
631        cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
632        cbar.set_ticks([0, 1])
633        cbar.set_ticklabels(['low', 'high'])
634
635    # format seq names
636    _seq_names(aln, ax, custom_seq_names, show_seq_names)
637
638    # configure axis
639    if show_consensus:
640        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_similarity_sequence]),
641                       color_scheme='standard', basic_color=basic_color)
642        ax.set_ylim(-0.5, len(aln.alignment) + 1)
643    else:
644        ax.set_ylim(-0.5, len(aln.alignment))
645
646    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
647               include_consensus=show_consensus)
648    if show_title:
649        ax.set_title('similarity', loc='left')
650    _format_x_axis(aln, ax, show_x_label, show_left=False)
651
652    return ax

Generates a similarity alignment plot. Importantly the similarity values are normalized!

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
  • show_title: whether to show title
  • show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
  • show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
  • show_seq_names: whether to show seq names
  • custom_seq_names: custom seq names
  • reference_color: color of reference sequence
  • different_char_color: color for the lowest similarity
  • basic_color: basic color for the highest similarity
  • show_gaps: whether to show gaps otherwise it will be ignored
  • fancy_gaps: show gaps with a small black bar
  • show_x_label: whether to show x label
  • show_cbar: whether to show the legend - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
  • cbar_fraction: fraction of the original ax reserved for the legend
  • show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters) :return matplotlib axis
def stat_plot( aln: msaexplorer.explore.MSA | str, stat_type: str, ax: matplotlib.axes._axes.Axes | None = None, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> matplotlib.axes._axes.Axes:
680def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood',
681              line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> plt.Axes:
682    """
683    Generate a plot for the various alignment stats.
684    :param aln: alignment MSA class or path
685    :param ax: matplotlib axes
686    :param stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
687    :param line_color: color of the line
688    :param line_width: width of the line
689    :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
690    :param show_x_label: whether to show the x-axis label
691    :param show_title: whether to show the title
692    :return matplotlib axes
693    """
694
695    # input check
696    aln, ax = _validate_input_parameters(aln, ax)
697
698    # define possible functions to calc here
699    stat_functions: Dict[str, Callable[[], list | ndarray]] = {
700        'gc': aln.calc_gc,
701        'entropy': aln.calc_entropy,
702        'coverage': aln.calc_coverage,
703        'identity': aln.calc_identity_alignment,
704        'similarity': aln.calc_similarity_alignment,
705        'ts tv score': aln.calc_transition_transversion_score
706    }
707
708    if stat_type not in stat_functions:
709        raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys())))
710
711    _validate_color(line_color)
712    if rolling_average < 1 or rolling_average > aln.length:
713        raise ValueError('rolling_average must be between 1 and length of sequence')
714
715    # generate input data
716    array = stat_functions[stat_type]()
717
718    if stat_type == 'identity':
719        min_value, max_value = -1, 0
720    elif stat_type == 'ts tv score':
721        min_value, max_value = -1, 1
722    else:
723        min_value, max_value = 0, 1
724    if stat_type in ['identity', 'similarity']:
725        # for the mean nan values get handled as the lowest possible number in the matrix
726        array = np.nan_to_num(array, True, min_value)
727        array = np.mean(array, axis=0)
728    data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length)
729
730    # plot the data
731    ax.fill_between(
732        # this add dummy data left and right for better plotting
733        # otherwise only half of the step is shown
734        np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx,
735        np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data,
736        min_value,
737        linewidth = line_width,
738        edgecolor=line_color,
739        step='mid' if rolling_average == 1 else None,
740        facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none'
741    )
742    if stat_type == 'gc':
743        ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1)
744
745    # format axis
746    ax.set_ylim(min_value, max_value*0.1+max_value)
747    ax.set_yticks([min_value, max_value])
748    if stat_type == 'gc':
749        ax.set_yticklabels(['0', '100'])
750    elif stat_type == 'ts tv score':
751        ax.set_yticklabels(['tv', 'ts'])
752    else:
753        ax.set_yticklabels(['low', 'high'])
754
755    # show title
756    if show_title:
757        ax.set_title(
758            f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position',
759            loc='left'
760        )
761
762    _format_x_axis(aln, ax, show_x_label, show_left=True)
763
764    return ax

Generate a plot for the various alignment stats.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
  • line_color: color of the line
  • line_width: width of the line
  • rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
  • show_x_label: whether to show the x-axis label
  • show_title: whether to show the title :return matplotlib axes
def variant_plot( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3), color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float | int, float | int] | list = (1, 1)) -> matplotlib.axes._axes.Axes:
767def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3),
768                 color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True,
769                 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1)) -> plt.Axes:
770    """
771    Plots variants.
772    :param aln: alignment MSA class or path
773    :param ax: matplotlib axes
774    :param lollisize: (stem_size, head_size)
775    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
776    :param show_x_label:  whether to show the x-axis label
777    :param show_legend: whether to show the legend
778    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
779    :return matplotlib axes
780    """
781
782    # validate input
783    aln, ax = _validate_input_parameters(aln, ax)
784    _validate_color_scheme(color_scheme, aln)
785    if not isinstance(lollisize, tuple) or len(lollisize) != 2:
786        raise ValueError('lollisize must be tuple of length 2 (stem, head)')
787    for _size in lollisize:
788        if not isinstance(_size, float | int) or _size <= 0:
789            raise ValueError('lollisize must be floats greater than zero')
790
791    # define colors
792    colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
793    # get snps
794    snps = aln.get_snps()
795    # define where to plot (each ref type gets a separate line)
796    ref_y_positions, y_pos, detected_var = {}, 0, set()
797
798    # iterate over snp dict
799    for pos in snps['POS']:
800        for identifier in snps['POS'][pos]:
801            # fill in y pos dict
802            if identifier == 'ref':
803                if snps['POS'][pos]['ref'] not in ref_y_positions:
804                    ref_y_positions[snps['POS'][pos]['ref']] = y_pos
805                    y_pos += 1.1
806                    continue
807            # plot
808            if identifier == 'ALT':
809                for alt in snps['POS'][pos]['ALT']:
810                    ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos,
811                              ymin=ref_y_positions[snps['POS'][pos]['ref']],
812                              ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
813                              color=colors[alt],
814                              zorder=100,
815                              linewidth=lollisize[0]
816                              )
817                    ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos,
818                            ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
819                            color=colors[alt],
820                            marker='o',
821                            markersize=lollisize[1]
822                            )
823                    detected_var.add(alt)
824
825    # plot hlines
826    for y_char in ref_y_positions:
827        ax.hlines(
828            ref_y_positions[y_char],
829            xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5,
830            xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5,
831            color='black',
832            linestyle='-',
833            zorder=0,
834            linewidth=0.75
835        )
836    # create a custom legend
837    if show_legend:
838        custom_legend = [
839            ax.add_line(
840                plt.Line2D(
841                    [],
842                    [],
843                    color=colors[char],
844                    marker='o',
845                    linestyle='',
846                    markersize=5
847                )
848            ) for char in colors if char in detected_var
849        ]
850        ax.legend(
851            custom_legend,
852            [char for char in colors if char in detected_var],  # ensures correct sorting
853            loc='lower right',
854            title='variant',
855            bbox_to_anchor=bbox_to_anchor,
856            ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var),
857            frameon=False
858        )
859
860    # format axis
861    _format_x_axis(aln, ax, show_x_label, show_left=False)
862    ax.spines['left'].set_visible(False)
863    ax.set_yticks([ref_y_positions[x] for x in ref_y_positions])
864    ax.set_yticklabels(ref_y_positions.keys())
865    ax.set_ylim(0, y_pos)
866    ax.set_ylabel('reference')
867
868    return ax

Plots variants.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • lollisize: (stem_size, head_size)
  • color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
  • show_x_label: whether to show the x-axis label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html :return matplotlib axes
def orf_plot( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1) -> matplotlib.axes._axes.Axes:
939def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True,
940             cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False,
941             cbar_fraction: float = 0.1) -> plt.Axes:
942    """
943    Plot conserved ORFs.
944    :param aln: alignment MSA class or path
945    :param ax: matplotlib axes
946    :param min_length: minimum length of orf
947    :param non_overlapping_orfs: whether to consider overlapping orfs
948    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
949    :param direction_marker_size: marker size for direction marker, not shown if marker_size == None
950    :param show_x_label: whether to show the x-axis label
951    :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
952    :param cbar_fraction: fraction of the original ax reserved for the colorbar
953    :return matplotlib axes
954    """
955
956    # normalize colorbar
957    cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap))
958    # validate input
959    aln, ax = _validate_input_parameters(aln, ax)
960
961    # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only
962    # searched within the zoomed region)
963    aln_temp = deepcopy(aln)
964    aln_temp.zoom = None
965    if non_overlapping_orfs:
966        annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length)
967    else:
968        annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length)
969    # filter dict for zoom
970    if aln.zoom is not None:
971        annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])}
972    # add track for plotting
973    _add_track_positions(annotation_dict)
974    # plot
975    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap)
976
977    # legend
978    if show_cbar:
979        cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
980        cbar.set_label('% identity')
981        cbar.set_ticks([0, 100])
982
983    # format axis
984    _format_x_axis(aln, ax, show_x_label, show_left=False)
985    ax.set_ylim(bottom=0.8)
986    ax.set_yticks([])
987    ax.set_yticklabels([])
988    ax.set_title('conserved orfs', loc='left')
989
990    return ax

Plot conserved ORFs.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • min_length: minimum length of orf
  • non_overlapping_orfs: whether to consider overlapping orfs
  • cmap: color mapping for % identity - see https: //matplotlib.org/stable/users/explain/colors/colormaps.html
  • direction_marker_size: marker size for direction marker, not shown if marker_size == None
  • show_x_label: whether to show the x-axis label
  • show_cbar: whether to show the colorbar - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
  • cbar_fraction: fraction of the original ax reserved for the colorbar :return matplotlib axes
def annotation_plot( aln: msaexplorer.explore.MSA | str, annotation: msaexplorer.explore.Annotation | str, feature_to_plot: str, ax: matplotlib.axes._axes.Axes | None = None, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> matplotlib.axes._axes.Axes:
 993def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None,
 994                    color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> plt.Axes:
 995    """
 996    Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
 997    :param aln: alignment MSA class
 998    :param annotation: annotation class | path to annotation file
 999    :param ax: matplotlib axes
1000    :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
1001    :param color: color for the annotation
1002    :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True
1003    :param show_x_label: whether to show the x-axis label
1004    :return matplotlib axes
1005    """
1006    # validate input
1007    aln, ax, annotation = _validate_input_parameters(aln, ax, annotation)
1008    _validate_color(color)
1009
1010    # ignore features to plot for bed files (here it is written into one feature)
1011    if annotation.ann_type == 'bed':
1012        annotation_dict = annotation.features['region']
1013        feature_to_plot = 'bed regions'
1014    else:
1015        # try to subset the annotation dict
1016        try:
1017            annotation_dict = annotation.features[feature_to_plot]
1018        except KeyError:
1019            raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.')
1020
1021    # plotting and formating
1022    _add_track_positions(annotation_dict)
1023    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color)
1024    _format_x_axis(aln, ax, show_x_label, show_left=False)
1025    ax.set_ylim(bottom=0.8)
1026    ax.set_yticks([])
1027    ax.set_yticklabels([])
1028    ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')
1029
1030    return ax

Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.

Parameters
  • aln: alignment MSA class
  • annotation: annotation class | path to annotation file
  • ax: matplotlib axes
  • feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
  • color: color for the annotation
  • direction_marker_size: marker size for direction marker, only relevant if show_direction is True
  • show_x_label: whether to show the x-axis label :return matplotlib axes
def consensus_plot( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False, color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> matplotlib.axes._axes.Axes:
1103def consensus_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False,
1104                   color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
1105                   show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> plt.Axes:
1106    """
1107    Plot a consensus sequence as a single-row colored sequence.
1108
1109    :param aln: alignment MSA class or path
1110    :param ax: matplotlib axes
1111    :param threshold: consensus threshold (0-1) or None for majority rule
1112    :param use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus
1113    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None
1114    :param mask_color: color used for mask characters (N/X)
1115    :param ambiguity_color: color used for ambiguous characters
1116    :param show_x_label: whether to show the x-axis label
1117    :param show_name: whether to show the 'consensus' label at y-axis
1118    :param show_sequence: whether to show the sequence at the y-axis
1119
1120    :return: matplotlib axes
1121    """
1122
1123    # validate input
1124    aln, ax = _validate_input_parameters(aln, ax)
1125    # validate colors
1126    for c in [mask_color, ambiguity_color]:
1127        _validate_color(c)
1128    # validate color scheme
1129    _validate_color_scheme(scheme=color_scheme, aln=aln)
1130
1131    # get consensus
1132    consensus = aln.get_consensus(threshold=threshold, use_ambig_nt=use_ambig_nt)
1133
1134    # prepare color mapping
1135    if color_scheme is not None:
1136        # get mapping from config
1137        char_colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
1138    else:
1139        char_colors = None
1140
1141    # Build polygons and colors for a single PolyCollection
1142    polygons = []
1143    poly_colors = []
1144    text_positions = []
1145    text_chars = []
1146    text_colors = []
1147
1148    zoom_offset = aln.zoom[0] if aln.zoom is not None else 0
1149
1150    y_position = len(aln.alignment) - 0.4
1151
1152    for pos, char in enumerate(consensus):
1153        x = pos + zoom_offset
1154        # determine color
1155        if char_colors is not None and char in char_colors:
1156            color = char_colors[char]
1157        else:
1158            # ambiguous nucleotide/aminoacid codes
1159            if char in config.AMBIG_CHARS[aln.aln_type]:
1160                color = ambiguity_color
1161            elif char in ['N', 'X']:
1162                color = mask_color
1163            else:
1164                color = basic_color
1165
1166        rect = [
1167            (x - 0.5, y_position),
1168            (x + 0.5, y_position),
1169            (x + 0.5, y_position+0.8),
1170            (x - 0.5, y_position+0.8),
1171        ]
1172        polygons.append(rect)
1173        poly_colors.append(color)
1174
1175        if show_sequence:
1176            text_positions.append(x)
1177            text_chars.append(char)
1178            # determine readable text color
1179            text_colors.append(_get_contrast_text_color(to_rgba(color)))
1180
1181    # add single PolyCollection
1182    if polygons:
1183        pc = PolyCollection(polygons, facecolors=poly_colors, edgecolors=poly_colors, linewidths=0.5)
1184        ax.add_collection(pc)
1185
1186    # add texts if requested
1187    if show_sequence:
1188        for x, ch, tc in zip(text_positions, text_chars, text_colors):
1189            ax.text(x, y_position+0.4, ch, ha='center', va='center_baseline', c=tc)
1190
1191    # format axis
1192    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
1193    ax.set_ylim(y_position-0.1, y_position + 0.9)
1194    if show_name:
1195        ax.yaxis.set_ticks_position('none')
1196        ax.set_yticks([y_position + 0.4])
1197        ax.set_yticklabels(['consensus'])
1198    else:
1199        ax.set_yticks([])
1200
1201    return ax

Plot a consensus sequence as a single-row colored sequence.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • threshold: consensus threshold (0-1) or None for majority rule
  • use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus
  • color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None
  • mask_color: color used for mask characters (N/X)
  • ambiguity_color: color used for ambiguous characters
  • show_x_label: whether to show the x-axis label
  • show_name: whether to show the 'consensus' label at y-axis
  • show_sequence: whether to show the sequence at the y-axis
Returns

matplotlib axes