msaexplorer.draw

The draw module

The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a matplotlib axes has to be passed to the plotting function.

Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments. The functions will raise the appropriate exception in such a case.

Functions

   1r"""
   2# The draw module
   3
   4The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a
   5`matplotlib axes` has to be passed to the plotting function.
   6
   7Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments.
   8The functions will raise the appropriate exception in such a case.
   9
  10## Functions
  11
  12"""
  13import pathlib
  14# built-in
  15from itertools import chain
  16from typing import Callable, Dict
  17from copy import deepcopy
  18import os
  19
  20import matplotlib
  21from numpy import ndarray
  22
  23# MSAexplorer
  24from msaexplorer import explore, config
  25
  26# libs
  27import numpy as np
  28import matplotlib.pyplot as plt
  29import matplotlib.patches as patches
  30from matplotlib.cm import ScalarMappable
  31from matplotlib.colors import is_color_like, Normalize, to_rgba, LinearSegmentedColormap
  32from matplotlib.collections import PatchCollection, PolyCollection
  33from matplotlib.text import TextPath
  34from matplotlib.patches import PathPatch
  35from matplotlib.font_manager import FontProperties
  36from matplotlib.transforms import Affine2D
  37
  38
  39def _validate_input_parameters(aln: explore.MSA | str, ax: plt.Axes, annotation: explore.Annotation | str | None = None) \
  40        -> tuple[explore.MSA, plt.Axes, explore.Annotation] | tuple[explore.MSA, plt.Axes]:
  41    """
  42    Validate MSA class and axis.
  43    """
  44    # check if alignment is path
  45    if type(aln) is str:
  46        if os.path.exists(aln):
  47            aln = explore.MSA(aln)
  48        else:
  49            raise FileNotFoundError(f'File {aln} does not exist')
  50    # check if alignment is correct type
  51    if not isinstance(aln, explore.MSA):
  52        raise ValueError('alignment has to be an MSA class. use explore.MSA() to read in alignment')
  53    # check if a axis was created
  54    # if ax not provided generate one from scratch
  55    if ax is None:
  56        ax = plt.gca()
  57    elif not isinstance(ax, plt.Axes):
  58            raise ValueError('ax has to be an matplotlib axis')
  59    # check if annotation is correct type
  60    if annotation is not None:
  61        if type(annotation) is str:
  62            if os.path.exists(annotation):
  63                # reset zoom so the annotation is correctly parsed
  64                msa_temp = deepcopy(aln)
  65                msa_temp.zoom = None
  66                annotation = explore.Annotation(msa_temp, annotation)
  67            else:
  68                raise FileNotFoundError()
  69        if not isinstance(annotation, explore.Annotation):
  70            raise ValueError('annotation has to be an annotation class. use explore.Annotation() to read in annotation')
  71
  72    if annotation is None:
  73        return aln, ax
  74    else:
  75        return aln, ax, annotation
  76
  77
  78def _validate_color(c):
  79    """
  80    validate color and raise error
  81    """
  82    if not is_color_like(c):
  83        raise ValueError(f'{c} is not a color')
  84
  85
  86def _validate_color_scheme(scheme: str | None, aln: explore.MSA):
  87    """
  88    validates colorscheme
  89    """
  90    if scheme is not None:
  91        if scheme not in config.CHAR_COLORS[aln.aln_type].keys():
  92            raise ValueError(f'Scheme not supported. Supported: {config.CHAR_COLORS[aln.aln_type].keys()}')
  93
  94
  95def _create_color_dictionary(aln: explore.MSA, color_scheme: str, identical_char_color: str | None = None, different_char_color: str | None = None, mask_color: str | None = None, ambiguity_color: str | None = None):
  96    """
  97    create the colorscheme dictionary -> this functions adds the respective colors to a dictionary. Some have fixed values and colors of defined
  98    schemes are always the idx + 1
  99    """
 100    # basic color mapping
 101    aln_colors = {}
 102
 103    if identical_char_color is not None:
 104        aln_colors[0] = {'type': 'identical', 'color': identical_char_color}
 105    if different_char_color is not None:
 106        aln_colors[-1] = {'type': 'different', 'color': different_char_color}
 107    if mask_color is not None:
 108        aln_colors[-2] = {'type': 'mask', 'color': mask_color}
 109    if ambiguity_color is not None:
 110        aln_colors[-3] = {'type': 'ambiguity', 'color': ambiguity_color}
 111
 112    # use the standard setting for the index (same as in aln.calc_identity_alignment)
 113    # and map the corresponding color scheme to it
 114    if color_scheme is not None:
 115        for idx, char in enumerate(config.CHAR_COLORS[aln.aln_type]['standard']):
 116            aln_colors[idx + 1] = {'type': char, 'color': config.CHAR_COLORS[aln.aln_type][color_scheme][char]}
 117
 118    return aln_colors
 119
 120
 121def _format_x_axis(aln: explore.MSA, ax: plt.Axes, show_x_label: bool, show_left: bool):
 122    """
 123    General x axis formatting.
 124    """
 125    ax.set_xlim(
 126        (aln.zoom[0] - 0.5, aln.zoom[0] + aln.length - 0.5) if aln.zoom is not None else (-0.5, aln.length - 0.5)
 127    )
 128    ax.spines['top'].set_visible(False)
 129    ax.spines['right'].set_visible(False)
 130    if show_x_label:
 131        ax.set_xlabel('alignment position')
 132    if not show_left:
 133        ax.spines['left'].set_visible(False)
 134
 135
 136def _seq_names(aln: explore.MSA, ax: plt.Axes, custom_seq_names: tuple, show_seq_names: bool, include_consensus: bool = False):
 137    """
 138    Validate custom names and set show names to True. Format axis accordingly.
 139    """
 140    if custom_seq_names:
 141        show_seq_names = True
 142        if not isinstance(custom_seq_names, tuple):
 143            raise ValueError('configure your custom names list: custom_names=(name1, name2...)')
 144        if len(custom_seq_names) != len(aln.alignment.keys()):
 145            raise ValueError('length of sequences not equal to number of custom names')
 146    if show_seq_names:
 147        ax.yaxis.set_ticks_position('none')
 148        ax.set_yticks(np.arange(len(aln.alignment)))
 149        if custom_seq_names:
 150            names = custom_seq_names[::-1]
 151        else:
 152            names = [x.split(' ')[0] for x in list(aln.alignment.keys())[::-1]]
 153        if include_consensus:
 154            names = names + ['consensus']
 155            y_ticks = np.arange(len(aln.alignment) + 1)
 156        else:
 157            y_ticks = np.arange(len(aln.alignment))
 158        ax.set_yticks(y_ticks)
 159        ax.set_yticklabels(names)
 160    else:
 161        ax.set_yticks([])
 162
 163
 164def _create_legend(color_scheme: str, aln_colors: dict, aln: explore.MSA, detected_identity_values: set, ax: plt.Axes, bbox_to_anchor: tuple | list):
 165    """
 166    create the legend for the alignment and identity alignment plot
 167    """
 168    if color_scheme is not None and color_scheme != 'standard':
 169        for x in aln_colors:
 170            for group in config.CHAR_GROUPS[aln.aln_type][color_scheme]:
 171                if aln_colors[x]['type'] in config.CHAR_GROUPS[aln.aln_type][color_scheme][group]:
 172                    aln_colors[x]['type'] = group
 173                    break
 174    # create it
 175    handels, labels, detected_groups = [], [], set()
 176    for x in aln_colors:
 177        if x in detected_identity_values and aln_colors[x]['type'] not in detected_groups:
 178            handels.append(
 179                ax.add_line(
 180                    plt.Line2D(
 181                        [],
 182                        [],
 183                        color=aln_colors[x]['color'] if color_scheme != 'hydrophobicity' or x == 0 else
 184                        config.CHAR_COLORS[aln.aln_type]['hydrophobicity'][
 185                            config.CHAR_GROUPS[aln.aln_type]['hydrophobicity'][aln_colors[x]['type']][0]
 186                        ],
 187                        marker='s',
 188                        markeredgecolor='grey',
 189                        linestyle='',
 190                        markersize=10))
 191            )
 192            labels.append(aln_colors[x]['type'])
 193            detected_groups.add(aln_colors[x]['type'])
 194
 195    # ncols
 196    if color_scheme is None or aln.aln_type != 'AA':
 197        ncols = len(detected_identity_values)
 198    elif color_scheme == 'standard':
 199        ncols = (len(detected_identity_values) + 1) / 2
 200    else:
 201        ncols = (len(detected_groups) + 1) / 2
 202
 203    # plot it
 204    ax.legend(
 205        handels,
 206        labels,
 207        loc='lower right',
 208        bbox_to_anchor=bbox_to_anchor,
 209        ncols=ncols,
 210        frameon=False
 211    )
 212
 213
 214def _get_contrast_text_color(rgba_color):
 215        """
 216        compute the brightness of a color
 217        """
 218        r, g, b, a = rgba_color
 219        brightness = (r * 299 + g * 587 + b * 114) / 1000
 220
 221        return 'white' if brightness < 0.5 else 'black'
 222
 223
 224def _create_alignment(aln: explore.MSA, ax: plt.Axes, matrix: ndarray, aln_colors: dict | ScalarMappable, fancy_gaps: bool, create_identity_patch: bool,
 225                      show_gaps: bool, show_different_sequence: bool, show_sequence_all: bool, reference_color: str | None, values_to_plot: list,
 226                      identical_value: int | float = 0):
 227    """
 228    create the polygon patch collection for an alignment
 229    """
 230
 231    # helper functions
 232    def _create_identity_patch(row, aln: explore.MSA, col: list, zoom: tuple[int, int], y_position: float | int,
 233                               reference_color: str | None, seq_name: str, identity_color: str | ndarray,
 234                               show_gaps: bool):
 235        """
 236        Creates the initial patch and adds it to the collection.
 237        """
 238
 239        color = reference_color if seq_name == aln.reference_id and reference_color is not None else identity_color
 240
 241        if show_gaps:
 242            # plot a rectangle for parts that do not have gaps
 243            for stretch in _find_stretches(row, True):
 244                col.append(
 245                    patches.Rectangle(
 246                        (stretch[0] + zoom[0] - 0.5, y_position), stretch[1] - stretch[0] + 1, 0.8,
 247                        facecolor=color,
 248                        edgecolor=color,
 249                        linewidth=0.5
 250                    )
 251                )
 252        # just plot a rectangle
 253        else:
 254            col.append(
 255                patches.Rectangle(
 256                    (zoom[0] - 0.5, y_position), zoom[1] - zoom[0], 0.8,
 257                    facecolor=color,
 258                    edgecolor=color,
 259                    linewidth=0.5
 260                )
 261            )
 262
 263        return col
 264
 265    def _find_stretches(row, non_nan_only=False) -> list[tuple[int, int, int]] | list[tuple[int, int]]:
 266        """
 267        Finds consecutive stretches of values in an array, with an option to exclude NaN stretches and return start, end, value at start
 268        """
 269        if row.size == 0:
 270            return []
 271
 272        if non_nan_only:
 273            # Create a boolean mask for non-NaN values
 274            non_nan_mask = ~np.isnan(row)
 275            # Find changes in the mask
 276            changes = np.diff(non_nan_mask.astype(int)) != 0
 277            change_idx = np.nonzero(changes)[0]
 278            starts = np.concatenate(([0], change_idx + 1))
 279            ends = np.concatenate((change_idx, [len(row) - 1]))
 280
 281            # Return only stretches that start with non-NaN values
 282            return [(start, end) for start, end in zip(starts, ends) if non_nan_mask[start]]
 283
 284        else:
 285            # Find change points: where adjacent cells differ.
 286            changes = np.diff(row) != 0
 287            change_idx = np.nonzero(changes)[0]
 288            starts = np.concatenate(([0], change_idx + 1))
 289            ends = np.concatenate((change_idx, [len(row) - 1]))
 290
 291            return [(start, end, row[start]) for start, end in zip(starts, ends)]
 292
 293    def _create_polygons(stretches: list, values_to_plot: list | ndarray, zoom: tuple, y_position: int | float,
 294                         polygons: list, aln_colors: dict | ScalarMappable, polygon_colors: list,
 295                         detected_identity_values: set | None = None):
 296        """
 297        create the individual polygons for the heatmap (do not plot each cell but pre-compute if cells are the same and adjacent to each other)
 298        """
 299
 300        for start, end, value in stretches:
 301            # check if this is a value for which we might want to create a polygon
 302            # e.g. we might not want to create a polygon for identical values
 303            if value not in values_to_plot:
 304                continue
 305            # add values to this set - > this is important for correct legend creation
 306            if detected_identity_values is not None:
 307                detected_identity_values.add(value)
 308            width = end + 1 - start
 309            # Calculate x coordinates adjusted for zoom and centering
 310            x0 = start + zoom[0] - 0.5
 311            x1 = x0 + width
 312            # Define the rectangle corners
 313            rect_coords = [
 314                (x0, y_position),
 315                (x1, y_position),
 316                (x1, y_position + 0.8),
 317                (x0, y_position + 0.8),
 318                (x0, y_position)
 319            ]
 320            polygons.append(rect_coords)
 321            if type(aln_colors) != ScalarMappable:
 322                polygon_colors.append(aln_colors[value]['color'])
 323            else:
 324                polygon_colors.append(aln_colors.to_rgba(value))
 325
 326        return detected_identity_values, polygons, polygon_colors
 327
 328    def _plot_sequence_text(aln: explore.MSA, seq_name: str, ref_name: str | None, always_text: bool, values: ndarray,
 329                            matrix: ndarray, ax: plt.Axes, zoom: tuple, y_position: int | float, value_to_skip: int | None,
 330                            ref_color: str, show_gaps: bool, aln_colors: dict | ScalarMappable = None):
 331        """
 332        Plot sequence text - however this will be done even if there is not enough space.
 333        """
 334        x_text = 0
 335        if seq_name == ref_name and ref_color is not None:
 336            different_cols = np.any((matrix != value_to_skip) & ~np.isnan(matrix), axis=0)
 337        else:
 338            different_cols = [False] * aln.length
 339
 340        for idx, (character, value) in enumerate(zip(aln.alignment[seq_name], values)):
 341            if value != value_to_skip and character != '-' or seq_name == ref_name and character != '-' or character == '-' and not show_gaps or always_text and character != '-':
 342                if seq_name == ref_name and ref_color is not None:
 343                    text_color = _get_contrast_text_color(to_rgba(ref_color))
 344                elif type(aln_colors) is ScalarMappable:
 345                    text_color = _get_contrast_text_color(aln_colors.to_rgba(value))
 346                else:
 347                    text_color = _get_contrast_text_color(to_rgba(aln_colors[value]['color']))
 348
 349                ax.text(
 350                    x=x_text + zoom[0] if zoom is not None else x_text,
 351                    y=y_position + 0.4,
 352                    s=character,
 353                    fontweight='bold' if different_cols[idx] else 'normal',
 354                    ha='center',
 355                    va='center_baseline',
 356                    c=text_color if value != value_to_skip or seq_name == ref_name and ref_color is not None else 'dimgrey'
 357                )
 358            x_text += 1
 359
 360    # List to store polygons
 361    detected_identity_values = {0}
 362    polygons, polygon_colors, patch_list = [], [], []
 363    # determine zoom
 364    zoom = (0, aln.length) if aln.zoom is None else aln.zoom
 365    for i, seq_name in enumerate(aln.alignment):
 366        # define initial y position
 367        y_position = len(aln.alignment) - i - 1.4
 368        # now plot relevant stuff for the current row
 369        row = matrix[i]
 370        # plot a line below everything for fancy gaps
 371        if fancy_gaps:
 372            ax.hlines(
 373                y_position + 0.4,
 374                xmin=zoom[0] - 0.5,
 375                xmax=zoom[1] + 0.5,
 376                color='black',
 377                linestyle='-',
 378                zorder=0,
 379                linewidth=0.75
 380            )
 381        # plot the basic shape per sequence with gaps
 382        if create_identity_patch:
 383            if type(aln_colors) == dict:
 384                identity_color = aln_colors[0]['color']
 385            else:
 386                identity_color = aln_colors.to_rgba(1)  # for similarity alignment
 387            patch_list = _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, identity_color, show_gaps)
 388        # find consecutive stretches
 389        stretches = _find_stretches(row)
 390        # create polygons per stretch
 391        detected_identity_values, polygons, polygon_colors = _create_polygons(
 392            stretches=stretches, values_to_plot=values_to_plot, zoom=zoom, y_position=y_position, polygons=polygons,
 393            aln_colors=aln_colors, polygon_colors=polygon_colors, detected_identity_values=detected_identity_values
 394        )
 395        # add sequence text
 396        if show_different_sequence or show_sequence_all:
 397            _plot_sequence_text(
 398                aln=aln, seq_name=list(aln.alignment.keys())[i], ref_name=aln.reference_id, always_text=show_sequence_all,
 399                values=matrix[i], matrix=matrix, ax=ax, zoom=zoom, y_position=y_position, value_to_skip=identical_value, ref_color=reference_color,
 400                show_gaps=show_gaps, aln_colors=aln_colors
 401            )
 402    # Create the LineCollection: each segment is drawn in a single call.
 403    ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt'))
 404    # NOTE: linewidth are important so in large overview plot the differences are still visible
 405    ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors))
 406
 407
 408    return detected_identity_values
 409
 410
 411def alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False,
 412              custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
 413              show_mask:bool = True, fancy_gaps:bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard',
 414              show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1),
 415              show_consensus: bool = False) -> plt.Axes:
 416    """
 417
 418    Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as 
 419    the identity alignments and similarity alignment function as each square for each character is individually plotted.
 420
 421    :param aln: alignment MSA class or path
 422    :param ax: matplotlib axes
 423    :param show_seq_names: whether to show seq names
 424    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
 425    :param custom_seq_names: custom seq names
 426    :param mask_color: color for masked nucleotides/aminoacids
 427    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
 428    :param basic_color: color that will be used for all normal chars if the colorscheme is None
 429    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
 430    :param fancy_gaps: show gaps with a small black bar
 431    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
 432    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
 433    :param show_x_label: whether to show x label
 434    :param show_legend: whether to show the legend
 435    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
 436    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
 437    :return  matplotlib axis
 438    """
 439    # Validate aln, ax inputs
 440    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
 441    # Validate colors
 442    for c in [mask_color, ambiguity_color, basic_color]:
 443        _validate_color(c)
 444    # Validate color scheme
 445    _validate_color_scheme(scheme=color_scheme, aln=aln)
 446    # create color mapping
 447    aln_colors = _create_color_dictionary(
 448        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color,
 449        different_char_color=None, mask_color=mask_color, ambiguity_color=ambiguity_color
 450    )
 451    # Compute identity alignment
 452    numerical_alignment = aln.calc_numerical_alignment(encode_ambiguities=show_ambiguities, encode_mask=show_mask)
 453    if color_scheme is None:
 454        numerical_alignment[np.where(numerical_alignment > 0)] = 0
 455    # create the alignment
 456    detected_identity_values = _create_alignment(
 457        aln=aln, ax=ax, matrix=numerical_alignment, fancy_gaps=fancy_gaps, create_identity_patch=True if color_scheme is None else False, show_gaps=True,
 458        show_different_sequence=False, show_sequence_all=show_sequence_all, reference_color=None, aln_colors=aln_colors,
 459        values_to_plot = list(aln_colors.keys())
 460    )
 461    # custom legend
 462    if show_legend:
 463        aln_colors.pop(0)  # otherwise legend is wrong (here there is no check if a position is identical or not)
 464        _create_legend(
 465            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln,
 466            detected_identity_values=detected_identity_values,
 467            ax=ax, bbox_to_anchor=bbox_to_anchor
 468        )
 469    if show_consensus:
 470        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=show_sequence_all,
 471            color_scheme='standard', basic_color=basic_color, mask_color=mask_color, ambiguity_color=ambiguity_color
 472        )
 473        ax.set_ylim(-0.5, len(aln.alignment) + 1)
 474    else:
 475        ax.set_ylim(-0.5, len(aln.alignment))
 476
 477    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, include_consensus=show_consensus)
 478    # configure axis
 479    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
 480
 481    return ax
 482
 483
 484
 485def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False,
 486                       show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (),
 487                       reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru',
 488                       mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask:bool = True, show_gaps:bool = True,
 489                       fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False,
 490                       color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False,
 491                       bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1), show_consensus: bool = False) -> plt.Axes:
 492    """
 493    Generates an identity alignment plot.
 494    :param aln: alignment MSA class or path
 495    :param ax: matplotlib axes
 496    :param show_title: whether to show title
 497    :param show_seq_names: whether to show seq names
 498    :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
 499    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
 500    :param custom_seq_names: custom seq names
 501    :param reference_color: color of reference sequence
 502    :param basic_color: color for identical nucleotides/aminoacids
 503    :param different_char_color: color for different nucleotides/aminoacids
 504    :param mask_color: color for masked nucleotides/aminoacids
 505    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
 506    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
 507    :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
 508    :param fancy_gaps: show gaps with a small black bar
 509    :param show_mismatches: whether to show mismatches otherwise it will be shown as match
 510    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
 511    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
 512    :param show_x_label: whether to show x label
 513    :param show_legend: whether to show the legend
 514    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
 515    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
 516
 517    :return  matplotlib axis
 518    """
 519
 520    # Both options for gaps work hand in hand
 521    if fancy_gaps:
 522        show_gaps = True
 523    # Validate aln, ax inputs
 524    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
 525    # Validate colors
 526    for c in [reference_color, basic_color, different_char_color, mask_color, ambiguity_color]:
 527        _validate_color(c)
 528    # Validate color scheme
 529    _validate_color_scheme(scheme=color_scheme, aln=aln)
 530    # create color mapping
 531    aln_colors = _create_color_dictionary(
 532        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, different_char_color=different_char_color,
 533        mask_color=mask_color, ambiguity_color=ambiguity_color
 534    )
 535    # Compute identity alignment
 536    identity_aln = aln.calc_identity_alignment(
 537        encode_mask=show_mask, encode_gaps=show_gaps, encode_mismatches=show_mismatches,encode_ambiguities=show_ambiguities,
 538        encode_each_mismatch_char=True if color_scheme is not None else False
 539    )
 540    # create the alignment
 541    detected_identity_values = _create_alignment(
 542        aln=aln, ax=ax, matrix=identity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
 543        show_different_sequence=show_identity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
 544        aln_colors=aln_colors, values_to_plot=list(aln_colors.keys())[1:]
 545    )
 546    # custom legend
 547    if show_legend:
 548        _create_legend(
 549            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, detected_identity_values=detected_identity_values,
 550            ax=ax, bbox_to_anchor=bbox_to_anchor
 551        )
 552    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names)
 553    # configure axis
 554    if show_consensus:
 555        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_identity_sequence]),
 556                       color_scheme='standard', basic_color=basic_color, mask_color=mask_color,
 557                       ambiguity_color=ambiguity_color
 558                       )
 559        ax.set_ylim(-0.5, len(aln.alignment) + 1)
 560    else:
 561        ax.set_ylim(-0.5, len(aln.alignment))
 562
 563    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
 564               include_consensus=show_consensus)
 565    if show_title:
 566        ax.set_title('identity', loc='left')
 567    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
 568
 569    return ax
 570
 571
 572def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True,
 573                         show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False,
 574                         custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey',
 575                         show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False,
 576                         cbar_fraction: float = 0.1, show_consensus: bool = False)  -> plt.Axes:
 577    """
 578    Generates a similarity alignment plot. Importantly the similarity values are normalized!
 579    :param aln: alignment MSA class or path
 580    :param ax: matplotlib axes
 581    :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
 582    :param show_title: whether to show title
 583    :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
 584    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
 585    :param show_seq_names: whether to show seq names
 586    :param custom_seq_names: custom seq names
 587    :param reference_color: color of reference sequence
 588    :param different_char_color: color for the lowest similarity
 589    :param basic_color: basic color for the highest similarity
 590    :param show_gaps: whether to show gaps otherwise it will be ignored
 591    :param fancy_gaps: show gaps with a small black bar
 592    :param show_x_label: whether to show x label
 593    :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
 594    :param cbar_fraction: fraction of the original ax reserved for the legend
 595    :param show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters)
 596    :return  matplotlib axis
 597    """
 598    # Both options for gaps work hand in hand
 599    if fancy_gaps:
 600        show_gaps = True
 601    # Validate aln, ax inputs
 602    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
 603    # Validate colors
 604    for c in [reference_color, different_char_color, basic_color]:
 605        _validate_color(c)
 606    # Compute similarity alignment
 607    similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type)  # use normalized values here
 608    similarity_aln = similarity_aln.round(2)  # round data for good color mapping
 609    # create cmap
 610    cmap = LinearSegmentedColormap.from_list(
 611        "extended",
 612        [
 613            (0.0, different_char_color),
 614            (1.0, basic_color)
 615        ],
 616    )
 617    cmap = ScalarMappable(norm=Normalize(vmin=0, vmax=1), cmap=cmap)
 618    # create similarity values
 619    similarity_values = np.arange(start=0, stop=1, step=0.01)
 620    # round it to be absolutely sure that values match with rounded sim alignment
 621    similarity_values = list(similarity_values.round(2))
 622    # create the alignment
 623    _create_alignment(
 624        aln=aln, ax=ax, matrix=similarity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
 625        show_different_sequence=show_similarity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
 626        aln_colors=cmap, values_to_plot=similarity_values, identical_value=1
 627    )
 628    # legend
 629    if show_cbar:
 630        cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
 631        cbar.set_ticks([0, 1])
 632        cbar.set_ticklabels(['low', 'high'])
 633
 634    # format seq names
 635    _seq_names(aln, ax, custom_seq_names, show_seq_names)
 636
 637    # configure axis
 638    if show_consensus:
 639        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_similarity_sequence]),
 640                       color_scheme='standard', basic_color=basic_color)
 641        ax.set_ylim(-0.5, len(aln.alignment) + 1)
 642    else:
 643        ax.set_ylim(-0.5, len(aln.alignment))
 644
 645    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
 646               include_consensus=show_consensus)
 647    if show_title:
 648        ax.set_title('similarity', loc='left')
 649    _format_x_axis(aln, ax, show_x_label, show_left=False)
 650
 651    return ax
 652
 653
 654def _moving_average(arr: ndarray, window_size: int, zoom: tuple | None, aln_length: int) -> tuple[ndarray, ndarray]:
 655    """
 656    Calculate the moving average of an array.
 657    :param arr: array with values
 658    :param window_size: size of the moving average
 659    :param zoom: zoom of the alignment
 660    :param aln_length: length of the alignment
 661    :return: new array with moving average
 662    """
 663    if window_size > 1:
 664        i = 0
 665        moving_averages, plotting_idx = [], []
 666        while i < len(arr) + 1:
 667            half_window_size = window_size // 2
 668            window_left = arr[i - half_window_size : i] if i > half_window_size else arr[0:i]
 669            window_right = arr[i: i + half_window_size] if i < len(arr) - half_window_size else arr[i: len(arr)]
 670            moving_averages.append((sum(window_left) + sum(window_right)) / (len(window_left) + len(window_right)))
 671            plotting_idx.append(i)
 672            i += 1
 673
 674        return np.array(moving_averages), np.array(plotting_idx) if zoom is None else np.array(plotting_idx) + zoom[0]
 675    else:
 676        return arr, np.arange(zoom[0], zoom[1]) if zoom is not None else np.arange(aln_length)
 677
 678
 679def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood',
 680              line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> plt.Axes:
 681    """
 682    Generate a plot for the various alignment stats.
 683    :param aln: alignment MSA class or path
 684    :param ax: matplotlib axes
 685    :param stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', gap frequency, 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
 686    :param line_color: color of the line
 687    :param line_width: width of the line
 688    :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
 689    :param show_x_label: whether to show the x-axis label
 690    :param show_title: whether to show the title
 691    :return matplotlib axes
 692    """
 693
 694    # input check
 695    aln, ax = _validate_input_parameters(aln, ax)
 696
 697    # define possible functions to calc here
 698    stat_functions: Dict[str, Callable[[], list | ndarray]] = {
 699        'gc': aln.calc_gc,
 700        'entropy': aln.calc_entropy,
 701        'coverage': aln.calc_coverage,
 702        'identity': aln.calc_identity_alignment,
 703        'similarity': aln.calc_similarity_alignment,
 704        'ts tv score': aln.calc_transition_transversion_score,
 705        'gap frequency': aln.calc_gap_frequency
 706    }
 707
 708    if stat_type not in stat_functions:
 709        raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys())))
 710
 711    _validate_color(line_color)
 712    if rolling_average < 1 or rolling_average > aln.length:
 713        raise ValueError('rolling_average must be between 1 and length of sequence')
 714
 715    # generate input data
 716    array = stat_functions[stat_type]()
 717
 718    if stat_type == 'identity':
 719        min_value, max_value = -1, 0
 720    elif stat_type == 'ts tv score':
 721        min_value, max_value = -1, 1
 722    else:
 723        min_value, max_value = 0, 1
 724    if stat_type in ['identity', 'similarity']:
 725        # for the mean nan values get handled as the lowest possible number in the matrix
 726        array = np.nan_to_num(array, True, min_value)
 727        array = np.mean(array, axis=0)
 728    data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length)
 729
 730    # plot the data
 731    ax.fill_between(
 732        # this add dummy data left and right for better plotting
 733        # otherwise only half of the step is shown
 734        np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx,
 735        np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data,
 736        min_value,
 737        linewidth = line_width,
 738        edgecolor=line_color,
 739        step='mid' if rolling_average == 1 else None,
 740        facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none'
 741    )
 742    if stat_type == 'gc':
 743        ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1)
 744
 745    # format axis
 746    ax.set_ylim(min_value, max_value*0.1+max_value)
 747    ax.set_yticks([min_value, max_value])
 748    if stat_type == 'gc':
 749        ax.set_yticklabels(['0', '100'])
 750    elif stat_type == 'ts tv score':
 751        ax.set_yticklabels(['tv', 'ts'])
 752    else:
 753        ax.set_yticklabels(['low', 'high'])
 754
 755    # show title
 756    if show_title:
 757        ax.set_title(
 758            f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position',
 759            loc='left'
 760        )
 761
 762    _format_x_axis(aln, ax, show_x_label, show_left=True)
 763
 764    return ax
 765
 766
 767def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3),
 768                 color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True,
 769                 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1)) -> plt.Axes:
 770    """
 771    Plots variants.
 772    :param aln: alignment MSA class or path
 773    :param ax: matplotlib axes
 774    :param lollisize: (stem_size, head_size)
 775    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
 776    :param show_x_label:  whether to show the x-axis label
 777    :param show_legend: whether to show the legend
 778    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
 779    :return matplotlib axes
 780    """
 781
 782    # validate input
 783    aln, ax = _validate_input_parameters(aln, ax)
 784    _validate_color_scheme(color_scheme, aln)
 785    if not isinstance(lollisize, tuple) or len(lollisize) != 2:
 786        raise ValueError('lollisize must be tuple of length 2 (stem, head)')
 787    for _size in lollisize:
 788        if not isinstance(_size, float | int) or _size <= 0:
 789            raise ValueError('lollisize must be floats greater than zero')
 790
 791    # define colors
 792    colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
 793    # get snps
 794    snps = aln.get_snps()
 795    # define where to plot (each ref type gets a separate line)
 796    ref_y_positions, y_pos, detected_var = {}, 0, set()
 797
 798    # iterate over snp dict
 799    for pos in snps['POS']:
 800        for identifier in snps['POS'][pos]:
 801            # fill in y pos dict
 802            if identifier == 'ref':
 803                if snps['POS'][pos]['ref'] not in ref_y_positions:
 804                    ref_y_positions[snps['POS'][pos]['ref']] = y_pos
 805                    y_pos += 1.1
 806                    continue
 807            # plot
 808            if identifier == 'ALT':
 809                for alt in snps['POS'][pos]['ALT']:
 810                    ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos,
 811                              ymin=ref_y_positions[snps['POS'][pos]['ref']],
 812                              ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
 813                              color=colors[alt],
 814                              zorder=100,
 815                              linewidth=lollisize[0]
 816                              )
 817                    ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos,
 818                            ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
 819                            color=colors[alt],
 820                            marker='o',
 821                            markersize=lollisize[1]
 822                            )
 823                    detected_var.add(alt)
 824
 825    # plot hlines
 826    for y_char in ref_y_positions:
 827        ax.hlines(
 828            ref_y_positions[y_char],
 829            xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5,
 830            xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5,
 831            color='black',
 832            linestyle='-',
 833            zorder=0,
 834            linewidth=0.75
 835        )
 836    # create a custom legend
 837    if show_legend:
 838        custom_legend = [
 839            ax.add_line(
 840                plt.Line2D(
 841                    [],
 842                    [],
 843                    color=colors[char],
 844                    marker='o',
 845                    linestyle='',
 846                    markersize=5
 847                )
 848            ) for char in colors if char in detected_var
 849        ]
 850        ax.legend(
 851            custom_legend,
 852            [char for char in colors if char in detected_var],  # ensures correct sorting
 853            loc='lower right',
 854            title='variant',
 855            bbox_to_anchor=bbox_to_anchor,
 856            ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var),
 857            frameon=False
 858        )
 859
 860    # format axis
 861    _format_x_axis(aln, ax, show_x_label, show_left=False)
 862    ax.spines['left'].set_visible(False)
 863    ax.set_yticks([ref_y_positions[x] for x in ref_y_positions])
 864    ax.set_yticklabels(ref_y_positions.keys())
 865    ax.set_ylim(0, y_pos)
 866    ax.set_ylabel('reference')
 867
 868    return ax
 869
 870
 871
 872def _plot_annotation(annotation_dict: dict, ax: plt.Axes, direction_marker_size: int | None, color: str | ScalarMappable):
 873    """
 874    Plot annotation rectangles
 875    """
 876    for annotation in annotation_dict:
 877        for locations in annotation_dict[annotation]['location']:
 878            x_value = locations[0]
 879            length = locations[1] - locations[0]
 880            ax.add_patch(
 881                patches.FancyBboxPatch(
 882                    (x_value, annotation_dict[annotation]['track'] + 1),
 883                    length,
 884                    0.8,
 885                    boxstyle="Round, pad=0",
 886                    ec="black",
 887                    fc=color.to_rgba(annotation_dict[annotation]['conservation']) if isinstance(color, ScalarMappable) else color,
 888                )
 889            )
 890            if direction_marker_size is not None:
 891                if annotation_dict[annotation]['strand'] == '-':
 892                    marker = '<'
 893                else:
 894                    marker = '>'
 895                ax.plot(x_value + length/2, annotation_dict[annotation]['track'] + 1.4, marker=marker, markersize=direction_marker_size, color='white', markeredgecolor='black')
 896
 897        # plot linked annotations (such as splicing)
 898        if len(annotation_dict[annotation]['location']) > 1:
 899            y_value = annotation_dict[annotation]['track'] + 1.4
 900            start = None
 901            for locations in annotation_dict[annotation]['location']:
 902                if start is None:
 903                    start = locations[1]
 904                    continue
 905                ax.plot([start, locations[0]], [y_value, y_value], '--', linewidth=2, color='black')
 906                start = locations[1]
 907
 908
 909def _add_track_positions(annotation_dic):
 910    """
 911    define the position for annotations square so that overlapping annotations do not overlap in the plot
 912    """
 913    # create a dict and sort
 914    annotation_dic = dict(sorted(annotation_dic.items(), key=lambda x: x[1]['location'][0][0]))
 915
 916    # remember for each track the largest stop
 917    track_stops = [0]
 918
 919    for ann in annotation_dic:
 920        flattened_locations = list(chain.from_iterable(annotation_dic[ann]['location']))  # flatten list
 921        track = 0
 922        # check if a start of a gene is smaller than the stop of the current track
 923        # -> move to new track
 924        while flattened_locations[0] < track_stops[track]:
 925            track += 1
 926            # if all prior tracks are potentially causing an overlap
 927            # create a new track and break
 928            if len(track_stops) <= track:
 929                track_stops.append(0)
 930                break
 931        # in the current track remember the stop of the current gene
 932        track_stops[track] = flattened_locations[-1]
 933        # and indicate the track in the dict
 934        annotation_dic[ann]['track'] = track
 935
 936    return annotation_dic
 937
 938
 939def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True,
 940             cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False,
 941             cbar_fraction: float = 0.1) -> plt.Axes:
 942    """
 943    Plot conserved ORFs.
 944    :param aln: alignment MSA class or path
 945    :param ax: matplotlib axes
 946    :param min_length: minimum length of orf
 947    :param non_overlapping_orfs: whether to consider overlapping orfs
 948    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
 949    :param direction_marker_size: marker size for direction marker, not shown if marker_size == None
 950    :param show_x_label: whether to show the x-axis label
 951    :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
 952    :param cbar_fraction: fraction of the original ax reserved for the colorbar
 953    :return matplotlib axes
 954    """
 955
 956    # normalize colorbar
 957    cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap))
 958    # validate input
 959    aln, ax = _validate_input_parameters(aln, ax)
 960
 961    # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only
 962    # searched within the zoomed region)
 963    aln_temp = deepcopy(aln)
 964    aln_temp.zoom = None
 965    if non_overlapping_orfs:
 966        annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length)
 967    else:
 968        annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length)
 969    # filter dict for zoom
 970    if aln.zoom is not None:
 971        annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])}
 972    # add track for plotting
 973    _add_track_positions(annotation_dict)
 974    # plot
 975    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap)
 976
 977    # legend
 978    if show_cbar:
 979        cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
 980        cbar.set_label('% identity')
 981        cbar.set_ticks([0, 100])
 982
 983    # format axis
 984    _format_x_axis(aln, ax, show_x_label, show_left=False)
 985    ax.set_ylim(bottom=0.8)
 986    ax.set_yticks([])
 987    ax.set_yticklabels([])
 988    ax.set_title('conserved orfs', loc='left')
 989
 990    return ax
 991
 992
 993def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None,
 994                    color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> plt.Axes:
 995    """
 996    Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
 997    :param aln: alignment MSA class
 998    :param annotation: annotation class | path to annotation file
 999    :param ax: matplotlib axes
1000    :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
1001    :param color: color for the annotation
1002    :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True
1003    :param show_x_label: whether to show the x-axis label
1004    :return matplotlib axes
1005    """
1006    # validate input
1007    aln, ax, annotation = _validate_input_parameters(aln, ax, annotation)
1008    _validate_color(color)
1009
1010    # ignore features to plot for bed files (here it is written into one feature)
1011    if annotation.ann_type == 'bed':
1012        annotation_dict = annotation.features['region']
1013        feature_to_plot = 'bed regions'
1014    else:
1015        # try to subset the annotation dict
1016        try:
1017            annotation_dict = annotation.features[feature_to_plot]
1018        except KeyError:
1019            raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.')
1020
1021    # plotting and formating
1022    _add_track_positions(annotation_dict)
1023    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color)
1024    _format_x_axis(aln, ax, show_x_label, show_left=False)
1025    ax.set_ylim(bottom=0.8)
1026    ax.set_yticks([])
1027    ax.set_yticklabels([])
1028    ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')
1029
1030    return ax
1031
1032
1033def sequence_logo(aln:explore.MSA | str, ax:plt.Axes | None = None, color_scheme: str = 'standard', plot_type: str = 'stacked',
1034                  show_x_label:bool = False) -> plt.Axes:
1035    """
1036    Plot sequence logo or stacked area plot (use the first one with appropriate zoom levels). The
1037    logo visualizes the relative frequency of nt or aa characters in the alignment. The char frequency
1038    is scaled to the information content at each position. --> identical to how Geneious calculates it.
1039
1040    :param aln: alignment MSA class or path
1041    :param ax: matplotlib axes
1042    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
1043    :param plot_type: 'logo' for sequence logo, 'stacked' for stacked area plot
1044    :param show_x_label: whether to show the x-axis label
1045    :return matplotlib axes
1046    """
1047
1048    aln, ax = _validate_input_parameters(aln, ax)
1049    # calc matrix
1050    matrix = aln.calc_position_matrix('IC') * aln.calc_position_matrix('PPM')
1051    letters_to_plot = list(config.CHAR_COLORS[aln.aln_type]['standard'].keys())
1052
1053    # plot
1054    if plot_type == 'logo':
1055        for pos in range(matrix.shape[1]):
1056            # sort the positive matrix row values by size
1057            items = [(letters_to_plot[i], matrix[i, pos]) for i in range(len(letters_to_plot)) if matrix[i, pos] > 0]
1058            items.sort(key=lambda x: x[1])
1059            # plot each position
1060            y_offset = 0
1061            for letter, h in items:
1062                tp = TextPath((aln.zoom[0] - 0.325 if aln.zoom is not None else - 0.325, 0), letter, size=1, prop=FontProperties(weight='bold'))
1063                bb = tp.get_extents()
1064                glyph_height = bb.height if bb.height > 0 else 1e-6  # avoid div by zero
1065                scale_to_1 = 1.0 / glyph_height
1066
1067                transform = (Affine2D()
1068                             .scale(1.0, h * scale_to_1)  # scale manually by IC and normalize font
1069                             .translate(pos, y_offset)
1070                             + ax.transData)
1071
1072                patch = PathPatch(tp, transform=transform,
1073                                  facecolor=config.CHAR_COLORS[aln.aln_type][color_scheme][letter],
1074                                  edgecolor='none')
1075                ax.add_patch(patch)
1076                y_offset += h
1077    elif plot_type == 'stacked':
1078        y_values = np.zeros(matrix.shape[1])
1079        x_values = np.arange(0, matrix.shape[1]) if aln.zoom is None else np.arange(aln.zoom[0], aln.zoom[1])
1080        for row in range(matrix.shape[0]):
1081            y = matrix[row]
1082            ax.fill_between(x_values,
1083                            y_values,
1084                            y_values + y,
1085                            fc=config.CHAR_COLORS[aln.aln_type][color_scheme].get(letters_to_plot[row]),
1086                            ec='None',
1087                            label=letters_to_plot[row],
1088                            step='mid')
1089
1090    # adjust limits & labels
1091    _format_x_axis(aln, ax, show_x_label, show_left=True)
1092    if aln.aln_type == 'AA':
1093        ax.set_ylim(bottom=0, top=5)
1094    else:
1095        ax.set_ylim(bottom=0, top=2)
1096    ax.spines['top'].set_visible(False)
1097    ax.spines['right'].set_visible(False)
1098    ax.set_ylabel('bits')
1099
1100    return ax
1101
1102
1103def consensus_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False,
1104                   color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
1105                   show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> plt.Axes:
1106    """
1107    Plot a consensus sequence as a single-row colored sequence.
1108
1109    :param aln: alignment MSA class or path
1110    :param ax: matplotlib axes
1111    :param threshold: consensus threshold (0-1) or None for majority rule
1112    :param use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus
1113    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None
1114    :param mask_color: color used for mask characters (N/X)
1115    :param ambiguity_color: color used for ambiguous characters
1116    :param show_x_label: whether to show the x-axis label
1117    :param show_name: whether to show the 'consensus' label at y-axis
1118    :param show_sequence: whether to show the sequence at the y-axis
1119
1120    :return: matplotlib axes
1121    """
1122
1123    # validate input
1124    aln, ax = _validate_input_parameters(aln, ax)
1125    # validate colors
1126    for c in [mask_color, ambiguity_color]:
1127        _validate_color(c)
1128    # validate color scheme
1129    _validate_color_scheme(scheme=color_scheme, aln=aln)
1130
1131    # get consensus
1132    consensus = aln.get_consensus(threshold=threshold, use_ambig_nt=use_ambig_nt)
1133
1134    # prepare color mapping
1135    if color_scheme is not None:
1136        # get mapping from config
1137        char_colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
1138    else:
1139        char_colors = None
1140
1141    # Build polygons and colors for a single PolyCollection
1142    polygons = []
1143    poly_colors = []
1144    text_positions = []
1145    text_chars = []
1146    text_colors = []
1147
1148    zoom_offset = aln.zoom[0] if aln.zoom is not None else 0
1149
1150    y_position = len(aln.alignment) - 0.4
1151
1152    for pos, char in enumerate(consensus):
1153        x = pos + zoom_offset
1154        # determine color
1155        if char_colors is not None and char in char_colors:
1156            color = char_colors[char]
1157        else:
1158            # ambiguous nucleotide/aminoacid codes
1159            if char in config.AMBIG_CHARS[aln.aln_type]:
1160                color = ambiguity_color
1161            elif char in ['N', 'X']:
1162                color = mask_color
1163            else:
1164                color = basic_color
1165
1166        rect = [
1167            (x - 0.5, y_position),
1168            (x + 0.5, y_position),
1169            (x + 0.5, y_position+0.8),
1170            (x - 0.5, y_position+0.8),
1171        ]
1172        polygons.append(rect)
1173        poly_colors.append(color)
1174
1175        if show_sequence:
1176            text_positions.append(x)
1177            text_chars.append(char)
1178            # determine readable text color
1179            text_colors.append(_get_contrast_text_color(to_rgba(color)))
1180
1181    # add single PolyCollection
1182    if polygons:
1183        pc = PolyCollection(polygons, facecolors=poly_colors, edgecolors=poly_colors, linewidths=0.5)
1184        ax.add_collection(pc)
1185
1186    # add texts if requested
1187    if show_sequence:
1188        for x, ch, tc in zip(text_positions, text_chars, text_colors):
1189            ax.text(x, y_position+0.4, ch, ha='center', va='center_baseline', c=tc)
1190
1191    # format axis
1192    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
1193    ax.set_ylim(y_position-0.1, y_position + 0.9)
1194    if show_name:
1195        ax.yaxis.set_ticks_position('none')
1196        ax.set_yticks([y_position + 0.4])
1197        ax.set_yticklabels(['consensus'])
1198    else:
1199        ax.set_yticks([])
1200
1201    return ax
def alignment( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', show_mask: bool = True, fancy_gaps: bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard', show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float | int, float | int] | list = (1, 1), show_consensus: bool = False) -> matplotlib.axes._axes.Axes:
412def alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False,
413              custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
414              show_mask:bool = True, fancy_gaps:bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard',
415              show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1),
416              show_consensus: bool = False) -> plt.Axes:
417    """
418
419    Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as 
420    the identity alignments and similarity alignment function as each square for each character is individually plotted.
421
422    :param aln: alignment MSA class or path
423    :param ax: matplotlib axes
424    :param show_seq_names: whether to show seq names
425    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
426    :param custom_seq_names: custom seq names
427    :param mask_color: color for masked nucleotides/aminoacids
428    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
429    :param basic_color: color that will be used for all normal chars if the colorscheme is None
430    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
431    :param fancy_gaps: show gaps with a small black bar
432    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
433    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
434    :param show_x_label: whether to show x label
435    :param show_legend: whether to show the legend
436    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
437    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
438    :return  matplotlib axis
439    """
440    # Validate aln, ax inputs
441    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
442    # Validate colors
443    for c in [mask_color, ambiguity_color, basic_color]:
444        _validate_color(c)
445    # Validate color scheme
446    _validate_color_scheme(scheme=color_scheme, aln=aln)
447    # create color mapping
448    aln_colors = _create_color_dictionary(
449        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color,
450        different_char_color=None, mask_color=mask_color, ambiguity_color=ambiguity_color
451    )
452    # Compute identity alignment
453    numerical_alignment = aln.calc_numerical_alignment(encode_ambiguities=show_ambiguities, encode_mask=show_mask)
454    if color_scheme is None:
455        numerical_alignment[np.where(numerical_alignment > 0)] = 0
456    # create the alignment
457    detected_identity_values = _create_alignment(
458        aln=aln, ax=ax, matrix=numerical_alignment, fancy_gaps=fancy_gaps, create_identity_patch=True if color_scheme is None else False, show_gaps=True,
459        show_different_sequence=False, show_sequence_all=show_sequence_all, reference_color=None, aln_colors=aln_colors,
460        values_to_plot = list(aln_colors.keys())
461    )
462    # custom legend
463    if show_legend:
464        aln_colors.pop(0)  # otherwise legend is wrong (here there is no check if a position is identical or not)
465        _create_legend(
466            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln,
467            detected_identity_values=detected_identity_values,
468            ax=ax, bbox_to_anchor=bbox_to_anchor
469        )
470    if show_consensus:
471        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=show_sequence_all,
472            color_scheme='standard', basic_color=basic_color, mask_color=mask_color, ambiguity_color=ambiguity_color
473        )
474        ax.set_ylim(-0.5, len(aln.alignment) + 1)
475    else:
476        ax.set_ylim(-0.5, len(aln.alignment))
477
478    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, include_consensus=show_consensus)
479    # configure axis
480    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
481
482    return ax

Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as the identity alignments and similarity alignment function as each square for each character is individually plotted.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • show_seq_names: whether to show seq names
  • show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
  • custom_seq_names: custom seq names
  • mask_color: color for masked nucleotides/aminoacids
  • ambiguity_color: color for ambiguous nucleotides/aminoacids
  • basic_color: color that will be used for all normal chars if the colorscheme is None
  • show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
  • fancy_gaps: show gaps with a small black bar
  • show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
  • color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
  • show_x_label: whether to show x label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
  • show_consensus: whether to show the majority consensus sequence (standard-color scheme) :return matplotlib axis
def identity_alignment( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask: bool = True, show_gaps: bool = True, fancy_gaps: bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float | int, float | int] | list = (1, 1), show_consensus: bool = False) -> matplotlib.axes._axes.Axes:
486def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False,
487                       show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (),
488                       reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru',
489                       mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask:bool = True, show_gaps:bool = True,
490                       fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False,
491                       color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False,
492                       bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1), show_consensus: bool = False) -> plt.Axes:
493    """
494    Generates an identity alignment plot.
495    :param aln: alignment MSA class or path
496    :param ax: matplotlib axes
497    :param show_title: whether to show title
498    :param show_seq_names: whether to show seq names
499    :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
500    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
501    :param custom_seq_names: custom seq names
502    :param reference_color: color of reference sequence
503    :param basic_color: color for identical nucleotides/aminoacids
504    :param different_char_color: color for different nucleotides/aminoacids
505    :param mask_color: color for masked nucleotides/aminoacids
506    :param ambiguity_color: color for ambiguous nucleotides/aminoacids
507    :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
508    :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
509    :param fancy_gaps: show gaps with a small black bar
510    :param show_mismatches: whether to show mismatches otherwise it will be shown as match
511    :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
512    :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
513    :param show_x_label: whether to show x label
514    :param show_legend: whether to show the legend
515    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
516    :param show_consensus: whether to show the majority consensus sequence (standard-color scheme)
517
518    :return  matplotlib axis
519    """
520
521    # Both options for gaps work hand in hand
522    if fancy_gaps:
523        show_gaps = True
524    # Validate aln, ax inputs
525    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
526    # Validate colors
527    for c in [reference_color, basic_color, different_char_color, mask_color, ambiguity_color]:
528        _validate_color(c)
529    # Validate color scheme
530    _validate_color_scheme(scheme=color_scheme, aln=aln)
531    # create color mapping
532    aln_colors = _create_color_dictionary(
533        aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, different_char_color=different_char_color,
534        mask_color=mask_color, ambiguity_color=ambiguity_color
535    )
536    # Compute identity alignment
537    identity_aln = aln.calc_identity_alignment(
538        encode_mask=show_mask, encode_gaps=show_gaps, encode_mismatches=show_mismatches,encode_ambiguities=show_ambiguities,
539        encode_each_mismatch_char=True if color_scheme is not None else False
540    )
541    # create the alignment
542    detected_identity_values = _create_alignment(
543        aln=aln, ax=ax, matrix=identity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
544        show_different_sequence=show_identity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
545        aln_colors=aln_colors, values_to_plot=list(aln_colors.keys())[1:]
546    )
547    # custom legend
548    if show_legend:
549        _create_legend(
550            color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, detected_identity_values=detected_identity_values,
551            ax=ax, bbox_to_anchor=bbox_to_anchor
552        )
553    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names)
554    # configure axis
555    if show_consensus:
556        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_identity_sequence]),
557                       color_scheme='standard', basic_color=basic_color, mask_color=mask_color,
558                       ambiguity_color=ambiguity_color
559                       )
560        ax.set_ylim(-0.5, len(aln.alignment) + 1)
561    else:
562        ax.set_ylim(-0.5, len(aln.alignment))
563
564    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
565               include_consensus=show_consensus)
566    if show_title:
567        ax.set_title('identity', loc='left')
568    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
569
570    return ax

Generates an identity alignment plot.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • show_title: whether to show title
  • show_seq_names: whether to show seq names
  • show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
  • show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
  • custom_seq_names: custom seq names
  • reference_color: color of reference sequence
  • basic_color: color for identical nucleotides/aminoacids
  • different_char_color: color for different nucleotides/aminoacids
  • mask_color: color for masked nucleotides/aminoacids
  • ambiguity_color: color for ambiguous nucleotides/aminoacids
  • show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
  • show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
  • fancy_gaps: show gaps with a small black bar
  • show_mismatches: whether to show mismatches otherwise it will be shown as match
  • show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
  • color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
  • show_x_label: whether to show x label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
  • show_consensus: whether to show the majority consensus sequence (standard-color scheme)

:return matplotlib axis

def similarity_alignment( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, matrix_type: str | None = None, show_title: bool = True, show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey', show_gaps: bool = True, fancy_gaps: bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1, show_consensus: bool = False) -> matplotlib.axes._axes.Axes:
573def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True,
574                         show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False,
575                         custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey',
576                         show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False,
577                         cbar_fraction: float = 0.1, show_consensus: bool = False)  -> plt.Axes:
578    """
579    Generates a similarity alignment plot. Importantly the similarity values are normalized!
580    :param aln: alignment MSA class or path
581    :param ax: matplotlib axes
582    :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
583    :param show_title: whether to show title
584    :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
585    :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
586    :param show_seq_names: whether to show seq names
587    :param custom_seq_names: custom seq names
588    :param reference_color: color of reference sequence
589    :param different_char_color: color for the lowest similarity
590    :param basic_color: basic color for the highest similarity
591    :param show_gaps: whether to show gaps otherwise it will be ignored
592    :param fancy_gaps: show gaps with a small black bar
593    :param show_x_label: whether to show x label
594    :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
595    :param cbar_fraction: fraction of the original ax reserved for the legend
596    :param show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters)
597    :return  matplotlib axis
598    """
599    # Both options for gaps work hand in hand
600    if fancy_gaps:
601        show_gaps = True
602    # Validate aln, ax inputs
603    aln, ax = _validate_input_parameters(aln=aln, ax=ax)
604    # Validate colors
605    for c in [reference_color, different_char_color, basic_color]:
606        _validate_color(c)
607    # Compute similarity alignment
608    similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type)  # use normalized values here
609    similarity_aln = similarity_aln.round(2)  # round data for good color mapping
610    # create cmap
611    cmap = LinearSegmentedColormap.from_list(
612        "extended",
613        [
614            (0.0, different_char_color),
615            (1.0, basic_color)
616        ],
617    )
618    cmap = ScalarMappable(norm=Normalize(vmin=0, vmax=1), cmap=cmap)
619    # create similarity values
620    similarity_values = np.arange(start=0, stop=1, step=0.01)
621    # round it to be absolutely sure that values match with rounded sim alignment
622    similarity_values = list(similarity_values.round(2))
623    # create the alignment
624    _create_alignment(
625        aln=aln, ax=ax, matrix=similarity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps,
626        show_different_sequence=show_similarity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color,
627        aln_colors=cmap, values_to_plot=similarity_values, identical_value=1
628    )
629    # legend
630    if show_cbar:
631        cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
632        cbar.set_ticks([0, 1])
633        cbar.set_ticklabels(['low', 'high'])
634
635    # format seq names
636    _seq_names(aln, ax, custom_seq_names, show_seq_names)
637
638    # configure axis
639    if show_consensus:
640        consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_similarity_sequence]),
641                       color_scheme='standard', basic_color=basic_color)
642        ax.set_ylim(-0.5, len(aln.alignment) + 1)
643    else:
644        ax.set_ylim(-0.5, len(aln.alignment))
645
646    _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names,
647               include_consensus=show_consensus)
648    if show_title:
649        ax.set_title('similarity', loc='left')
650    _format_x_axis(aln, ax, show_x_label, show_left=False)
651
652    return ax

Generates a similarity alignment plot. Importantly the similarity values are normalized!

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
  • show_title: whether to show title
  • show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
  • show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
  • show_seq_names: whether to show seq names
  • custom_seq_names: custom seq names
  • reference_color: color of reference sequence
  • different_char_color: color for the lowest similarity
  • basic_color: basic color for the highest similarity
  • show_gaps: whether to show gaps otherwise it will be ignored
  • fancy_gaps: show gaps with a small black bar
  • show_x_label: whether to show x label
  • show_cbar: whether to show the legend - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
  • cbar_fraction: fraction of the original ax reserved for the legend
  • show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters) :return matplotlib axis
def stat_plot( aln: msaexplorer.explore.MSA | str, stat_type: str, ax: matplotlib.axes._axes.Axes | None = None, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> matplotlib.axes._axes.Axes:
680def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood',
681              line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> plt.Axes:
682    """
683    Generate a plot for the various alignment stats.
684    :param aln: alignment MSA class or path
685    :param ax: matplotlib axes
686    :param stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', gap frequency, 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
687    :param line_color: color of the line
688    :param line_width: width of the line
689    :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
690    :param show_x_label: whether to show the x-axis label
691    :param show_title: whether to show the title
692    :return matplotlib axes
693    """
694
695    # input check
696    aln, ax = _validate_input_parameters(aln, ax)
697
698    # define possible functions to calc here
699    stat_functions: Dict[str, Callable[[], list | ndarray]] = {
700        'gc': aln.calc_gc,
701        'entropy': aln.calc_entropy,
702        'coverage': aln.calc_coverage,
703        'identity': aln.calc_identity_alignment,
704        'similarity': aln.calc_similarity_alignment,
705        'ts tv score': aln.calc_transition_transversion_score,
706        'gap frequency': aln.calc_gap_frequency
707    }
708
709    if stat_type not in stat_functions:
710        raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys())))
711
712    _validate_color(line_color)
713    if rolling_average < 1 or rolling_average > aln.length:
714        raise ValueError('rolling_average must be between 1 and length of sequence')
715
716    # generate input data
717    array = stat_functions[stat_type]()
718
719    if stat_type == 'identity':
720        min_value, max_value = -1, 0
721    elif stat_type == 'ts tv score':
722        min_value, max_value = -1, 1
723    else:
724        min_value, max_value = 0, 1
725    if stat_type in ['identity', 'similarity']:
726        # for the mean nan values get handled as the lowest possible number in the matrix
727        array = np.nan_to_num(array, True, min_value)
728        array = np.mean(array, axis=0)
729    data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length)
730
731    # plot the data
732    ax.fill_between(
733        # this add dummy data left and right for better plotting
734        # otherwise only half of the step is shown
735        np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx,
736        np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data,
737        min_value,
738        linewidth = line_width,
739        edgecolor=line_color,
740        step='mid' if rolling_average == 1 else None,
741        facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none'
742    )
743    if stat_type == 'gc':
744        ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1)
745
746    # format axis
747    ax.set_ylim(min_value, max_value*0.1+max_value)
748    ax.set_yticks([min_value, max_value])
749    if stat_type == 'gc':
750        ax.set_yticklabels(['0', '100'])
751    elif stat_type == 'ts tv score':
752        ax.set_yticklabels(['tv', 'ts'])
753    else:
754        ax.set_yticklabels(['low', 'high'])
755
756    # show title
757    if show_title:
758        ax.set_title(
759            f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position',
760            loc='left'
761        )
762
763    _format_x_axis(aln, ax, show_x_label, show_left=True)
764
765    return ax

Generate a plot for the various alignment stats.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', gap frequency, 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
  • line_color: color of the line
  • line_width: width of the line
  • rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
  • show_x_label: whether to show the x-axis label
  • show_title: whether to show the title :return matplotlib axes
def variant_plot( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3), color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float | int, float | int] | list = (1, 1)) -> matplotlib.axes._axes.Axes:
768def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3),
769                 color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True,
770                 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1)) -> plt.Axes:
771    """
772    Plots variants.
773    :param aln: alignment MSA class or path
774    :param ax: matplotlib axes
775    :param lollisize: (stem_size, head_size)
776    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
777    :param show_x_label:  whether to show the x-axis label
778    :param show_legend: whether to show the legend
779    :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html
780    :return matplotlib axes
781    """
782
783    # validate input
784    aln, ax = _validate_input_parameters(aln, ax)
785    _validate_color_scheme(color_scheme, aln)
786    if not isinstance(lollisize, tuple) or len(lollisize) != 2:
787        raise ValueError('lollisize must be tuple of length 2 (stem, head)')
788    for _size in lollisize:
789        if not isinstance(_size, float | int) or _size <= 0:
790            raise ValueError('lollisize must be floats greater than zero')
791
792    # define colors
793    colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
794    # get snps
795    snps = aln.get_snps()
796    # define where to plot (each ref type gets a separate line)
797    ref_y_positions, y_pos, detected_var = {}, 0, set()
798
799    # iterate over snp dict
800    for pos in snps['POS']:
801        for identifier in snps['POS'][pos]:
802            # fill in y pos dict
803            if identifier == 'ref':
804                if snps['POS'][pos]['ref'] not in ref_y_positions:
805                    ref_y_positions[snps['POS'][pos]['ref']] = y_pos
806                    y_pos += 1.1
807                    continue
808            # plot
809            if identifier == 'ALT':
810                for alt in snps['POS'][pos]['ALT']:
811                    ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos,
812                              ymin=ref_y_positions[snps['POS'][pos]['ref']],
813                              ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
814                              color=colors[alt],
815                              zorder=100,
816                              linewidth=lollisize[0]
817                              )
818                    ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos,
819                            ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'],
820                            color=colors[alt],
821                            marker='o',
822                            markersize=lollisize[1]
823                            )
824                    detected_var.add(alt)
825
826    # plot hlines
827    for y_char in ref_y_positions:
828        ax.hlines(
829            ref_y_positions[y_char],
830            xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5,
831            xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5,
832            color='black',
833            linestyle='-',
834            zorder=0,
835            linewidth=0.75
836        )
837    # create a custom legend
838    if show_legend:
839        custom_legend = [
840            ax.add_line(
841                plt.Line2D(
842                    [],
843                    [],
844                    color=colors[char],
845                    marker='o',
846                    linestyle='',
847                    markersize=5
848                )
849            ) for char in colors if char in detected_var
850        ]
851        ax.legend(
852            custom_legend,
853            [char for char in colors if char in detected_var],  # ensures correct sorting
854            loc='lower right',
855            title='variant',
856            bbox_to_anchor=bbox_to_anchor,
857            ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var),
858            frameon=False
859        )
860
861    # format axis
862    _format_x_axis(aln, ax, show_x_label, show_left=False)
863    ax.spines['left'].set_visible(False)
864    ax.set_yticks([ref_y_positions[x] for x in ref_y_positions])
865    ax.set_yticklabels(ref_y_positions.keys())
866    ax.set_ylim(0, y_pos)
867    ax.set_ylabel('reference')
868
869    return ax

Plots variants.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • lollisize: (stem_size, head_size)
  • color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
  • show_x_label: whether to show the x-axis label
  • show_legend: whether to show the legend
  • bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html :return matplotlib axes
def orf_plot( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1) -> matplotlib.axes._axes.Axes:
940def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True,
941             cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False,
942             cbar_fraction: float = 0.1) -> plt.Axes:
943    """
944    Plot conserved ORFs.
945    :param aln: alignment MSA class or path
946    :param ax: matplotlib axes
947    :param min_length: minimum length of orf
948    :param non_overlapping_orfs: whether to consider overlapping orfs
949    :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html
950    :param direction_marker_size: marker size for direction marker, not shown if marker_size == None
951    :param show_x_label: whether to show the x-axis label
952    :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
953    :param cbar_fraction: fraction of the original ax reserved for the colorbar
954    :return matplotlib axes
955    """
956
957    # normalize colorbar
958    cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap))
959    # validate input
960    aln, ax = _validate_input_parameters(aln, ax)
961
962    # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only
963    # searched within the zoomed region)
964    aln_temp = deepcopy(aln)
965    aln_temp.zoom = None
966    if non_overlapping_orfs:
967        annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length)
968    else:
969        annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length)
970    # filter dict for zoom
971    if aln.zoom is not None:
972        annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])}
973    # add track for plotting
974    _add_track_positions(annotation_dict)
975    # plot
976    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap)
977
978    # legend
979    if show_cbar:
980        cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction)
981        cbar.set_label('% identity')
982        cbar.set_ticks([0, 100])
983
984    # format axis
985    _format_x_axis(aln, ax, show_x_label, show_left=False)
986    ax.set_ylim(bottom=0.8)
987    ax.set_yticks([])
988    ax.set_yticklabels([])
989    ax.set_title('conserved orfs', loc='left')
990
991    return ax

Plot conserved ORFs.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • min_length: minimum length of orf
  • non_overlapping_orfs: whether to consider overlapping orfs
  • cmap: color mapping for % identity - see https: //matplotlib.org/stable/users/explain/colors/colormaps.html
  • direction_marker_size: marker size for direction marker, not shown if marker_size == None
  • show_x_label: whether to show the x-axis label
  • show_cbar: whether to show the colorbar - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
  • cbar_fraction: fraction of the original ax reserved for the colorbar :return matplotlib axes
def annotation_plot( aln: msaexplorer.explore.MSA | str, annotation: msaexplorer.explore.Annotation | str, feature_to_plot: str, ax: matplotlib.axes._axes.Axes | None = None, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> matplotlib.axes._axes.Axes:
 994def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None,
 995                    color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> plt.Axes:
 996    """
 997    Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
 998    :param aln: alignment MSA class
 999    :param annotation: annotation class | path to annotation file
1000    :param ax: matplotlib axes
1001    :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
1002    :param color: color for the annotation
1003    :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True
1004    :param show_x_label: whether to show the x-axis label
1005    :return matplotlib axes
1006    """
1007    # validate input
1008    aln, ax, annotation = _validate_input_parameters(aln, ax, annotation)
1009    _validate_color(color)
1010
1011    # ignore features to plot for bed files (here it is written into one feature)
1012    if annotation.ann_type == 'bed':
1013        annotation_dict = annotation.features['region']
1014        feature_to_plot = 'bed regions'
1015    else:
1016        # try to subset the annotation dict
1017        try:
1018            annotation_dict = annotation.features[feature_to_plot]
1019        except KeyError:
1020            raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.')
1021
1022    # plotting and formating
1023    _add_track_positions(annotation_dict)
1024    _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color)
1025    _format_x_axis(aln, ax, show_x_label, show_left=False)
1026    ax.set_ylim(bottom=0.8)
1027    ax.set_yticks([])
1028    ax.set_yticklabels([])
1029    ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')
1030
1031    return ax

Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.

Parameters
  • aln: alignment MSA class
  • annotation: annotation class | path to annotation file
  • ax: matplotlib axes
  • feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
  • color: color for the annotation
  • direction_marker_size: marker size for direction marker, only relevant if show_direction is True
  • show_x_label: whether to show the x-axis label :return matplotlib axes
def consensus_plot( aln: msaexplorer.explore.MSA | str, ax: matplotlib.axes._axes.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False, color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> matplotlib.axes._axes.Axes:
1104def consensus_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False,
1105                   color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey',
1106                   show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> plt.Axes:
1107    """
1108    Plot a consensus sequence as a single-row colored sequence.
1109
1110    :param aln: alignment MSA class or path
1111    :param ax: matplotlib axes
1112    :param threshold: consensus threshold (0-1) or None for majority rule
1113    :param use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus
1114    :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None
1115    :param mask_color: color used for mask characters (N/X)
1116    :param ambiguity_color: color used for ambiguous characters
1117    :param show_x_label: whether to show the x-axis label
1118    :param show_name: whether to show the 'consensus' label at y-axis
1119    :param show_sequence: whether to show the sequence at the y-axis
1120
1121    :return: matplotlib axes
1122    """
1123
1124    # validate input
1125    aln, ax = _validate_input_parameters(aln, ax)
1126    # validate colors
1127    for c in [mask_color, ambiguity_color]:
1128        _validate_color(c)
1129    # validate color scheme
1130    _validate_color_scheme(scheme=color_scheme, aln=aln)
1131
1132    # get consensus
1133    consensus = aln.get_consensus(threshold=threshold, use_ambig_nt=use_ambig_nt)
1134
1135    # prepare color mapping
1136    if color_scheme is not None:
1137        # get mapping from config
1138        char_colors = config.CHAR_COLORS[aln.aln_type][color_scheme]
1139    else:
1140        char_colors = None
1141
1142    # Build polygons and colors for a single PolyCollection
1143    polygons = []
1144    poly_colors = []
1145    text_positions = []
1146    text_chars = []
1147    text_colors = []
1148
1149    zoom_offset = aln.zoom[0] if aln.zoom is not None else 0
1150
1151    y_position = len(aln.alignment) - 0.4
1152
1153    for pos, char in enumerate(consensus):
1154        x = pos + zoom_offset
1155        # determine color
1156        if char_colors is not None and char in char_colors:
1157            color = char_colors[char]
1158        else:
1159            # ambiguous nucleotide/aminoacid codes
1160            if char in config.AMBIG_CHARS[aln.aln_type]:
1161                color = ambiguity_color
1162            elif char in ['N', 'X']:
1163                color = mask_color
1164            else:
1165                color = basic_color
1166
1167        rect = [
1168            (x - 0.5, y_position),
1169            (x + 0.5, y_position),
1170            (x + 0.5, y_position+0.8),
1171            (x - 0.5, y_position+0.8),
1172        ]
1173        polygons.append(rect)
1174        poly_colors.append(color)
1175
1176        if show_sequence:
1177            text_positions.append(x)
1178            text_chars.append(char)
1179            # determine readable text color
1180            text_colors.append(_get_contrast_text_color(to_rgba(color)))
1181
1182    # add single PolyCollection
1183    if polygons:
1184        pc = PolyCollection(polygons, facecolors=poly_colors, edgecolors=poly_colors, linewidths=0.5)
1185        ax.add_collection(pc)
1186
1187    # add texts if requested
1188    if show_sequence:
1189        for x, ch, tc in zip(text_positions, text_chars, text_colors):
1190            ax.text(x, y_position+0.4, ch, ha='center', va='center_baseline', c=tc)
1191
1192    # format axis
1193    _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False)
1194    ax.set_ylim(y_position-0.1, y_position + 0.9)
1195    if show_name:
1196        ax.yaxis.set_ticks_position('none')
1197        ax.set_yticks([y_position + 0.4])
1198        ax.set_yticklabels(['consensus'])
1199    else:
1200        ax.set_yticks([])
1201
1202    return ax

Plot a consensus sequence as a single-row colored sequence.

Parameters
  • aln: alignment MSA class or path
  • ax: matplotlib axes
  • threshold: consensus threshold (0-1) or None for majority rule
  • use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus
  • color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None
  • mask_color: color used for mask characters (N/X)
  • ambiguity_color: color used for ambiguous characters
  • show_x_label: whether to show the x-axis label
  • show_name: whether to show the 'consensus' label at y-axis
  • show_sequence: whether to show the sequence at the y-axis
Returns

matplotlib axes