msaexplorer.draw
The draw module
The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a
matplotlib axes has to be passed to the plotting function.
Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments. The functions will raise the appropriate exception in such a case.
Functions
1r""" 2# The draw module 3 4The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a 5`matplotlib axes` has to be passed to the plotting function. 6 7Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments. 8The functions will raise the appropriate exception in such a case. 9 10## Functions 11 12""" 13import pathlib 14# built-in 15from itertools import chain 16from typing import Callable, Dict 17from copy import deepcopy 18import os 19 20import matplotlib 21from numpy import ndarray 22 23# MSAexplorer 24from msaexplorer import explore, config 25 26# libs 27import numpy as np 28import matplotlib.pyplot as plt 29import matplotlib.patches as patches 30from matplotlib.cm import ScalarMappable 31from matplotlib.colors import is_color_like, Normalize, to_rgba, LinearSegmentedColormap 32from matplotlib.collections import PatchCollection, PolyCollection 33from matplotlib.text import TextPath 34from matplotlib.patches import PathPatch 35from matplotlib.font_manager import FontProperties 36from matplotlib.transforms import Affine2D 37 38 39def _validate_input_parameters(aln: explore.MSA | str, ax: plt.Axes, annotation: explore.Annotation | str | None = None) \ 40 -> tuple[explore.MSA, plt.Axes, explore.Annotation] | tuple[explore.MSA, plt.Axes]: 41 """ 42 Validate MSA class and axis. 43 """ 44 # check if alignment is path 45 if type(aln) is str: 46 if os.path.exists(aln): 47 aln = explore.MSA(aln) 48 else: 49 raise FileNotFoundError(f'File {aln} does not exist') 50 # check if alignment is correct type 51 if not isinstance(aln, explore.MSA): 52 raise ValueError('alignment has to be an MSA class. use explore.MSA() to read in alignment') 53 # check if a axis was created 54 # if ax not provided generate one from scratch 55 if ax is None: 56 ax = plt.gca() 57 elif not isinstance(ax, plt.Axes): 58 raise ValueError('ax has to be an matplotlib axis') 59 # check if annotation is correct type 60 if annotation is not None: 61 if type(annotation) is str: 62 if os.path.exists(annotation): 63 # reset zoom so the annotation is correctly parsed 64 msa_temp = deepcopy(aln) 65 msa_temp.zoom = None 66 annotation = explore.Annotation(msa_temp, annotation) 67 else: 68 raise FileNotFoundError() 69 if not isinstance(annotation, explore.Annotation): 70 raise ValueError('annotation has to be an annotation class. use explore.Annotation() to read in annotation') 71 72 if annotation is None: 73 return aln, ax 74 else: 75 return aln, ax, annotation 76 77 78def _validate_color(c): 79 """ 80 validate color and raise error 81 """ 82 if not is_color_like(c): 83 raise ValueError(f'{c} is not a color') 84 85 86def _validate_color_scheme(scheme: str | None, aln: explore.MSA): 87 """ 88 validates colorscheme 89 """ 90 if scheme is not None: 91 if scheme not in config.CHAR_COLORS[aln.aln_type].keys(): 92 raise ValueError(f'Scheme not supported. Supported: {config.CHAR_COLORS[aln.aln_type].keys()}') 93 94 95def _create_color_dictionary(aln: explore.MSA, color_scheme: str, identical_char_color: str | None = None, different_char_color: str | None = None, mask_color: str | None = None, ambiguity_color: str | None = None): 96 """ 97 create the colorscheme dictionary -> this functions adds the respective colors to a dictionary. Some have fixed values and colors of defined 98 schemes are always the idx + 1 99 """ 100 # basic color mapping 101 aln_colors = {} 102 103 if identical_char_color is not None: 104 aln_colors[0] = {'type': 'identical', 'color': identical_char_color} 105 if different_char_color is not None: 106 aln_colors[-1] = {'type': 'different', 'color': different_char_color} 107 if mask_color is not None: 108 aln_colors[-2] = {'type': 'mask', 'color': mask_color} 109 if ambiguity_color is not None: 110 aln_colors[-3] = {'type': 'ambiguity', 'color': ambiguity_color} 111 112 # use the standard setting for the index (same as in aln.calc_identity_alignment) 113 # and map the corresponding color scheme to it 114 if color_scheme is not None: 115 for idx, char in enumerate(config.CHAR_COLORS[aln.aln_type]['standard']): 116 aln_colors[idx + 1] = {'type': char, 'color': config.CHAR_COLORS[aln.aln_type][color_scheme][char]} 117 118 return aln_colors 119 120 121def _format_x_axis(aln: explore.MSA, ax: plt.Axes, show_x_label: bool, show_left: bool): 122 """ 123 General x axis formatting. 124 """ 125 ax.set_xlim( 126 (aln.zoom[0] - 0.5, aln.zoom[0] + aln.length - 0.5) if aln.zoom is not None else (-0.5, aln.length - 0.5) 127 ) 128 ax.spines['top'].set_visible(False) 129 ax.spines['right'].set_visible(False) 130 if show_x_label: 131 ax.set_xlabel('alignment position') 132 if not show_left: 133 ax.spines['left'].set_visible(False) 134 135 136def _seq_names(aln: explore.MSA, ax: plt.Axes, custom_seq_names: tuple, show_seq_names: bool, include_consensus: bool = False): 137 """ 138 Validate custom names and set show names to True. Format axis accordingly. 139 """ 140 if custom_seq_names: 141 show_seq_names = True 142 if not isinstance(custom_seq_names, tuple): 143 raise ValueError('configure your custom names list: custom_names=(name1, name2...)') 144 if len(custom_seq_names) != len(aln.alignment.keys()): 145 raise ValueError('length of sequences not equal to number of custom names') 146 if show_seq_names: 147 ax.yaxis.set_ticks_position('none') 148 ax.set_yticks(np.arange(len(aln.alignment))) 149 if custom_seq_names: 150 names = custom_seq_names[::-1] 151 else: 152 names = [x.split(' ')[0] for x in list(aln.alignment.keys())[::-1]] 153 if include_consensus: 154 names = names + ['consensus'] 155 y_ticks = np.arange(len(aln.alignment) + 1) 156 else: 157 y_ticks = np.arange(len(aln.alignment)) 158 ax.set_yticks(y_ticks) 159 ax.set_yticklabels(names) 160 else: 161 ax.set_yticks([]) 162 163 164def _create_legend(color_scheme: str, aln_colors: dict, aln: explore.MSA, detected_identity_values: set, ax: plt.Axes, bbox_to_anchor: tuple | list): 165 """ 166 create the legend for the alignment and identity alignment plot 167 """ 168 if color_scheme is not None and color_scheme != 'standard': 169 for x in aln_colors: 170 for group in config.CHAR_GROUPS[aln.aln_type][color_scheme]: 171 if aln_colors[x]['type'] in config.CHAR_GROUPS[aln.aln_type][color_scheme][group]: 172 aln_colors[x]['type'] = group 173 break 174 # create it 175 handels, labels, detected_groups = [], [], set() 176 for x in aln_colors: 177 if x in detected_identity_values and aln_colors[x]['type'] not in detected_groups: 178 handels.append( 179 ax.add_line( 180 plt.Line2D( 181 [], 182 [], 183 color=aln_colors[x]['color'] if color_scheme != 'hydrophobicity' or x == 0 else 184 config.CHAR_COLORS[aln.aln_type]['hydrophobicity'][ 185 config.CHAR_GROUPS[aln.aln_type]['hydrophobicity'][aln_colors[x]['type']][0] 186 ], 187 marker='s', 188 markeredgecolor='grey', 189 linestyle='', 190 markersize=10)) 191 ) 192 labels.append(aln_colors[x]['type']) 193 detected_groups.add(aln_colors[x]['type']) 194 195 # ncols 196 if color_scheme is None or aln.aln_type != 'AA': 197 ncols = len(detected_identity_values) 198 elif color_scheme == 'standard': 199 ncols = (len(detected_identity_values) + 1) / 2 200 else: 201 ncols = (len(detected_groups) + 1) / 2 202 203 # plot it 204 ax.legend( 205 handels, 206 labels, 207 loc='lower right', 208 bbox_to_anchor=bbox_to_anchor, 209 ncols=ncols, 210 frameon=False 211 ) 212 213 214def _get_contrast_text_color(rgba_color): 215 """ 216 compute the brightness of a color 217 """ 218 r, g, b, a = rgba_color 219 brightness = (r * 299 + g * 587 + b * 114) / 1000 220 221 return 'white' if brightness < 0.5 else 'black' 222 223 224def _create_alignment(aln: explore.MSA, ax: plt.Axes, matrix: ndarray, aln_colors: dict | ScalarMappable, fancy_gaps: bool, create_identity_patch: bool, 225 show_gaps: bool, show_different_sequence: bool, show_sequence_all: bool, reference_color: str | None, values_to_plot: list, 226 identical_value: int | float = 0): 227 """ 228 create the polygon patch collection for an alignment 229 """ 230 231 # helper functions 232 def _create_identity_patch(row, aln: explore.MSA, col: list, zoom: tuple[int, int], y_position: float | int, 233 reference_color: str | None, seq_name: str, identity_color: str | ndarray, 234 show_gaps: bool): 235 """ 236 Creates the initial patch and adds it to the collection. 237 """ 238 239 color = reference_color if seq_name == aln.reference_id and reference_color is not None else identity_color 240 241 if show_gaps: 242 # plot a rectangle for parts that do not have gaps 243 for stretch in _find_stretches(row, True): 244 col.append( 245 patches.Rectangle( 246 (stretch[0] + zoom[0] - 0.5, y_position), stretch[1] - stretch[0] + 1, 0.8, 247 facecolor=color, 248 edgecolor=color, 249 linewidth=0.5 250 ) 251 ) 252 # just plot a rectangle 253 else: 254 col.append( 255 patches.Rectangle( 256 (zoom[0] - 0.5, y_position), zoom[1] - zoom[0], 0.8, 257 facecolor=color, 258 edgecolor=color, 259 linewidth=0.5 260 ) 261 ) 262 263 return col 264 265 def _find_stretches(row, non_nan_only=False) -> list[tuple[int, int, int]] | list[tuple[int, int]]: 266 """ 267 Finds consecutive stretches of values in an array, with an option to exclude NaN stretches and return start, end, value at start 268 """ 269 if row.size == 0: 270 return [] 271 272 if non_nan_only: 273 # Create a boolean mask for non-NaN values 274 non_nan_mask = ~np.isnan(row) 275 # Find changes in the mask 276 changes = np.diff(non_nan_mask.astype(int)) != 0 277 change_idx = np.nonzero(changes)[0] 278 starts = np.concatenate(([0], change_idx + 1)) 279 ends = np.concatenate((change_idx, [len(row) - 1])) 280 281 # Return only stretches that start with non-NaN values 282 return [(start, end) for start, end in zip(starts, ends) if non_nan_mask[start]] 283 284 else: 285 # Find change points: where adjacent cells differ. 286 changes = np.diff(row) != 0 287 change_idx = np.nonzero(changes)[0] 288 starts = np.concatenate(([0], change_idx + 1)) 289 ends = np.concatenate((change_idx, [len(row) - 1])) 290 291 return [(start, end, row[start]) for start, end in zip(starts, ends)] 292 293 def _create_polygons(stretches: list, values_to_plot: list | ndarray, zoom: tuple, y_position: int | float, 294 polygons: list, aln_colors: dict | ScalarMappable, polygon_colors: list, 295 detected_identity_values: set | None = None): 296 """ 297 create the individual polygons for the heatmap (do not plot each cell but pre-compute if cells are the same and adjacent to each other) 298 """ 299 300 for start, end, value in stretches: 301 # check if this is a value for which we might want to create a polygon 302 # e.g. we might not want to create a polygon for identical values 303 if value not in values_to_plot: 304 continue 305 # add values to this set - > this is important for correct legend creation 306 if detected_identity_values is not None: 307 detected_identity_values.add(value) 308 width = end + 1 - start 309 # Calculate x coordinates adjusted for zoom and centering 310 x0 = start + zoom[0] - 0.5 311 x1 = x0 + width 312 # Define the rectangle corners 313 rect_coords = [ 314 (x0, y_position), 315 (x1, y_position), 316 (x1, y_position + 0.8), 317 (x0, y_position + 0.8), 318 (x0, y_position) 319 ] 320 polygons.append(rect_coords) 321 if type(aln_colors) != ScalarMappable: 322 polygon_colors.append(aln_colors[value]['color']) 323 else: 324 polygon_colors.append(aln_colors.to_rgba(value)) 325 326 return detected_identity_values, polygons, polygon_colors 327 328 def _plot_sequence_text(aln: explore.MSA, seq_name: str, ref_name: str | None, always_text: bool, values: ndarray, 329 matrix: ndarray, ax: plt.Axes, zoom: tuple, y_position: int | float, value_to_skip: int | None, 330 ref_color: str, show_gaps: bool, aln_colors: dict | ScalarMappable = None): 331 """ 332 Plot sequence text - however this will be done even if there is not enough space. 333 """ 334 x_text = 0 335 if seq_name == ref_name and ref_color is not None: 336 different_cols = np.any((matrix != value_to_skip) & ~np.isnan(matrix), axis=0) 337 else: 338 different_cols = [False] * aln.length 339 340 for idx, (character, value) in enumerate(zip(aln.alignment[seq_name], values)): 341 if value != value_to_skip and character != '-' or seq_name == ref_name and character != '-' or character == '-' and not show_gaps or always_text and character != '-': 342 if seq_name == ref_name and ref_color is not None: 343 text_color = _get_contrast_text_color(to_rgba(ref_color)) 344 elif type(aln_colors) is ScalarMappable: 345 text_color = _get_contrast_text_color(aln_colors.to_rgba(value)) 346 else: 347 text_color = _get_contrast_text_color(to_rgba(aln_colors[value]['color'])) 348 349 ax.text( 350 x=x_text + zoom[0] if zoom is not None else x_text, 351 y=y_position + 0.4, 352 s=character, 353 fontweight='bold' if different_cols[idx] else 'normal', 354 ha='center', 355 va='center_baseline', 356 c=text_color if value != value_to_skip or seq_name == ref_name and ref_color is not None else 'dimgrey' 357 ) 358 x_text += 1 359 360 # List to store polygons 361 detected_identity_values = {0} 362 polygons, polygon_colors, patch_list = [], [], [] 363 # determine zoom 364 zoom = (0, aln.length) if aln.zoom is None else aln.zoom 365 for i, seq_name in enumerate(aln.alignment): 366 # define initial y position 367 y_position = len(aln.alignment) - i - 1.4 368 # now plot relevant stuff for the current row 369 row = matrix[i] 370 # plot a line below everything for fancy gaps 371 if fancy_gaps: 372 ax.hlines( 373 y_position + 0.4, 374 xmin=zoom[0] - 0.5, 375 xmax=zoom[1] + 0.5, 376 color='black', 377 linestyle='-', 378 zorder=0, 379 linewidth=0.75 380 ) 381 # plot the basic shape per sequence with gaps 382 if create_identity_patch: 383 if type(aln_colors) == dict: 384 identity_color = aln_colors[0]['color'] 385 else: 386 identity_color = aln_colors.to_rgba(1) # for similarity alignment 387 patch_list = _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, identity_color, show_gaps) 388 # find consecutive stretches 389 stretches = _find_stretches(row) 390 # create polygons per stretch 391 detected_identity_values, polygons, polygon_colors = _create_polygons( 392 stretches=stretches, values_to_plot=values_to_plot, zoom=zoom, y_position=y_position, polygons=polygons, 393 aln_colors=aln_colors, polygon_colors=polygon_colors, detected_identity_values=detected_identity_values 394 ) 395 # add sequence text 396 if show_different_sequence or show_sequence_all: 397 _plot_sequence_text( 398 aln=aln, seq_name=list(aln.alignment.keys())[i], ref_name=aln.reference_id, always_text=show_sequence_all, 399 values=matrix[i], matrix=matrix, ax=ax, zoom=zoom, y_position=y_position, value_to_skip=identical_value, ref_color=reference_color, 400 show_gaps=show_gaps, aln_colors=aln_colors 401 ) 402 # Create the LineCollection: each segment is drawn in a single call. 403 ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt')) 404 # NOTE: linewidth are important so in large overview plot the differences are still visible 405 ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors)) 406 407 408 return detected_identity_values 409 410 411def alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False, 412 custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', 413 show_mask:bool = True, fancy_gaps:bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard', 414 show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1), 415 show_consensus: bool = False) -> plt.Axes: 416 """ 417 418 Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as 419 the identity alignments and similarity alignment function as each square for each character is individually plotted. 420 421 :param aln: alignment MSA class or path 422 :param ax: matplotlib axes 423 :param show_seq_names: whether to show seq names 424 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 425 :param custom_seq_names: custom seq names 426 :param mask_color: color for masked nucleotides/aminoacids 427 :param ambiguity_color: color for ambiguous nucleotides/aminoacids 428 :param basic_color: color that will be used for all normal chars if the colorscheme is None 429 :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch 430 :param fancy_gaps: show gaps with a small black bar 431 :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences 432 :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color. 433 :param show_x_label: whether to show x label 434 :param show_legend: whether to show the legend 435 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 436 :param show_consensus: whether to show the majority consensus sequence (standard-color scheme) 437 :return matplotlib axis 438 """ 439 # Validate aln, ax inputs 440 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 441 # Validate colors 442 for c in [mask_color, ambiguity_color, basic_color]: 443 _validate_color(c) 444 # Validate color scheme 445 _validate_color_scheme(scheme=color_scheme, aln=aln) 446 # create color mapping 447 aln_colors = _create_color_dictionary( 448 aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, 449 different_char_color=None, mask_color=mask_color, ambiguity_color=ambiguity_color 450 ) 451 # Compute identity alignment 452 numerical_alignment = aln.calc_numerical_alignment(encode_ambiguities=show_ambiguities, encode_mask=show_mask) 453 if color_scheme is None: 454 numerical_alignment[np.where(numerical_alignment > 0)] = 0 455 # create the alignment 456 detected_identity_values = _create_alignment( 457 aln=aln, ax=ax, matrix=numerical_alignment, fancy_gaps=fancy_gaps, create_identity_patch=True if color_scheme is None else False, show_gaps=True, 458 show_different_sequence=False, show_sequence_all=show_sequence_all, reference_color=None, aln_colors=aln_colors, 459 values_to_plot = list(aln_colors.keys()) 460 ) 461 # custom legend 462 if show_legend: 463 aln_colors.pop(0) # otherwise legend is wrong (here there is no check if a position is identical or not) 464 _create_legend( 465 color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, 466 detected_identity_values=detected_identity_values, 467 ax=ax, bbox_to_anchor=bbox_to_anchor 468 ) 469 if show_consensus: 470 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=show_sequence_all, 471 color_scheme='standard', basic_color=basic_color, mask_color=mask_color, ambiguity_color=ambiguity_color 472 ) 473 ax.set_ylim(-0.5, len(aln.alignment) + 1) 474 else: 475 ax.set_ylim(-0.5, len(aln.alignment)) 476 477 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, include_consensus=show_consensus) 478 # configure axis 479 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 480 481 return ax 482 483 484 485def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False, 486 show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), 487 reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru', 488 mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask:bool = True, show_gaps:bool = True, 489 fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, 490 color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False, 491 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1), show_consensus: bool = False) -> plt.Axes: 492 """ 493 Generates an identity alignment plot. 494 :param aln: alignment MSA class or path 495 :param ax: matplotlib axes 496 :param show_title: whether to show title 497 :param show_seq_names: whether to show seq names 498 :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues 499 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 500 :param custom_seq_names: custom seq names 501 :param reference_color: color of reference sequence 502 :param basic_color: color for identical nucleotides/aminoacids 503 :param different_char_color: color for different nucleotides/aminoacids 504 :param mask_color: color for masked nucleotides/aminoacids 505 :param ambiguity_color: color for ambiguous nucleotides/aminoacids 506 :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch 507 :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch 508 :param fancy_gaps: show gaps with a small black bar 509 :param show_mismatches: whether to show mismatches otherwise it will be shown as match 510 :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences 511 :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color. 512 :param show_x_label: whether to show x label 513 :param show_legend: whether to show the legend 514 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 515 :param show_consensus: whether to show the majority consensus sequence (standard-color scheme) 516 517 :return matplotlib axis 518 """ 519 520 # Both options for gaps work hand in hand 521 if fancy_gaps: 522 show_gaps = True 523 # Validate aln, ax inputs 524 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 525 # Validate colors 526 for c in [reference_color, basic_color, different_char_color, mask_color, ambiguity_color]: 527 _validate_color(c) 528 # Validate color scheme 529 _validate_color_scheme(scheme=color_scheme, aln=aln) 530 # create color mapping 531 aln_colors = _create_color_dictionary( 532 aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, different_char_color=different_char_color, 533 mask_color=mask_color, ambiguity_color=ambiguity_color 534 ) 535 # Compute identity alignment 536 identity_aln = aln.calc_identity_alignment( 537 encode_mask=show_mask, encode_gaps=show_gaps, encode_mismatches=show_mismatches,encode_ambiguities=show_ambiguities, 538 encode_each_mismatch_char=True if color_scheme is not None else False 539 ) 540 # create the alignment 541 detected_identity_values = _create_alignment( 542 aln=aln, ax=ax, matrix=identity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps, 543 show_different_sequence=show_identity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color, 544 aln_colors=aln_colors, values_to_plot=list(aln_colors.keys())[1:] 545 ) 546 # custom legend 547 if show_legend: 548 _create_legend( 549 color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, detected_identity_values=detected_identity_values, 550 ax=ax, bbox_to_anchor=bbox_to_anchor 551 ) 552 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names) 553 # configure axis 554 if show_consensus: 555 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_identity_sequence]), 556 color_scheme='standard', basic_color=basic_color, mask_color=mask_color, 557 ambiguity_color=ambiguity_color 558 ) 559 ax.set_ylim(-0.5, len(aln.alignment) + 1) 560 else: 561 ax.set_ylim(-0.5, len(aln.alignment)) 562 563 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, 564 include_consensus=show_consensus) 565 if show_title: 566 ax.set_title('identity', loc='left') 567 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 568 569 return ax 570 571 572def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True, 573 show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, 574 custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey', 575 show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False, 576 cbar_fraction: float = 0.1, show_consensus: bool = False) -> plt.Axes: 577 """ 578 Generates a similarity alignment plot. Importantly the similarity values are normalized! 579 :param aln: alignment MSA class or path 580 :param ax: matplotlib axes 581 :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65 582 :param show_title: whether to show title 583 :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues 584 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 585 :param show_seq_names: whether to show seq names 586 :param custom_seq_names: custom seq names 587 :param reference_color: color of reference sequence 588 :param different_char_color: color for the lowest similarity 589 :param basic_color: basic color for the highest similarity 590 :param show_gaps: whether to show gaps otherwise it will be ignored 591 :param fancy_gaps: show gaps with a small black bar 592 :param show_x_label: whether to show x label 593 :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 594 :param cbar_fraction: fraction of the original ax reserved for the legend 595 :param show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters) 596 :return matplotlib axis 597 """ 598 # Both options for gaps work hand in hand 599 if fancy_gaps: 600 show_gaps = True 601 # Validate aln, ax inputs 602 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 603 # Validate colors 604 for c in [reference_color, different_char_color, basic_color]: 605 _validate_color(c) 606 # Compute similarity alignment 607 similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type) # use normalized values here 608 similarity_aln = similarity_aln.round(2) # round data for good color mapping 609 # create cmap 610 cmap = LinearSegmentedColormap.from_list( 611 "extended", 612 [ 613 (0.0, different_char_color), 614 (1.0, basic_color) 615 ], 616 ) 617 cmap = ScalarMappable(norm=Normalize(vmin=0, vmax=1), cmap=cmap) 618 # create similarity values 619 similarity_values = np.arange(start=0, stop=1, step=0.01) 620 # round it to be absolutely sure that values match with rounded sim alignment 621 similarity_values = list(similarity_values.round(2)) 622 # create the alignment 623 _create_alignment( 624 aln=aln, ax=ax, matrix=similarity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps, 625 show_different_sequence=show_similarity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color, 626 aln_colors=cmap, values_to_plot=similarity_values, identical_value=1 627 ) 628 # legend 629 if show_cbar: 630 cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 631 cbar.set_ticks([0, 1]) 632 cbar.set_ticklabels(['low', 'high']) 633 634 # format seq names 635 _seq_names(aln, ax, custom_seq_names, show_seq_names) 636 637 # configure axis 638 if show_consensus: 639 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_similarity_sequence]), 640 color_scheme='standard', basic_color=basic_color) 641 ax.set_ylim(-0.5, len(aln.alignment) + 1) 642 else: 643 ax.set_ylim(-0.5, len(aln.alignment)) 644 645 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, 646 include_consensus=show_consensus) 647 if show_title: 648 ax.set_title('similarity', loc='left') 649 _format_x_axis(aln, ax, show_x_label, show_left=False) 650 651 return ax 652 653 654def _moving_average(arr: ndarray, window_size: int, zoom: tuple | None, aln_length: int) -> tuple[ndarray, ndarray]: 655 """ 656 Calculate the moving average of an array. 657 :param arr: array with values 658 :param window_size: size of the moving average 659 :param zoom: zoom of the alignment 660 :param aln_length: length of the alignment 661 :return: new array with moving average 662 """ 663 if window_size > 1: 664 i = 0 665 moving_averages, plotting_idx = [], [] 666 while i < len(arr) + 1: 667 half_window_size = window_size // 2 668 window_left = arr[i - half_window_size : i] if i > half_window_size else arr[0:i] 669 window_right = arr[i: i + half_window_size] if i < len(arr) - half_window_size else arr[i: len(arr)] 670 moving_averages.append((sum(window_left) + sum(window_right)) / (len(window_left) + len(window_right))) 671 plotting_idx.append(i) 672 i += 1 673 674 return np.array(moving_averages), np.array(plotting_idx) if zoom is None else np.array(plotting_idx) + zoom[0] 675 else: 676 return arr, np.arange(zoom[0], zoom[1]) if zoom is not None else np.arange(aln_length) 677 678 679def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood', 680 line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> plt.Axes: 681 """ 682 Generate a plot for the various alignment stats. 683 :param aln: alignment MSA class or path 684 :param ax: matplotlib axes 685 :param stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65) 686 :param line_color: color of the line 687 :param line_width: width of the line 688 :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids 689 :param show_x_label: whether to show the x-axis label 690 :param show_title: whether to show the title 691 :return matplotlib axes 692 """ 693 694 # input check 695 aln, ax = _validate_input_parameters(aln, ax) 696 697 # define possible functions to calc here 698 stat_functions: Dict[str, Callable[[], list | ndarray]] = { 699 'gc': aln.calc_gc, 700 'entropy': aln.calc_entropy, 701 'coverage': aln.calc_coverage, 702 'identity': aln.calc_identity_alignment, 703 'similarity': aln.calc_similarity_alignment, 704 'ts tv score': aln.calc_transition_transversion_score 705 } 706 707 if stat_type not in stat_functions: 708 raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys()))) 709 710 _validate_color(line_color) 711 if rolling_average < 1 or rolling_average > aln.length: 712 raise ValueError('rolling_average must be between 1 and length of sequence') 713 714 # generate input data 715 array = stat_functions[stat_type]() 716 717 if stat_type == 'identity': 718 min_value, max_value = -1, 0 719 elif stat_type == 'ts tv score': 720 min_value, max_value = -1, 1 721 else: 722 min_value, max_value = 0, 1 723 if stat_type in ['identity', 'similarity']: 724 # for the mean nan values get handled as the lowest possible number in the matrix 725 array = np.nan_to_num(array, True, min_value) 726 array = np.mean(array, axis=0) 727 data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length) 728 729 # plot the data 730 ax.fill_between( 731 # this add dummy data left and right for better plotting 732 # otherwise only half of the step is shown 733 np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx, 734 np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data, 735 min_value, 736 linewidth = line_width, 737 edgecolor=line_color, 738 step='mid' if rolling_average == 1 else None, 739 facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none' 740 ) 741 if stat_type == 'gc': 742 ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1) 743 744 # format axis 745 ax.set_ylim(min_value, max_value*0.1+max_value) 746 ax.set_yticks([min_value, max_value]) 747 if stat_type == 'gc': 748 ax.set_yticklabels(['0', '100']) 749 elif stat_type == 'ts tv score': 750 ax.set_yticklabels(['tv', 'ts']) 751 else: 752 ax.set_yticklabels(['low', 'high']) 753 754 # show title 755 if show_title: 756 ax.set_title( 757 f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position', 758 loc='left' 759 ) 760 761 _format_x_axis(aln, ax, show_x_label, show_left=True) 762 763 return ax 764 765 766def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3), 767 color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True, 768 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1)) -> plt.Axes: 769 """ 770 Plots variants. 771 :param aln: alignment MSA class or path 772 :param ax: matplotlib axes 773 :param lollisize: (stem_size, head_size) 774 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options 775 :param show_x_label: whether to show the x-axis label 776 :param show_legend: whether to show the legend 777 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 778 :return matplotlib axes 779 """ 780 781 # validate input 782 aln, ax = _validate_input_parameters(aln, ax) 783 _validate_color_scheme(color_scheme, aln) 784 if not isinstance(lollisize, tuple) or len(lollisize) != 2: 785 raise ValueError('lollisize must be tuple of length 2 (stem, head)') 786 for _size in lollisize: 787 if not isinstance(_size, float | int) or _size <= 0: 788 raise ValueError('lollisize must be floats greater than zero') 789 790 # define colors 791 colors = config.CHAR_COLORS[aln.aln_type][color_scheme] 792 # get snps 793 snps = aln.get_snps() 794 # define where to plot (each ref type gets a separate line) 795 ref_y_positions, y_pos, detected_var = {}, 0, set() 796 797 # iterate over snp dict 798 for pos in snps['POS']: 799 for identifier in snps['POS'][pos]: 800 # fill in y pos dict 801 if identifier == 'ref': 802 if snps['POS'][pos]['ref'] not in ref_y_positions: 803 ref_y_positions[snps['POS'][pos]['ref']] = y_pos 804 y_pos += 1.1 805 continue 806 # plot 807 if identifier == 'ALT': 808 for alt in snps['POS'][pos]['ALT']: 809 ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos, 810 ymin=ref_y_positions[snps['POS'][pos]['ref']], 811 ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'], 812 color=colors[alt], 813 zorder=100, 814 linewidth=lollisize[0] 815 ) 816 ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos, 817 ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'], 818 color=colors[alt], 819 marker='o', 820 markersize=lollisize[1] 821 ) 822 detected_var.add(alt) 823 824 # plot hlines 825 for y_char in ref_y_positions: 826 ax.hlines( 827 ref_y_positions[y_char], 828 xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5, 829 xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5, 830 color='black', 831 linestyle='-', 832 zorder=0, 833 linewidth=0.75 834 ) 835 # create a custom legend 836 if show_legend: 837 custom_legend = [ 838 ax.add_line( 839 plt.Line2D( 840 [], 841 [], 842 color=colors[char], 843 marker='o', 844 linestyle='', 845 markersize=5 846 ) 847 ) for char in colors if char in detected_var 848 ] 849 ax.legend( 850 custom_legend, 851 [char for char in colors if char in detected_var], # ensures correct sorting 852 loc='lower right', 853 title='variant', 854 bbox_to_anchor=bbox_to_anchor, 855 ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var), 856 frameon=False 857 ) 858 859 # format axis 860 _format_x_axis(aln, ax, show_x_label, show_left=False) 861 ax.spines['left'].set_visible(False) 862 ax.set_yticks([ref_y_positions[x] for x in ref_y_positions]) 863 ax.set_yticklabels(ref_y_positions.keys()) 864 ax.set_ylim(0, y_pos) 865 ax.set_ylabel('reference') 866 867 return ax 868 869 870 871def _plot_annotation(annotation_dict: dict, ax: plt.Axes, direction_marker_size: int | None, color: str | ScalarMappable): 872 """ 873 Plot annotation rectangles 874 """ 875 for annotation in annotation_dict: 876 for locations in annotation_dict[annotation]['location']: 877 x_value = locations[0] 878 length = locations[1] - locations[0] 879 ax.add_patch( 880 patches.FancyBboxPatch( 881 (x_value, annotation_dict[annotation]['track'] + 1), 882 length, 883 0.8, 884 boxstyle="Round, pad=0", 885 ec="black", 886 fc=color.to_rgba(annotation_dict[annotation]['conservation']) if isinstance(color, ScalarMappable) else color, 887 ) 888 ) 889 if direction_marker_size is not None: 890 if annotation_dict[annotation]['strand'] == '-': 891 marker = '<' 892 else: 893 marker = '>' 894 ax.plot(x_value + length/2, annotation_dict[annotation]['track'] + 1.4, marker=marker, markersize=direction_marker_size, color='white', markeredgecolor='black') 895 896 # plot linked annotations (such as splicing) 897 if len(annotation_dict[annotation]['location']) > 1: 898 y_value = annotation_dict[annotation]['track'] + 1.4 899 start = None 900 for locations in annotation_dict[annotation]['location']: 901 if start is None: 902 start = locations[1] 903 continue 904 ax.plot([start, locations[0]], [y_value, y_value], '--', linewidth=2, color='black') 905 start = locations[1] 906 907 908def _add_track_positions(annotation_dic): 909 """ 910 define the position for annotations square so that overlapping annotations do not overlap in the plot 911 """ 912 # create a dict and sort 913 annotation_dic = dict(sorted(annotation_dic.items(), key=lambda x: x[1]['location'][0][0])) 914 915 # remember for each track the largest stop 916 track_stops = [0] 917 918 for ann in annotation_dic: 919 flattened_locations = list(chain.from_iterable(annotation_dic[ann]['location'])) # flatten list 920 track = 0 921 # check if a start of a gene is smaller than the stop of the current track 922 # -> move to new track 923 while flattened_locations[0] < track_stops[track]: 924 track += 1 925 # if all prior tracks are potentially causing an overlap 926 # create a new track and break 927 if len(track_stops) <= track: 928 track_stops.append(0) 929 break 930 # in the current track remember the stop of the current gene 931 track_stops[track] = flattened_locations[-1] 932 # and indicate the track in the dict 933 annotation_dic[ann]['track'] = track 934 935 return annotation_dic 936 937 938def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True, 939 cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, 940 cbar_fraction: float = 0.1) -> plt.Axes: 941 """ 942 Plot conserved ORFs. 943 :param aln: alignment MSA class or path 944 :param ax: matplotlib axes 945 :param min_length: minimum length of orf 946 :param non_overlapping_orfs: whether to consider overlapping orfs 947 :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html 948 :param direction_marker_size: marker size for direction marker, not shown if marker_size == None 949 :param show_x_label: whether to show the x-axis label 950 :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 951 :param cbar_fraction: fraction of the original ax reserved for the colorbar 952 :return matplotlib axes 953 """ 954 955 # normalize colorbar 956 cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap)) 957 # validate input 958 aln, ax = _validate_input_parameters(aln, ax) 959 960 # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only 961 # searched within the zoomed region) 962 aln_temp = deepcopy(aln) 963 aln_temp.zoom = None 964 if non_overlapping_orfs: 965 annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length) 966 else: 967 annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length) 968 # filter dict for zoom 969 if aln.zoom is not None: 970 annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])} 971 # add track for plotting 972 _add_track_positions(annotation_dict) 973 # plot 974 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap) 975 976 # legend 977 if show_cbar: 978 cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 979 cbar.set_label('% identity') 980 cbar.set_ticks([0, 100]) 981 982 # format axis 983 _format_x_axis(aln, ax, show_x_label, show_left=False) 984 ax.set_ylim(bottom=0.8) 985 ax.set_yticks([]) 986 ax.set_yticklabels([]) 987 ax.set_title('conserved orfs', loc='left') 988 989 return ax 990 991 992def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None, 993 color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> plt.Axes: 994 """ 995 Plot annotations from bed, gff or gb files. Are automatically mapped to alignment. 996 :param aln: alignment MSA class 997 :param annotation: annotation class | path to annotation file 998 :param ax: matplotlib axes 999 :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature) 1000 :param color: color for the annotation 1001 :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True 1002 :param show_x_label: whether to show the x-axis label 1003 :return matplotlib axes 1004 """ 1005 # validate input 1006 aln, ax, annotation = _validate_input_parameters(aln, ax, annotation) 1007 _validate_color(color) 1008 1009 # ignore features to plot for bed files (here it is written into one feature) 1010 if annotation.ann_type == 'bed': 1011 annotation_dict = annotation.features['region'] 1012 feature_to_plot = 'bed regions' 1013 else: 1014 # try to subset the annotation dict 1015 try: 1016 annotation_dict = annotation.features[feature_to_plot] 1017 except KeyError: 1018 raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.') 1019 1020 # plotting and formating 1021 _add_track_positions(annotation_dict) 1022 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color) 1023 _format_x_axis(aln, ax, show_x_label, show_left=False) 1024 ax.set_ylim(bottom=0.8) 1025 ax.set_yticks([]) 1026 ax.set_yticklabels([]) 1027 ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left') 1028 1029 return ax 1030 1031 1032def sequence_logo(aln:explore.MSA | str, ax:plt.Axes | None = None, color_scheme: str = 'standard', plot_type: str = 'stacked', 1033 show_x_label:bool = False) -> plt.Axes: 1034 """ 1035 Plot sequence logo or stacked area plot (use the first one with appropriate zoom levels). The 1036 logo visualizes the relative frequency of nt or aa characters in the alignment. The char frequency 1037 is scaled to the information content at each position. --> identical to how Geneious calculates it. 1038 1039 :param aln: alignment MSA class or path 1040 :param ax: matplotlib axes 1041 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options 1042 :param plot_type: 'logo' for sequence logo, 'stacked' for stacked area plot 1043 :param show_x_label: whether to show the x-axis label 1044 :return matplotlib axes 1045 """ 1046 1047 aln, ax = _validate_input_parameters(aln, ax) 1048 # calc matrix 1049 matrix = aln.calc_position_matrix('IC') * aln.calc_position_matrix('PPM') 1050 letters_to_plot = list(config.CHAR_COLORS[aln.aln_type]['standard'].keys()) 1051 1052 # plot 1053 if plot_type == 'logo': 1054 for pos in range(matrix.shape[1]): 1055 # sort the positive matrix row values by size 1056 items = [(letters_to_plot[i], matrix[i, pos]) for i in range(len(letters_to_plot)) if matrix[i, pos] > 0] 1057 items.sort(key=lambda x: x[1]) 1058 # plot each position 1059 y_offset = 0 1060 for letter, h in items: 1061 tp = TextPath((aln.zoom[0] - 0.325 if aln.zoom is not None else - 0.325, 0), letter, size=1, prop=FontProperties(weight='bold')) 1062 bb = tp.get_extents() 1063 glyph_height = bb.height if bb.height > 0 else 1e-6 # avoid div by zero 1064 scale_to_1 = 1.0 / glyph_height 1065 1066 transform = (Affine2D() 1067 .scale(1.0, h * scale_to_1) # scale manually by IC and normalize font 1068 .translate(pos, y_offset) 1069 + ax.transData) 1070 1071 patch = PathPatch(tp, transform=transform, 1072 facecolor=config.CHAR_COLORS[aln.aln_type][color_scheme][letter], 1073 edgecolor='none') 1074 ax.add_patch(patch) 1075 y_offset += h 1076 elif plot_type == 'stacked': 1077 y_values = np.zeros(matrix.shape[1]) 1078 x_values = np.arange(0, matrix.shape[1]) if aln.zoom is None else np.arange(aln.zoom[0], aln.zoom[1]) 1079 for row in range(matrix.shape[0]): 1080 y = matrix[row] 1081 ax.fill_between(x_values, 1082 y_values, 1083 y_values + y, 1084 fc=config.CHAR_COLORS[aln.aln_type][color_scheme].get(letters_to_plot[row]), 1085 ec='None', 1086 label=letters_to_plot[row], 1087 step='mid') 1088 1089 # adjust limits & labels 1090 _format_x_axis(aln, ax, show_x_label, show_left=True) 1091 if aln.aln_type == 'AA': 1092 ax.set_ylim(bottom=0, top=5) 1093 else: 1094 ax.set_ylim(bottom=0, top=2) 1095 ax.spines['top'].set_visible(False) 1096 ax.spines['right'].set_visible(False) 1097 ax.set_ylabel('bits') 1098 1099 return ax 1100 1101 1102def consensus_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False, 1103 color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', 1104 show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> plt.Axes: 1105 """ 1106 Plot a consensus sequence as a single-row colored sequence. 1107 1108 :param aln: alignment MSA class or path 1109 :param ax: matplotlib axes 1110 :param threshold: consensus threshold (0-1) or None for majority rule 1111 :param use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus 1112 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None 1113 :param mask_color: color used for mask characters (N/X) 1114 :param ambiguity_color: color used for ambiguous characters 1115 :param show_x_label: whether to show the x-axis label 1116 :param show_name: whether to show the 'consensus' label at y-axis 1117 :param show_sequence: whether to show the sequence at the y-axis 1118 1119 :return: matplotlib axes 1120 """ 1121 1122 # validate input 1123 aln, ax = _validate_input_parameters(aln, ax) 1124 # validate colors 1125 for c in [mask_color, ambiguity_color]: 1126 _validate_color(c) 1127 # validate color scheme 1128 _validate_color_scheme(scheme=color_scheme, aln=aln) 1129 1130 # get consensus 1131 consensus = aln.get_consensus(threshold=threshold, use_ambig_nt=use_ambig_nt) 1132 1133 # prepare color mapping 1134 if color_scheme is not None: 1135 # get mapping from config 1136 char_colors = config.CHAR_COLORS[aln.aln_type][color_scheme] 1137 else: 1138 char_colors = None 1139 1140 # Build polygons and colors for a single PolyCollection 1141 polygons = [] 1142 poly_colors = [] 1143 text_positions = [] 1144 text_chars = [] 1145 text_colors = [] 1146 1147 zoom_offset = aln.zoom[0] if aln.zoom is not None else 0 1148 1149 y_position = len(aln.alignment) - 0.4 1150 1151 for pos, char in enumerate(consensus): 1152 x = pos + zoom_offset 1153 # determine color 1154 if char_colors is not None and char in char_colors: 1155 color = char_colors[char] 1156 else: 1157 # ambiguous nucleotide/aminoacid codes 1158 if char in config.AMBIG_CHARS[aln.aln_type]: 1159 color = ambiguity_color 1160 elif char in ['N', 'X']: 1161 color = mask_color 1162 else: 1163 color = basic_color 1164 1165 rect = [ 1166 (x - 0.5, y_position), 1167 (x + 0.5, y_position), 1168 (x + 0.5, y_position+0.8), 1169 (x - 0.5, y_position+0.8), 1170 ] 1171 polygons.append(rect) 1172 poly_colors.append(color) 1173 1174 if show_sequence: 1175 text_positions.append(x) 1176 text_chars.append(char) 1177 # determine readable text color 1178 text_colors.append(_get_contrast_text_color(to_rgba(color))) 1179 1180 # add single PolyCollection 1181 if polygons: 1182 pc = PolyCollection(polygons, facecolors=poly_colors, edgecolors=poly_colors, linewidths=0.5) 1183 ax.add_collection(pc) 1184 1185 # add texts if requested 1186 if show_sequence: 1187 for x, ch, tc in zip(text_positions, text_chars, text_colors): 1188 ax.text(x, y_position+0.4, ch, ha='center', va='center_baseline', c=tc) 1189 1190 # format axis 1191 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 1192 ax.set_ylim(y_position-0.1, y_position + 0.9) 1193 if show_name: 1194 ax.yaxis.set_ticks_position('none') 1195 ax.set_yticks([y_position + 0.4]) 1196 ax.set_yticklabels(['consensus']) 1197 else: 1198 ax.set_yticks([]) 1199 1200 return ax
412def alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False, 413 custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', 414 show_mask:bool = True, fancy_gaps:bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard', 415 show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1), 416 show_consensus: bool = False) -> plt.Axes: 417 """ 418 419 Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as 420 the identity alignments and similarity alignment function as each square for each character is individually plotted. 421 422 :param aln: alignment MSA class or path 423 :param ax: matplotlib axes 424 :param show_seq_names: whether to show seq names 425 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 426 :param custom_seq_names: custom seq names 427 :param mask_color: color for masked nucleotides/aminoacids 428 :param ambiguity_color: color for ambiguous nucleotides/aminoacids 429 :param basic_color: color that will be used for all normal chars if the colorscheme is None 430 :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch 431 :param fancy_gaps: show gaps with a small black bar 432 :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences 433 :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color. 434 :param show_x_label: whether to show x label 435 :param show_legend: whether to show the legend 436 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 437 :param show_consensus: whether to show the majority consensus sequence (standard-color scheme) 438 :return matplotlib axis 439 """ 440 # Validate aln, ax inputs 441 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 442 # Validate colors 443 for c in [mask_color, ambiguity_color, basic_color]: 444 _validate_color(c) 445 # Validate color scheme 446 _validate_color_scheme(scheme=color_scheme, aln=aln) 447 # create color mapping 448 aln_colors = _create_color_dictionary( 449 aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, 450 different_char_color=None, mask_color=mask_color, ambiguity_color=ambiguity_color 451 ) 452 # Compute identity alignment 453 numerical_alignment = aln.calc_numerical_alignment(encode_ambiguities=show_ambiguities, encode_mask=show_mask) 454 if color_scheme is None: 455 numerical_alignment[np.where(numerical_alignment > 0)] = 0 456 # create the alignment 457 detected_identity_values = _create_alignment( 458 aln=aln, ax=ax, matrix=numerical_alignment, fancy_gaps=fancy_gaps, create_identity_patch=True if color_scheme is None else False, show_gaps=True, 459 show_different_sequence=False, show_sequence_all=show_sequence_all, reference_color=None, aln_colors=aln_colors, 460 values_to_plot = list(aln_colors.keys()) 461 ) 462 # custom legend 463 if show_legend: 464 aln_colors.pop(0) # otherwise legend is wrong (here there is no check if a position is identical or not) 465 _create_legend( 466 color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, 467 detected_identity_values=detected_identity_values, 468 ax=ax, bbox_to_anchor=bbox_to_anchor 469 ) 470 if show_consensus: 471 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=show_sequence_all, 472 color_scheme='standard', basic_color=basic_color, mask_color=mask_color, ambiguity_color=ambiguity_color 473 ) 474 ax.set_ylim(-0.5, len(aln.alignment) + 1) 475 else: 476 ax.set_ylim(-0.5, len(aln.alignment)) 477 478 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, include_consensus=show_consensus) 479 # configure axis 480 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 481 482 return ax
Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as the identity alignments and similarity alignment function as each square for each character is individually plotted.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- show_seq_names: whether to show seq names
- show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
- custom_seq_names: custom seq names
- mask_color: color for masked nucleotides/aminoacids
- ambiguity_color: color for ambiguous nucleotides/aminoacids
- basic_color: color that will be used for all normal chars if the colorscheme is None
- show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
- fancy_gaps: show gaps with a small black bar
- show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
- color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
- show_x_label: whether to show x label
- show_legend: whether to show the legend
- bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
- show_consensus: whether to show the majority consensus sequence (standard-color scheme) :return matplotlib axis
486def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False, 487 show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), 488 reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru', 489 mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask:bool = True, show_gaps:bool = True, 490 fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, 491 color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False, 492 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1), show_consensus: bool = False) -> plt.Axes: 493 """ 494 Generates an identity alignment plot. 495 :param aln: alignment MSA class or path 496 :param ax: matplotlib axes 497 :param show_title: whether to show title 498 :param show_seq_names: whether to show seq names 499 :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues 500 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 501 :param custom_seq_names: custom seq names 502 :param reference_color: color of reference sequence 503 :param basic_color: color for identical nucleotides/aminoacids 504 :param different_char_color: color for different nucleotides/aminoacids 505 :param mask_color: color for masked nucleotides/aminoacids 506 :param ambiguity_color: color for ambiguous nucleotides/aminoacids 507 :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch 508 :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch 509 :param fancy_gaps: show gaps with a small black bar 510 :param show_mismatches: whether to show mismatches otherwise it will be shown as match 511 :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences 512 :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color. 513 :param show_x_label: whether to show x label 514 :param show_legend: whether to show the legend 515 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 516 :param show_consensus: whether to show the majority consensus sequence (standard-color scheme) 517 518 :return matplotlib axis 519 """ 520 521 # Both options for gaps work hand in hand 522 if fancy_gaps: 523 show_gaps = True 524 # Validate aln, ax inputs 525 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 526 # Validate colors 527 for c in [reference_color, basic_color, different_char_color, mask_color, ambiguity_color]: 528 _validate_color(c) 529 # Validate color scheme 530 _validate_color_scheme(scheme=color_scheme, aln=aln) 531 # create color mapping 532 aln_colors = _create_color_dictionary( 533 aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, different_char_color=different_char_color, 534 mask_color=mask_color, ambiguity_color=ambiguity_color 535 ) 536 # Compute identity alignment 537 identity_aln = aln.calc_identity_alignment( 538 encode_mask=show_mask, encode_gaps=show_gaps, encode_mismatches=show_mismatches,encode_ambiguities=show_ambiguities, 539 encode_each_mismatch_char=True if color_scheme is not None else False 540 ) 541 # create the alignment 542 detected_identity_values = _create_alignment( 543 aln=aln, ax=ax, matrix=identity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps, 544 show_different_sequence=show_identity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color, 545 aln_colors=aln_colors, values_to_plot=list(aln_colors.keys())[1:] 546 ) 547 # custom legend 548 if show_legend: 549 _create_legend( 550 color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, detected_identity_values=detected_identity_values, 551 ax=ax, bbox_to_anchor=bbox_to_anchor 552 ) 553 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names) 554 # configure axis 555 if show_consensus: 556 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_identity_sequence]), 557 color_scheme='standard', basic_color=basic_color, mask_color=mask_color, 558 ambiguity_color=ambiguity_color 559 ) 560 ax.set_ylim(-0.5, len(aln.alignment) + 1) 561 else: 562 ax.set_ylim(-0.5, len(aln.alignment)) 563 564 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, 565 include_consensus=show_consensus) 566 if show_title: 567 ax.set_title('identity', loc='left') 568 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 569 570 return ax
Generates an identity alignment plot.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- show_title: whether to show title
- show_seq_names: whether to show seq names
- show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
- show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
- custom_seq_names: custom seq names
- reference_color: color of reference sequence
- basic_color: color for identical nucleotides/aminoacids
- different_char_color: color for different nucleotides/aminoacids
- mask_color: color for masked nucleotides/aminoacids
- ambiguity_color: color for ambiguous nucleotides/aminoacids
- show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
- show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
- fancy_gaps: show gaps with a small black bar
- show_mismatches: whether to show mismatches otherwise it will be shown as match
- show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
- color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
- show_x_label: whether to show x label
- show_legend: whether to show the legend
- bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
- show_consensus: whether to show the majority consensus sequence (standard-color scheme)
:return matplotlib axis
573def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True, 574 show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, 575 custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey', 576 show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False, 577 cbar_fraction: float = 0.1, show_consensus: bool = False) -> plt.Axes: 578 """ 579 Generates a similarity alignment plot. Importantly the similarity values are normalized! 580 :param aln: alignment MSA class or path 581 :param ax: matplotlib axes 582 :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65 583 :param show_title: whether to show title 584 :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues 585 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 586 :param show_seq_names: whether to show seq names 587 :param custom_seq_names: custom seq names 588 :param reference_color: color of reference sequence 589 :param different_char_color: color for the lowest similarity 590 :param basic_color: basic color for the highest similarity 591 :param show_gaps: whether to show gaps otherwise it will be ignored 592 :param fancy_gaps: show gaps with a small black bar 593 :param show_x_label: whether to show x label 594 :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 595 :param cbar_fraction: fraction of the original ax reserved for the legend 596 :param show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters) 597 :return matplotlib axis 598 """ 599 # Both options for gaps work hand in hand 600 if fancy_gaps: 601 show_gaps = True 602 # Validate aln, ax inputs 603 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 604 # Validate colors 605 for c in [reference_color, different_char_color, basic_color]: 606 _validate_color(c) 607 # Compute similarity alignment 608 similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type) # use normalized values here 609 similarity_aln = similarity_aln.round(2) # round data for good color mapping 610 # create cmap 611 cmap = LinearSegmentedColormap.from_list( 612 "extended", 613 [ 614 (0.0, different_char_color), 615 (1.0, basic_color) 616 ], 617 ) 618 cmap = ScalarMappable(norm=Normalize(vmin=0, vmax=1), cmap=cmap) 619 # create similarity values 620 similarity_values = np.arange(start=0, stop=1, step=0.01) 621 # round it to be absolutely sure that values match with rounded sim alignment 622 similarity_values = list(similarity_values.round(2)) 623 # create the alignment 624 _create_alignment( 625 aln=aln, ax=ax, matrix=similarity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps, 626 show_different_sequence=show_similarity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color, 627 aln_colors=cmap, values_to_plot=similarity_values, identical_value=1 628 ) 629 # legend 630 if show_cbar: 631 cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 632 cbar.set_ticks([0, 1]) 633 cbar.set_ticklabels(['low', 'high']) 634 635 # format seq names 636 _seq_names(aln, ax, custom_seq_names, show_seq_names) 637 638 # configure axis 639 if show_consensus: 640 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_similarity_sequence]), 641 color_scheme='standard', basic_color=basic_color) 642 ax.set_ylim(-0.5, len(aln.alignment) + 1) 643 else: 644 ax.set_ylim(-0.5, len(aln.alignment)) 645 646 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, 647 include_consensus=show_consensus) 648 if show_title: 649 ax.set_title('similarity', loc='left') 650 _format_x_axis(aln, ax, show_x_label, show_left=False) 651 652 return ax
Generates a similarity alignment plot. Importantly the similarity values are normalized!
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
- show_title: whether to show title
- show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
- show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
- show_seq_names: whether to show seq names
- custom_seq_names: custom seq names
- reference_color: color of reference sequence
- different_char_color: color for the lowest similarity
- basic_color: basic color for the highest similarity
- show_gaps: whether to show gaps otherwise it will be ignored
- fancy_gaps: show gaps with a small black bar
- show_x_label: whether to show x label
- show_cbar: whether to show the legend - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
- cbar_fraction: fraction of the original ax reserved for the legend
- show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters) :return matplotlib axis
680def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood', 681 line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> plt.Axes: 682 """ 683 Generate a plot for the various alignment stats. 684 :param aln: alignment MSA class or path 685 :param ax: matplotlib axes 686 :param stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65) 687 :param line_color: color of the line 688 :param line_width: width of the line 689 :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids 690 :param show_x_label: whether to show the x-axis label 691 :param show_title: whether to show the title 692 :return matplotlib axes 693 """ 694 695 # input check 696 aln, ax = _validate_input_parameters(aln, ax) 697 698 # define possible functions to calc here 699 stat_functions: Dict[str, Callable[[], list | ndarray]] = { 700 'gc': aln.calc_gc, 701 'entropy': aln.calc_entropy, 702 'coverage': aln.calc_coverage, 703 'identity': aln.calc_identity_alignment, 704 'similarity': aln.calc_similarity_alignment, 705 'ts tv score': aln.calc_transition_transversion_score 706 } 707 708 if stat_type not in stat_functions: 709 raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys()))) 710 711 _validate_color(line_color) 712 if rolling_average < 1 or rolling_average > aln.length: 713 raise ValueError('rolling_average must be between 1 and length of sequence') 714 715 # generate input data 716 array = stat_functions[stat_type]() 717 718 if stat_type == 'identity': 719 min_value, max_value = -1, 0 720 elif stat_type == 'ts tv score': 721 min_value, max_value = -1, 1 722 else: 723 min_value, max_value = 0, 1 724 if stat_type in ['identity', 'similarity']: 725 # for the mean nan values get handled as the lowest possible number in the matrix 726 array = np.nan_to_num(array, True, min_value) 727 array = np.mean(array, axis=0) 728 data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length) 729 730 # plot the data 731 ax.fill_between( 732 # this add dummy data left and right for better plotting 733 # otherwise only half of the step is shown 734 np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx, 735 np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data, 736 min_value, 737 linewidth = line_width, 738 edgecolor=line_color, 739 step='mid' if rolling_average == 1 else None, 740 facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none' 741 ) 742 if stat_type == 'gc': 743 ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1) 744 745 # format axis 746 ax.set_ylim(min_value, max_value*0.1+max_value) 747 ax.set_yticks([min_value, max_value]) 748 if stat_type == 'gc': 749 ax.set_yticklabels(['0', '100']) 750 elif stat_type == 'ts tv score': 751 ax.set_yticklabels(['tv', 'ts']) 752 else: 753 ax.set_yticklabels(['low', 'high']) 754 755 # show title 756 if show_title: 757 ax.set_title( 758 f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position', 759 loc='left' 760 ) 761 762 _format_x_axis(aln, ax, show_x_label, show_left=True) 763 764 return ax
Generate a plot for the various alignment stats.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
- line_color: color of the line
- line_width: width of the line
- rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
- show_x_label: whether to show the x-axis label
- show_title: whether to show the title :return matplotlib axes
767def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3), 768 color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True, 769 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1)) -> plt.Axes: 770 """ 771 Plots variants. 772 :param aln: alignment MSA class or path 773 :param ax: matplotlib axes 774 :param lollisize: (stem_size, head_size) 775 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options 776 :param show_x_label: whether to show the x-axis label 777 :param show_legend: whether to show the legend 778 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 779 :return matplotlib axes 780 """ 781 782 # validate input 783 aln, ax = _validate_input_parameters(aln, ax) 784 _validate_color_scheme(color_scheme, aln) 785 if not isinstance(lollisize, tuple) or len(lollisize) != 2: 786 raise ValueError('lollisize must be tuple of length 2 (stem, head)') 787 for _size in lollisize: 788 if not isinstance(_size, float | int) or _size <= 0: 789 raise ValueError('lollisize must be floats greater than zero') 790 791 # define colors 792 colors = config.CHAR_COLORS[aln.aln_type][color_scheme] 793 # get snps 794 snps = aln.get_snps() 795 # define where to plot (each ref type gets a separate line) 796 ref_y_positions, y_pos, detected_var = {}, 0, set() 797 798 # iterate over snp dict 799 for pos in snps['POS']: 800 for identifier in snps['POS'][pos]: 801 # fill in y pos dict 802 if identifier == 'ref': 803 if snps['POS'][pos]['ref'] not in ref_y_positions: 804 ref_y_positions[snps['POS'][pos]['ref']] = y_pos 805 y_pos += 1.1 806 continue 807 # plot 808 if identifier == 'ALT': 809 for alt in snps['POS'][pos]['ALT']: 810 ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos, 811 ymin=ref_y_positions[snps['POS'][pos]['ref']], 812 ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'], 813 color=colors[alt], 814 zorder=100, 815 linewidth=lollisize[0] 816 ) 817 ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos, 818 ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'], 819 color=colors[alt], 820 marker='o', 821 markersize=lollisize[1] 822 ) 823 detected_var.add(alt) 824 825 # plot hlines 826 for y_char in ref_y_positions: 827 ax.hlines( 828 ref_y_positions[y_char], 829 xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5, 830 xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5, 831 color='black', 832 linestyle='-', 833 zorder=0, 834 linewidth=0.75 835 ) 836 # create a custom legend 837 if show_legend: 838 custom_legend = [ 839 ax.add_line( 840 plt.Line2D( 841 [], 842 [], 843 color=colors[char], 844 marker='o', 845 linestyle='', 846 markersize=5 847 ) 848 ) for char in colors if char in detected_var 849 ] 850 ax.legend( 851 custom_legend, 852 [char for char in colors if char in detected_var], # ensures correct sorting 853 loc='lower right', 854 title='variant', 855 bbox_to_anchor=bbox_to_anchor, 856 ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var), 857 frameon=False 858 ) 859 860 # format axis 861 _format_x_axis(aln, ax, show_x_label, show_left=False) 862 ax.spines['left'].set_visible(False) 863 ax.set_yticks([ref_y_positions[x] for x in ref_y_positions]) 864 ax.set_yticklabels(ref_y_positions.keys()) 865 ax.set_ylim(0, y_pos) 866 ax.set_ylabel('reference') 867 868 return ax
Plots variants.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- lollisize: (stem_size, head_size)
- color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
- show_x_label: whether to show the x-axis label
- show_legend: whether to show the legend
- bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html :return matplotlib axes
939def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True, 940 cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, 941 cbar_fraction: float = 0.1) -> plt.Axes: 942 """ 943 Plot conserved ORFs. 944 :param aln: alignment MSA class or path 945 :param ax: matplotlib axes 946 :param min_length: minimum length of orf 947 :param non_overlapping_orfs: whether to consider overlapping orfs 948 :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html 949 :param direction_marker_size: marker size for direction marker, not shown if marker_size == None 950 :param show_x_label: whether to show the x-axis label 951 :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 952 :param cbar_fraction: fraction of the original ax reserved for the colorbar 953 :return matplotlib axes 954 """ 955 956 # normalize colorbar 957 cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap)) 958 # validate input 959 aln, ax = _validate_input_parameters(aln, ax) 960 961 # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only 962 # searched within the zoomed region) 963 aln_temp = deepcopy(aln) 964 aln_temp.zoom = None 965 if non_overlapping_orfs: 966 annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length) 967 else: 968 annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length) 969 # filter dict for zoom 970 if aln.zoom is not None: 971 annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])} 972 # add track for plotting 973 _add_track_positions(annotation_dict) 974 # plot 975 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap) 976 977 # legend 978 if show_cbar: 979 cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 980 cbar.set_label('% identity') 981 cbar.set_ticks([0, 100]) 982 983 # format axis 984 _format_x_axis(aln, ax, show_x_label, show_left=False) 985 ax.set_ylim(bottom=0.8) 986 ax.set_yticks([]) 987 ax.set_yticklabels([]) 988 ax.set_title('conserved orfs', loc='left') 989 990 return ax
Plot conserved ORFs.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- min_length: minimum length of orf
- non_overlapping_orfs: whether to consider overlapping orfs
- cmap: color mapping for % identity - see https: //matplotlib.org/stable/users/explain/colors/colormaps.html
- direction_marker_size: marker size for direction marker, not shown if marker_size == None
- show_x_label: whether to show the x-axis label
- show_cbar: whether to show the colorbar - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
- cbar_fraction: fraction of the original ax reserved for the colorbar :return matplotlib axes
993def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None, 994 color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> plt.Axes: 995 """ 996 Plot annotations from bed, gff or gb files. Are automatically mapped to alignment. 997 :param aln: alignment MSA class 998 :param annotation: annotation class | path to annotation file 999 :param ax: matplotlib axes 1000 :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature) 1001 :param color: color for the annotation 1002 :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True 1003 :param show_x_label: whether to show the x-axis label 1004 :return matplotlib axes 1005 """ 1006 # validate input 1007 aln, ax, annotation = _validate_input_parameters(aln, ax, annotation) 1008 _validate_color(color) 1009 1010 # ignore features to plot for bed files (here it is written into one feature) 1011 if annotation.ann_type == 'bed': 1012 annotation_dict = annotation.features['region'] 1013 feature_to_plot = 'bed regions' 1014 else: 1015 # try to subset the annotation dict 1016 try: 1017 annotation_dict = annotation.features[feature_to_plot] 1018 except KeyError: 1019 raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.') 1020 1021 # plotting and formating 1022 _add_track_positions(annotation_dict) 1023 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color) 1024 _format_x_axis(aln, ax, show_x_label, show_left=False) 1025 ax.set_ylim(bottom=0.8) 1026 ax.set_yticks([]) 1027 ax.set_yticklabels([]) 1028 ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left') 1029 1030 return ax
Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
Parameters
- aln: alignment MSA class
- annotation: annotation class | path to annotation file
- ax: matplotlib axes
- feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
- color: color for the annotation
- direction_marker_size: marker size for direction marker, only relevant if show_direction is True
- show_x_label: whether to show the x-axis label :return matplotlib axes
1033def sequence_logo(aln:explore.MSA | str, ax:plt.Axes | None = None, color_scheme: str = 'standard', plot_type: str = 'stacked', 1034 show_x_label:bool = False) -> plt.Axes: 1035 """ 1036 Plot sequence logo or stacked area plot (use the first one with appropriate zoom levels). The 1037 logo visualizes the relative frequency of nt or aa characters in the alignment. The char frequency 1038 is scaled to the information content at each position. --> identical to how Geneious calculates it. 1039 1040 :param aln: alignment MSA class or path 1041 :param ax: matplotlib axes 1042 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options 1043 :param plot_type: 'logo' for sequence logo, 'stacked' for stacked area plot 1044 :param show_x_label: whether to show the x-axis label 1045 :return matplotlib axes 1046 """ 1047 1048 aln, ax = _validate_input_parameters(aln, ax) 1049 # calc matrix 1050 matrix = aln.calc_position_matrix('IC') * aln.calc_position_matrix('PPM') 1051 letters_to_plot = list(config.CHAR_COLORS[aln.aln_type]['standard'].keys()) 1052 1053 # plot 1054 if plot_type == 'logo': 1055 for pos in range(matrix.shape[1]): 1056 # sort the positive matrix row values by size 1057 items = [(letters_to_plot[i], matrix[i, pos]) for i in range(len(letters_to_plot)) if matrix[i, pos] > 0] 1058 items.sort(key=lambda x: x[1]) 1059 # plot each position 1060 y_offset = 0 1061 for letter, h in items: 1062 tp = TextPath((aln.zoom[0] - 0.325 if aln.zoom is not None else - 0.325, 0), letter, size=1, prop=FontProperties(weight='bold')) 1063 bb = tp.get_extents() 1064 glyph_height = bb.height if bb.height > 0 else 1e-6 # avoid div by zero 1065 scale_to_1 = 1.0 / glyph_height 1066 1067 transform = (Affine2D() 1068 .scale(1.0, h * scale_to_1) # scale manually by IC and normalize font 1069 .translate(pos, y_offset) 1070 + ax.transData) 1071 1072 patch = PathPatch(tp, transform=transform, 1073 facecolor=config.CHAR_COLORS[aln.aln_type][color_scheme][letter], 1074 edgecolor='none') 1075 ax.add_patch(patch) 1076 y_offset += h 1077 elif plot_type == 'stacked': 1078 y_values = np.zeros(matrix.shape[1]) 1079 x_values = np.arange(0, matrix.shape[1]) if aln.zoom is None else np.arange(aln.zoom[0], aln.zoom[1]) 1080 for row in range(matrix.shape[0]): 1081 y = matrix[row] 1082 ax.fill_between(x_values, 1083 y_values, 1084 y_values + y, 1085 fc=config.CHAR_COLORS[aln.aln_type][color_scheme].get(letters_to_plot[row]), 1086 ec='None', 1087 label=letters_to_plot[row], 1088 step='mid') 1089 1090 # adjust limits & labels 1091 _format_x_axis(aln, ax, show_x_label, show_left=True) 1092 if aln.aln_type == 'AA': 1093 ax.set_ylim(bottom=0, top=5) 1094 else: 1095 ax.set_ylim(bottom=0, top=2) 1096 ax.spines['top'].set_visible(False) 1097 ax.spines['right'].set_visible(False) 1098 ax.set_ylabel('bits') 1099 1100 return ax
Plot sequence logo or stacked area plot (use the first one with appropriate zoom levels). The logo visualizes the relative frequency of nt or aa characters in the alignment. The char frequency is scaled to the information content at each position. --> identical to how Geneious calculates it.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
- plot_type: 'logo' for sequence logo, 'stacked' for stacked area plot
- show_x_label: whether to show the x-axis label :return matplotlib axes
1103def consensus_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False, 1104 color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', 1105 show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> plt.Axes: 1106 """ 1107 Plot a consensus sequence as a single-row colored sequence. 1108 1109 :param aln: alignment MSA class or path 1110 :param ax: matplotlib axes 1111 :param threshold: consensus threshold (0-1) or None for majority rule 1112 :param use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus 1113 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None 1114 :param mask_color: color used for mask characters (N/X) 1115 :param ambiguity_color: color used for ambiguous characters 1116 :param show_x_label: whether to show the x-axis label 1117 :param show_name: whether to show the 'consensus' label at y-axis 1118 :param show_sequence: whether to show the sequence at the y-axis 1119 1120 :return: matplotlib axes 1121 """ 1122 1123 # validate input 1124 aln, ax = _validate_input_parameters(aln, ax) 1125 # validate colors 1126 for c in [mask_color, ambiguity_color]: 1127 _validate_color(c) 1128 # validate color scheme 1129 _validate_color_scheme(scheme=color_scheme, aln=aln) 1130 1131 # get consensus 1132 consensus = aln.get_consensus(threshold=threshold, use_ambig_nt=use_ambig_nt) 1133 1134 # prepare color mapping 1135 if color_scheme is not None: 1136 # get mapping from config 1137 char_colors = config.CHAR_COLORS[aln.aln_type][color_scheme] 1138 else: 1139 char_colors = None 1140 1141 # Build polygons and colors for a single PolyCollection 1142 polygons = [] 1143 poly_colors = [] 1144 text_positions = [] 1145 text_chars = [] 1146 text_colors = [] 1147 1148 zoom_offset = aln.zoom[0] if aln.zoom is not None else 0 1149 1150 y_position = len(aln.alignment) - 0.4 1151 1152 for pos, char in enumerate(consensus): 1153 x = pos + zoom_offset 1154 # determine color 1155 if char_colors is not None and char in char_colors: 1156 color = char_colors[char] 1157 else: 1158 # ambiguous nucleotide/aminoacid codes 1159 if char in config.AMBIG_CHARS[aln.aln_type]: 1160 color = ambiguity_color 1161 elif char in ['N', 'X']: 1162 color = mask_color 1163 else: 1164 color = basic_color 1165 1166 rect = [ 1167 (x - 0.5, y_position), 1168 (x + 0.5, y_position), 1169 (x + 0.5, y_position+0.8), 1170 (x - 0.5, y_position+0.8), 1171 ] 1172 polygons.append(rect) 1173 poly_colors.append(color) 1174 1175 if show_sequence: 1176 text_positions.append(x) 1177 text_chars.append(char) 1178 # determine readable text color 1179 text_colors.append(_get_contrast_text_color(to_rgba(color))) 1180 1181 # add single PolyCollection 1182 if polygons: 1183 pc = PolyCollection(polygons, facecolors=poly_colors, edgecolors=poly_colors, linewidths=0.5) 1184 ax.add_collection(pc) 1185 1186 # add texts if requested 1187 if show_sequence: 1188 for x, ch, tc in zip(text_positions, text_chars, text_colors): 1189 ax.text(x, y_position+0.4, ch, ha='center', va='center_baseline', c=tc) 1190 1191 # format axis 1192 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 1193 ax.set_ylim(y_position-0.1, y_position + 0.9) 1194 if show_name: 1195 ax.yaxis.set_ticks_position('none') 1196 ax.set_yticks([y_position + 0.4]) 1197 ax.set_yticklabels(['consensus']) 1198 else: 1199 ax.set_yticks([]) 1200 1201 return ax
Plot a consensus sequence as a single-row colored sequence.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- threshold: consensus threshold (0-1) or None for majority rule
- use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus
- color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None
- mask_color: color used for mask characters (N/X)
- ambiguity_color: color used for ambiguous characters
- show_x_label: whether to show the x-axis label
- show_name: whether to show the 'consensus' label at y-axis
- show_sequence: whether to show the sequence at the y-axis
Returns
matplotlib axes