msaexplorer.draw
The draw module
The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a
matplotlib axes has to be passed to the plotting function.
Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments. The functions will raise the appropriate exception in such a case.
Functions
1r""" 2# The draw module 3 4The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a 5`matplotlib axes` has to be passed to the plotting function. 6 7Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments. 8The functions will raise the appropriate exception in such a case. 9 10## Functions 11 12""" 13# built-in 14from itertools import chain 15from typing import Callable, Dict 16from copy import deepcopy 17import os 18 19# MSAexplorer 20from msaexplorer import explore, config 21from msaexplorer._data_classes import AlignmentStats 22from msaexplorer._helpers import _validate_color, _get_contrast_text_color 23 24# libs 25import numpy as np 26from numpy import ndarray 27import matplotlib.pyplot as plt 28import matplotlib.patches as patches 29from matplotlib.cm import ScalarMappable 30from matplotlib.colors import Normalize, to_rgba, LinearSegmentedColormap 31from matplotlib.collections import PatchCollection, PolyCollection 32from matplotlib.text import TextPath 33from matplotlib.patches import PathPatch 34from matplotlib.font_manager import FontProperties 35from matplotlib.transforms import Affine2D 36 37 38def _validate_input_parameters(aln: explore.MSA | str, ax: plt.Axes, annotation: explore.Annotation | str | None = None) \ 39 -> tuple[explore.MSA, plt.Axes, explore.Annotation] | tuple[explore.MSA, plt.Axes]: 40 """ 41 Validate MSA class and axis. 42 """ 43 # check if alignment is path 44 if type(aln) is str: 45 if os.path.exists(aln): 46 aln = explore.MSA(aln) 47 else: 48 raise FileNotFoundError(f'File {aln} does not exist') 49 # check if alignment is correct type 50 if not isinstance(aln, explore.MSA): 51 raise ValueError('alignment has to be an MSA class. use explore.MSA() to read in alignment') 52 # check if a axis was created 53 # if ax not provided generate one from scratch 54 if ax is None: 55 ax = plt.gca() 56 elif not isinstance(ax, plt.Axes): 57 raise ValueError('ax has to be an matplotlib axis') 58 # check if annotation is correct type 59 if annotation is not None: 60 if type(annotation) is str: 61 if os.path.exists(annotation): 62 # reset zoom so the annotation is correctly parsed 63 msa_temp = deepcopy(aln) 64 msa_temp.zoom = None 65 annotation = explore.Annotation(msa_temp, annotation) 66 else: 67 raise FileNotFoundError() 68 if not isinstance(annotation, explore.Annotation): 69 raise ValueError('annotation has to be an annotation class. use explore.Annotation() to read in annotation') 70 71 if annotation is None: 72 return aln, ax 73 else: 74 return aln, ax, annotation 75 76 77def _validate_color_scheme(scheme: str | None, aln: explore.MSA): 78 """ 79 validates colorscheme 80 """ 81 if scheme is not None: 82 if scheme not in config.CHAR_COLORS[aln.aln_type].keys(): 83 raise ValueError(f'Scheme not supported. Supported: {config.CHAR_COLORS[aln.aln_type].keys()}') 84 85 86def _create_color_dictionary(aln: explore.MSA, color_scheme: str, identical_char_color: str | None = None, different_char_color: str | None = None, mask_color: str | None = None, ambiguity_color: str | None = None): 87 """ 88 create the colorscheme dictionary -> this functions adds the respective colors to a dictionary. Some have fixed values and colors of defined 89 schemes are always the idx + 1 90 """ 91 # basic color mapping 92 aln_colors = {} 93 94 if identical_char_color is not None: 95 aln_colors[0] = {'type': 'identical', 'color': identical_char_color} 96 if different_char_color is not None: 97 aln_colors[-1] = {'type': 'different', 'color': different_char_color} 98 if mask_color is not None: 99 aln_colors[-2] = {'type': 'mask', 'color': mask_color} 100 if ambiguity_color is not None: 101 aln_colors[-3] = {'type': 'ambiguity', 'color': ambiguity_color} 102 103 # use the standard setting for the index (same as in aln.calc_identity_alignment) 104 # and map the corresponding color scheme to it 105 if color_scheme is not None: 106 for idx, char in enumerate(config.CHAR_COLORS[aln.aln_type]['standard']): 107 aln_colors[idx + 1] = {'type': char, 'color': config.CHAR_COLORS[aln.aln_type][color_scheme][char]} 108 109 return aln_colors 110 111 112def _format_x_axis(aln: explore.MSA, ax: plt.Axes, show_x_label: bool, show_left: bool): 113 """ 114 General x axis formatting. 115 """ 116 ax.set_xlim( 117 (aln.zoom[0] - 0.5, aln.zoom[0] + aln.length - 0.5) if aln.zoom is not None else (-0.5, aln.length - 0.5) 118 ) 119 ax.spines['top'].set_visible(False) 120 ax.spines['right'].set_visible(False) 121 if show_x_label: 122 ax.set_xlabel('alignment position') 123 if not show_left: 124 ax.spines['left'].set_visible(False) 125 126 127def _seq_names(aln: explore.MSA, ax: plt.Axes, custom_seq_names: tuple, show_seq_names: bool, include_consensus: bool = False): 128 """ 129 Validate custom names and set show names to True. Format axis accordingly. 130 """ 131 if custom_seq_names: 132 show_seq_names = True 133 if not isinstance(custom_seq_names, tuple): 134 raise ValueError('configure your custom names list: custom_names=(name1, name2...)') 135 if len(custom_seq_names) != len(aln): 136 raise ValueError('length of sequences not equal to number of custom names') 137 if show_seq_names: 138 ax.yaxis.set_ticks_position('none') 139 ax.set_yticks(np.arange(len(aln))) 140 if custom_seq_names: 141 names = custom_seq_names[::-1] 142 else: 143 names = [x.split(' ')[0] for x in aln.sequence_ids[::-1]] 144 if include_consensus: 145 names = names + ['consensus'] 146 y_ticks = np.arange(len(aln) + 1) 147 else: 148 y_ticks = np.arange(len(aln)) 149 ax.set_yticks(y_ticks) 150 ax.set_yticklabels(names) 151 else: 152 ax.set_yticks([]) 153 154 155def _create_legend(color_scheme: str, aln_colors: dict, aln: explore.MSA, detected_identity_values: set, ax: plt.Axes, bbox_to_anchor: tuple | list): 156 """ 157 create the legend for the alignment and identity alignment plot 158 """ 159 if color_scheme is not None and color_scheme != 'standard': 160 for x in aln_colors: 161 for group in config.CHAR_GROUPS[aln.aln_type][color_scheme]: 162 if aln_colors[x]['type'] in config.CHAR_GROUPS[aln.aln_type][color_scheme][group]: 163 aln_colors[x]['type'] = group 164 break 165 # create it 166 handels, labels, detected_groups = [], [], set() 167 for x in aln_colors: 168 if x in detected_identity_values and aln_colors[x]['type'] not in detected_groups: 169 handels.append( 170 ax.add_line( 171 plt.Line2D( 172 [], 173 [], 174 color=aln_colors[x]['color'] if color_scheme != 'hydrophobicity' or x == 0 else 175 config.CHAR_COLORS[aln.aln_type]['hydrophobicity'][ 176 config.CHAR_GROUPS[aln.aln_type]['hydrophobicity'][aln_colors[x]['type']][0] 177 ], 178 marker='s', 179 markeredgecolor='grey', 180 linestyle='', 181 markersize=10)) 182 ) 183 labels.append(aln_colors[x]['type']) 184 detected_groups.add(aln_colors[x]['type']) 185 186 # ncols 187 if color_scheme is None or aln.aln_type != 'AA': 188 ncols = len(detected_identity_values) 189 elif color_scheme == 'standard': 190 ncols = (len(detected_identity_values) + 1) / 2 191 else: 192 ncols = (len(detected_groups) + 1) / 2 193 194 # plot it 195 ax.legend( 196 handels, 197 labels, 198 loc='lower right', 199 bbox_to_anchor=bbox_to_anchor, 200 ncols=ncols, 201 frameon=False 202 ) 203 204 205def _create_alignment(aln: explore.MSA, ax: plt.Axes, matrix: ndarray, aln_colors: dict | ScalarMappable, fancy_gaps: bool, create_identity_patch: bool, 206 show_gaps: bool, show_different_sequence: bool, show_sequence_all: bool, reference_color: str | None, values_to_plot: list, 207 identical_value: int | float = 0): 208 """ 209 create the polygon patch collection for an alignment 210 """ 211 212 # helper functions 213 def _create_identity_patch(row, aln: explore.MSA, col: list, zoom: tuple[int, int], y_position: float | int, 214 reference_color: str | None, seq_name: str, identity_color: str | ndarray, 215 show_gaps: bool): 216 """ 217 Creates the initial patch and adds it to the collection. 218 """ 219 220 color = reference_color if seq_name == aln.reference_id and reference_color is not None else identity_color 221 222 if show_gaps: 223 # plot a rectangle for parts that do not have gaps 224 for stretch in _find_stretches(row, True): 225 col.append( 226 patches.Rectangle( 227 (stretch[0] + zoom[0] - 0.5, y_position), stretch[1] - stretch[0] + 1, 0.8, 228 facecolor=color, 229 edgecolor=color, 230 linewidth=0.5 231 ) 232 ) 233 # just plot a rectangle 234 else: 235 col.append( 236 patches.Rectangle( 237 (zoom[0] - 0.5, y_position), zoom[1] - zoom[0], 0.8, 238 facecolor=color, 239 edgecolor=color, 240 linewidth=0.5 241 ) 242 ) 243 244 return col 245 246 def _find_stretches(row, non_nan_only=False) -> list[tuple[int, int, int]] | list[tuple[int, int]]: 247 """ 248 Finds consecutive stretches of values in an array, with an option to exclude NaN stretches and return start, end, value at start 249 """ 250 if row.size == 0: 251 return [] 252 253 if non_nan_only: 254 # Create a boolean mask for non-NaN values 255 non_nan_mask = ~np.isnan(row) 256 # Find changes in the mask 257 changes = np.diff(non_nan_mask.astype(int)) != 0 258 change_idx = np.nonzero(changes)[0] 259 starts = np.concatenate(([0], change_idx + 1)) 260 ends = np.concatenate((change_idx, [len(row) - 1])) 261 262 # Return only stretches that start with non-NaN values 263 return [(start, end) for start, end in zip(starts, ends) if non_nan_mask[start]] 264 265 else: 266 # Find change points: where adjacent cells differ. 267 changes = np.diff(row) != 0 268 change_idx = np.nonzero(changes)[0] 269 starts = np.concatenate(([0], change_idx + 1)) 270 ends = np.concatenate((change_idx, [len(row) - 1])) 271 272 return [(start, end, row[start]) for start, end in zip(starts, ends)] 273 274 def _create_polygons(stretches: list, values_to_plot: list | ndarray, zoom: tuple, y_position: int | float, 275 polygons: list, aln_colors: dict | ScalarMappable, polygon_colors: list, 276 detected_identity_values: set | None = None): 277 """ 278 create the individual polygons for the heatmap (do not plot each cell but pre-compute if cells are the same and adjacent to each other) 279 """ 280 281 for start, end, value in stretches: 282 # check if this is a value for which we might want to create a polygon 283 # e.g. we might not want to create a polygon for identical values 284 if value not in values_to_plot: 285 continue 286 # add values to this set - > this is important for correct legend creation 287 if detected_identity_values is not None: 288 detected_identity_values.add(value) 289 width = end + 1 - start 290 # Calculate x coordinates adjusted for zoom and centering 291 x0 = start + zoom[0] - 0.5 292 x1 = x0 + width 293 # Define the rectangle corners 294 rect_coords = [ 295 (x0, y_position), 296 (x1, y_position), 297 (x1, y_position + 0.8), 298 (x0, y_position + 0.8), 299 (x0, y_position) 300 ] 301 polygons.append(rect_coords) 302 if type(aln_colors) != ScalarMappable: 303 polygon_colors.append(aln_colors[value]['color']) 304 else: 305 polygon_colors.append(aln_colors.to_rgba(value)) 306 307 return detected_identity_values, polygons, polygon_colors 308 309 def _plot_sequence_text(aln: explore.MSA, seq_name: str, ref_name: str | None, always_text: bool, values: ndarray, 310 matrix: ndarray, ax: plt.Axes, zoom: tuple, y_position: int | float, value_to_skip: int | None, 311 ref_color: str, show_gaps: bool, aln_colors: dict | ScalarMappable = None): 312 """ 313 Plot sequence text - however this will be done even if there is not enough space. 314 """ 315 x_text = 0 316 if seq_name == ref_name and ref_color is not None: 317 different_cols = np.any((matrix != value_to_skip) & ~np.isnan(matrix), axis=0) 318 else: 319 different_cols = [False] * aln.length 320 321 for idx, (character, value) in enumerate(zip(aln[seq_name], values)): 322 if value != value_to_skip and character != '-' or seq_name == ref_name and character != '-' or character == '-' and not show_gaps or always_text and character != '-': 323 if seq_name == ref_name and ref_color is not None: 324 text_color = _get_contrast_text_color(to_rgba(ref_color)) 325 elif type(aln_colors) is ScalarMappable: 326 text_color = _get_contrast_text_color(aln_colors.to_rgba(value)) 327 else: 328 text_color = _get_contrast_text_color(to_rgba(aln_colors[value]['color'])) 329 330 ax.text( 331 x=x_text + zoom[0] if zoom is not None else x_text, 332 y=y_position + 0.4, 333 s=character, 334 fontweight='bold' if different_cols[idx] else 'normal', 335 ha='center', 336 va='center_baseline', 337 c=text_color if value != value_to_skip or seq_name == ref_name and ref_color is not None else 'dimgrey' 338 ) 339 x_text += 1 340 341 # List to store polygons 342 detected_identity_values = {0} 343 polygons, polygon_colors, patch_list = [], [], [] 344 # determine zoom 345 zoom = (0, aln.length) if aln.zoom is None else aln.zoom 346 for i, seq_name in enumerate(aln): 347 # define initial y position 348 y_position = len(aln) - i - 1.4 349 # now plot relevant stuff for the current row 350 row = matrix[i] 351 # plot a line below everything for fancy gaps 352 if fancy_gaps: 353 ax.hlines( 354 y_position + 0.4, 355 xmin=zoom[0] - 0.5, 356 xmax=zoom[1] + 0.5, 357 color='black', 358 linestyle='-', 359 zorder=0, 360 linewidth=0.75 361 ) 362 # plot the basic shape per sequence with gaps 363 if create_identity_patch: 364 if type(aln_colors) == dict: 365 identity_color = aln_colors[0]['color'] 366 else: 367 identity_color = aln_colors.to_rgba(1) # for similarity alignment 368 patch_list = _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, identity_color, show_gaps) 369 # find consecutive stretches 370 stretches = _find_stretches(row) 371 # create polygons per stretch 372 detected_identity_values, polygons, polygon_colors = _create_polygons( 373 stretches=stretches, values_to_plot=values_to_plot, zoom=zoom, y_position=y_position, polygons=polygons, 374 aln_colors=aln_colors, polygon_colors=polygon_colors, detected_identity_values=detected_identity_values 375 ) 376 # add sequence text 377 if show_different_sequence or show_sequence_all: 378 _plot_sequence_text( 379 aln=aln, seq_name=seq_name, ref_name=aln.reference_id, always_text=show_sequence_all, 380 values=matrix[i], matrix=matrix, ax=ax, zoom=zoom, y_position=y_position, value_to_skip=identical_value, ref_color=reference_color, 381 show_gaps=show_gaps, aln_colors=aln_colors 382 ) 383 # Create the LineCollection: each segment is drawn in a single call. 384 ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt')) 385 # NOTE: linewidth are important so in large overview plot the differences are still visible 386 ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors)) 387 388 389 return detected_identity_values 390 391 392def alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False, 393 custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', 394 show_mask:bool = True, fancy_gaps:bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard', 395 show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1), 396 show_consensus: bool = False) -> plt.Axes: 397 """ 398 399 Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as 400 the identity alignments and similarity alignment function as each square for each character is individually plotted. 401 402 :param aln: alignment MSA class or path 403 :param ax: matplotlib axes 404 :param show_seq_names: whether to show seq names 405 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 406 :param custom_seq_names: custom seq names 407 :param mask_color: color for masked nucleotides/aminoacids 408 :param ambiguity_color: color for ambiguous nucleotides/aminoacids 409 :param basic_color: color that will be used for all normal chars if the colorscheme is None 410 :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch 411 :param fancy_gaps: show gaps with a small black bar 412 :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences 413 :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color. 414 :param show_x_label: whether to show x label 415 :param show_legend: whether to show the legend 416 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 417 :param show_consensus: whether to show the majority consensus sequence (standard-color scheme) 418 :return matplotlib axis 419 """ 420 # Validate aln, ax inputs 421 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 422 # Validate colors 423 for c in [mask_color, ambiguity_color, basic_color]: 424 _validate_color(c) 425 # Validate color scheme 426 _validate_color_scheme(scheme=color_scheme, aln=aln) 427 # create color mapping 428 aln_colors = _create_color_dictionary( 429 aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, 430 different_char_color=None, mask_color=mask_color, ambiguity_color=ambiguity_color 431 ) 432 # Compute identity alignment 433 numerical_alignment = aln.calc_numerical_alignment(encode_ambiguities=show_ambiguities, encode_mask=show_mask) 434 if color_scheme is None: 435 numerical_alignment[np.where(numerical_alignment > 0)] = 0 436 # create the alignment 437 detected_identity_values = _create_alignment( 438 aln=aln, ax=ax, matrix=numerical_alignment, fancy_gaps=fancy_gaps, create_identity_patch=True if color_scheme is None else False, show_gaps=True, 439 show_different_sequence=False, show_sequence_all=show_sequence_all, reference_color=None, aln_colors=aln_colors, 440 values_to_plot = list(aln_colors.keys()) 441 ) 442 # custom legend 443 if show_legend: 444 aln_colors.pop(0) # otherwise legend is wrong (here there is no check if a position is identical or not) 445 _create_legend( 446 color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, 447 detected_identity_values=detected_identity_values, 448 ax=ax, bbox_to_anchor=bbox_to_anchor 449 ) 450 if show_consensus: 451 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=show_sequence_all, 452 color_scheme='standard', basic_color=basic_color, mask_color=mask_color, ambiguity_color=ambiguity_color 453 ) 454 ax.set_ylim(-0.5, len(aln) + 1) 455 else: 456 ax.set_ylim(-0.5, len(aln)) 457 458 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, include_consensus=show_consensus) 459 # configure axis 460 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 461 462 return ax 463 464 465 466def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False, 467 show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), 468 reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru', 469 mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask:bool = True, show_gaps:bool = True, 470 fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, 471 color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False, 472 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1), show_consensus: bool = False) -> plt.Axes: 473 """ 474 Generates an identity alignment plot. 475 :param aln: alignment MSA class or path 476 :param ax: matplotlib axes 477 :param show_title: whether to show title 478 :param show_seq_names: whether to show seq names 479 :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues 480 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 481 :param custom_seq_names: custom seq names 482 :param reference_color: color of reference sequence 483 :param basic_color: color for identical nucleotides/aminoacids 484 :param different_char_color: color for different nucleotides/aminoacids 485 :param mask_color: color for masked nucleotides/aminoacids 486 :param ambiguity_color: color for ambiguous nucleotides/aminoacids 487 :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch 488 :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch 489 :param fancy_gaps: show gaps with a small black bar 490 :param show_mismatches: whether to show mismatches otherwise it will be shown as match 491 :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences 492 :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color. 493 :param show_x_label: whether to show x label 494 :param show_legend: whether to show the legend 495 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 496 :param show_consensus: whether to show the majority consensus sequence (standard-color scheme) 497 498 :return matplotlib axis 499 """ 500 501 # Both options for gaps work hand in hand 502 if fancy_gaps: 503 show_gaps = True 504 # Validate aln, ax inputs 505 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 506 # Validate colors 507 for c in [reference_color, basic_color, different_char_color, mask_color, ambiguity_color]: 508 _validate_color(c) 509 # Validate color scheme 510 _validate_color_scheme(scheme=color_scheme, aln=aln) 511 # create color mapping 512 aln_colors = _create_color_dictionary( 513 aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, different_char_color=different_char_color, 514 mask_color=mask_color, ambiguity_color=ambiguity_color 515 ) 516 # Compute identity alignment 517 identity_aln = aln.calc_identity_alignment( 518 encode_mask=show_mask, encode_gaps=show_gaps, encode_mismatches=show_mismatches,encode_ambiguities=show_ambiguities, 519 encode_each_mismatch_char=True if color_scheme is not None else False 520 ) 521 # create the alignment 522 detected_identity_values = _create_alignment( 523 aln=aln, ax=ax, matrix=identity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps, 524 show_different_sequence=show_identity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color, 525 aln_colors=aln_colors, values_to_plot=list(aln_colors.keys())[1:] 526 ) 527 # custom legend 528 if show_legend: 529 _create_legend( 530 color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, detected_identity_values=detected_identity_values, 531 ax=ax, bbox_to_anchor=bbox_to_anchor 532 ) 533 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names) 534 # configure axis 535 if show_consensus: 536 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_identity_sequence]), 537 color_scheme='standard', basic_color=basic_color, mask_color=mask_color, 538 ambiguity_color=ambiguity_color 539 ) 540 ax.set_ylim(-0.5, len(aln) + 1) 541 else: 542 ax.set_ylim(-0.5, len(aln)) 543 544 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, 545 include_consensus=show_consensus) 546 if show_title: 547 ax.set_title('identity', loc='left') 548 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 549 550 return ax 551 552 553def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True, 554 show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, 555 custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey', 556 show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False, 557 cbar_fraction: float = 0.1, show_consensus: bool = False) -> plt.Axes: 558 """ 559 Generates a similarity alignment plot. Importantly the similarity values are normalized! 560 :param aln: alignment MSA class or path 561 :param ax: matplotlib axes 562 :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65 563 :param show_title: whether to show title 564 :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues 565 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 566 :param show_seq_names: whether to show seq names 567 :param custom_seq_names: custom seq names 568 :param reference_color: color of reference sequence 569 :param different_char_color: color for the lowest similarity 570 :param basic_color: basic color for the highest similarity 571 :param show_gaps: whether to show gaps otherwise it will be ignored 572 :param fancy_gaps: show gaps with a small black bar 573 :param show_x_label: whether to show x label 574 :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 575 :param cbar_fraction: fraction of the original ax reserved for the legend 576 :param show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters) 577 :return matplotlib axis 578 """ 579 # Both options for gaps work hand in hand 580 if fancy_gaps: 581 show_gaps = True 582 # Validate aln, ax inputs 583 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 584 # Validate colors 585 for c in [reference_color, different_char_color, basic_color]: 586 _validate_color(c) 587 # Compute similarity alignment 588 similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type) # use normalized values here 589 similarity_aln = similarity_aln.round(2) # round data for good color mapping 590 # create cmap 591 cmap = LinearSegmentedColormap.from_list( 592 "extended", 593 [ 594 (0.0, different_char_color), 595 (1.0, basic_color) 596 ], 597 ) 598 cmap = ScalarMappable(norm=Normalize(vmin=0, vmax=1), cmap=cmap) 599 # create similarity values 600 similarity_values = np.arange(start=0, stop=1, step=0.01) 601 # round it to be absolutely sure that values match with rounded sim alignment 602 similarity_values = list(similarity_values.round(2)) 603 # create the alignment 604 _create_alignment( 605 aln=aln, ax=ax, matrix=similarity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps, 606 show_different_sequence=show_similarity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color, 607 aln_colors=cmap, values_to_plot=similarity_values, identical_value=1 608 ) 609 # legend 610 if show_cbar: 611 cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 612 cbar.set_ticks([0, 1]) 613 cbar.set_ticklabels(['low', 'high']) 614 615 # format seq names 616 _seq_names(aln, ax, custom_seq_names, show_seq_names) 617 618 # configure axis 619 if show_consensus: 620 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_similarity_sequence]), 621 color_scheme='standard', basic_color=basic_color) 622 ax.set_ylim(-0.5, len(aln) + 1) 623 else: 624 ax.set_ylim(-0.5, len(aln)) 625 626 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, 627 include_consensus=show_consensus) 628 if show_title: 629 ax.set_title('similarity', loc='left') 630 _format_x_axis(aln, ax, show_x_label, show_left=False) 631 632 return ax 633 634 635def _moving_average(arr: ndarray, window_size: int, zoom: tuple | None, aln_length: int) -> tuple[ndarray, ndarray]: 636 """ 637 Calculate the moving average of an array. 638 :param arr: array with values 639 :param window_size: size of the moving average 640 :param zoom: zoom of the alignment 641 :param aln_length: length of the alignment 642 :return: new array with moving average 643 """ 644 if window_size > 1: 645 i = 0 646 moving_averages, plotting_idx = [], [] 647 while i < len(arr) + 1: 648 half_window_size = window_size // 2 649 window_left = arr[i - half_window_size : i] if i > half_window_size else arr[0:i] 650 window_right = arr[i: i + half_window_size] if i < len(arr) - half_window_size else arr[i: len(arr)] 651 moving_averages.append((sum(window_left) + sum(window_right)) / (len(window_left) + len(window_right))) 652 plotting_idx.append(i) 653 i += 1 654 655 return np.array(moving_averages), np.array(plotting_idx) if zoom is None else np.array(plotting_idx) + zoom[0] 656 else: 657 return arr, np.arange(zoom[0], zoom[1]) if zoom is not None else np.arange(aln_length) 658 659 660def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood', 661 line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> plt.Axes: 662 """ 663 Generate a plot for the various alignment stats. 664 :param aln: alignment MSA class or path 665 :param ax: matplotlib axes 666 :param stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', gap frequency, 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65) 667 :param line_color: color of the line 668 :param line_width: width of the line 669 :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids 670 :param show_x_label: whether to show the x-axis label 671 :param show_title: whether to show the title 672 :return matplotlib axes 673 """ 674 675 # input check 676 aln, ax = _validate_input_parameters(aln, ax) 677 678 # define possible functions to calc here 679 stat_functions: Dict[str, Callable[[], AlignmentStats | ndarray]] = { 680 'gc': aln.calc_gc, 681 'entropy': aln.calc_entropy, 682 'coverage': aln.calc_coverage, 683 'identity': aln.calc_identity_alignment, 684 'similarity': aln.calc_similarity_alignment, 685 'ts tv score': aln.calc_transition_transversion_score, 686 'gap frequency': aln.calc_gap_frequency 687 } 688 689 if stat_type not in stat_functions: 690 raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys()))) 691 692 _validate_color(line_color) 693 if rolling_average < 1 or rolling_average > aln.length: 694 raise ValueError('rolling_average must be between 1 and length of sequence') 695 696 # generate input data 697 array = stat_functions[stat_type]() 698 if isinstance(array, AlignmentStats): 699 array = array.values 700 # define possible spans for values 701 if stat_type == 'identity': 702 min_value, max_value = -1, 0 703 elif stat_type == 'ts tv score': 704 min_value, max_value = -1, 1 705 else: 706 min_value, max_value = 0, 1 707 if stat_type in ['identity', 'similarity']: 708 # for the mean nan values get handled as the lowest possible number in the matrix 709 array = np.nan_to_num(array, True, min_value) 710 array = np.mean(array, axis=0) 711 data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length) 712 713 # plot the data 714 ax.fill_between( 715 # this add dummy data left and right for better plotting 716 # otherwise only half of the step is shown 717 np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx, 718 np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data, 719 min_value, 720 linewidth = line_width, 721 edgecolor=line_color, 722 step='mid' if rolling_average == 1 else None, 723 facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none' 724 ) 725 if stat_type == 'gc': 726 ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1) 727 728 # format axis 729 ax.set_ylim(min_value, max_value*0.1+max_value) 730 ax.set_yticks([min_value, max_value]) 731 if stat_type == 'gc': 732 ax.set_yticklabels(['0', '100']) 733 elif stat_type == 'ts tv score': 734 ax.set_yticklabels(['tv', 'ts']) 735 else: 736 ax.set_yticklabels(['low', 'high']) 737 738 # show title 739 if show_title: 740 ax.set_title( 741 f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position', 742 loc='left' 743 ) 744 745 _format_x_axis(aln, ax, show_x_label, show_left=True) 746 747 return ax 748 749 750def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3), 751 color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True, 752 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1)) -> plt.Axes: 753 """ 754 Plots variants. 755 :param aln: alignment MSA class or path 756 :param ax: matplotlib axes 757 :param lollisize: (stem_size, head_size) 758 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options 759 :param show_x_label: whether to show the x-axis label 760 :param show_legend: whether to show the legend 761 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 762 :return matplotlib axes 763 """ 764 765 # validate input 766 aln, ax = _validate_input_parameters(aln, ax) 767 _validate_color_scheme(color_scheme, aln) 768 if not isinstance(lollisize, tuple) or len(lollisize) != 2: 769 raise ValueError('lollisize must be tuple of length 2 (stem, head)') 770 for _size in lollisize: 771 if not isinstance(_size, float | int) or _size <= 0: 772 raise ValueError('lollisize must be floats greater than zero') 773 774 # define colors 775 colors = config.CHAR_COLORS[aln.aln_type][color_scheme] 776 # get snps 777 snps = aln.get_snps() 778 # define where to plot (each ref type gets a separate line) 779 ref_y_positions, y_pos, detected_var = {}, 0, set() 780 781 # iterate over SNPs 782 for pos, snp_pos in snps.positions.items(): 783 if snp_pos.ref not in ref_y_positions: 784 ref_y_positions[snp_pos.ref] = y_pos 785 y_pos += 1.1 786 787 for alt, (af, _) in snp_pos.alt.items(): 788 x_pos = pos + aln.zoom[0] if aln.zoom is not None else pos 789 y_ref = ref_y_positions[snp_pos.ref] 790 ax.vlines( 791 x=x_pos, 792 ymin=y_ref, 793 ymax=y_ref + af, 794 color=colors[alt], 795 zorder=100, 796 linewidth=lollisize[0] 797 ) 798 ax.plot( 799 x_pos, 800 y_ref + af, 801 color=colors[alt], 802 marker='o', 803 markersize=lollisize[1] 804 ) 805 detected_var.add(alt) 806 807 # plot hlines 808 for y_char in ref_y_positions: 809 ax.hlines( 810 ref_y_positions[y_char], 811 xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5, 812 xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5, 813 color='black', 814 linestyle='-', 815 zorder=0, 816 linewidth=0.75 817 ) 818 # create a custom legend 819 if show_legend: 820 custom_legend = [ 821 ax.add_line( 822 plt.Line2D( 823 [], 824 [], 825 color=colors[char], 826 marker='o', 827 linestyle='', 828 markersize=5 829 ) 830 ) for char in colors if char in detected_var 831 ] 832 ax.legend( 833 custom_legend, 834 [char for char in colors if char in detected_var], # ensures correct sorting 835 loc='lower right', 836 title='variant', 837 bbox_to_anchor=bbox_to_anchor, 838 ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var), 839 frameon=False 840 ) 841 842 # format axis 843 _format_x_axis(aln, ax, show_x_label, show_left=False) 844 ax.spines['left'].set_visible(False) 845 ax.set_yticks([ref_y_positions[x] for x in ref_y_positions]) 846 ax.set_yticklabels(ref_y_positions.keys()) 847 ax.set_ylim(0, y_pos) 848 ax.set_ylabel('reference') 849 850 return ax 851 852 853def _plot_annotation(annotation_dict: dict, ax: plt.Axes, direction_marker_size: int | None, color: str | ScalarMappable): 854 """ 855 Plot annotation rectangles 856 """ 857 for annotation in annotation_dict: 858 for locations in annotation_dict[annotation]['location']: 859 x_value = locations[0] 860 length = locations[1] - locations[0] 861 ax.add_patch( 862 patches.FancyBboxPatch( 863 (x_value, annotation_dict[annotation]['track'] + 1), 864 length, 865 0.8, 866 boxstyle="Round, pad=0", 867 ec="black", 868 fc=color.to_rgba(annotation_dict[annotation]['conservation']) if isinstance(color, ScalarMappable) else color, 869 ) 870 ) 871 if direction_marker_size is not None: 872 if annotation_dict[annotation]['strand'] == '-': 873 marker = '<' 874 else: 875 marker = '>' 876 ax.plot(x_value + length/2, annotation_dict[annotation]['track'] + 1.4, marker=marker, markersize=direction_marker_size, color='white', markeredgecolor='black') 877 878 # plot linked annotations (such as splicing) 879 if len(annotation_dict[annotation]['location']) > 1: 880 y_value = annotation_dict[annotation]['track'] + 1.4 881 start = None 882 for locations in annotation_dict[annotation]['location']: 883 if start is None: 884 start = locations[1] 885 continue 886 ax.plot([start, locations[0]], [y_value, y_value], '--', linewidth=2, color='black') 887 start = locations[1] 888 889 890def _add_track_positions(annotation_dic): 891 """ 892 define the position for annotations square so that overlapping annotations do not overlap in the plot 893 """ 894 # create a dict and sort 895 annotation_dic = dict(sorted(annotation_dic.items(), key=lambda x: x[1]['location'][0][0])) 896 897 # remember for each track the largest stop 898 track_stops = [0] 899 900 for ann in annotation_dic: 901 flattened_locations = list(chain.from_iterable(annotation_dic[ann]['location'])) # flatten list 902 track = 0 903 # check if a start of a gene is smaller than the stop of the current track 904 # -> move to new track 905 while flattened_locations[0] < track_stops[track]: 906 track += 1 907 # if all prior tracks are potentially causing an overlap 908 # create a new track and break 909 if len(track_stops) <= track: 910 track_stops.append(0) 911 break 912 # in the current track remember the stop of the current gene 913 track_stops[track] = flattened_locations[-1] 914 # and indicate the track in the dict 915 annotation_dic[ann]['track'] = track 916 917 return annotation_dic 918 919 920def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True, 921 cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, 922 cbar_fraction: float = 0.1) -> plt.Axes: 923 """ 924 Plot conserved ORFs. 925 :param aln: alignment MSA class or path 926 :param ax: matplotlib axes 927 :param min_length: minimum length of orf 928 :param non_overlapping_orfs: whether to consider overlapping orfs 929 :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html 930 :param direction_marker_size: marker size for direction marker, not shown if marker_size == None 931 :param show_x_label: whether to show the x-axis label 932 :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 933 :param cbar_fraction: fraction of the original ax reserved for the colorbar 934 :return matplotlib axes 935 """ 936 937 # normalize colorbar 938 cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap)) 939 # validate input 940 aln, ax = _validate_input_parameters(aln, ax) 941 942 # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only 943 # searched within the zoomed region) 944 aln_temp = deepcopy(aln) 945 aln_temp.zoom = None 946 if non_overlapping_orfs: 947 orf_collection = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length) 948 else: 949 orf_collection = aln_temp.get_conserved_orfs(min_length=min_length) 950 951 # Normalize ORF dataclasses to the annotation dict shape consumed by plotting helpers. 952 annotation_dict = {} 953 for orf_id, orf_data in orf_collection.items(): 954 annotation_dict[orf_id] = { 955 'location': orf_data.location, 956 'strand': orf_data.strand, 957 'conservation': orf_data.conservation, 958 } 959 # filter dict for zoom 960 if aln.zoom is not None: 961 annotation_dict = { 962 key: val 963 for key, val in annotation_dict.items() 964 if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1]) 965 } 966 # add track for plotting 967 _add_track_positions(annotation_dict) 968 # plot 969 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap) 970 971 # legend 972 if show_cbar: 973 cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 974 cbar.set_label('% identity') 975 cbar.set_ticks([0, 100]) 976 977 # format axis 978 _format_x_axis(aln, ax, show_x_label, show_left=False) 979 ax.set_ylim(bottom=0.8) 980 ax.set_yticks([]) 981 ax.set_yticklabels([]) 982 ax.set_title('conserved orfs', loc='left') 983 984 return ax 985 986 987def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None, 988 color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> plt.Axes: 989 """ 990 Plot annotations from bed, gff or gb files. Are automatically mapped to alignment. 991 :param aln: alignment MSA class 992 :param annotation: annotation class | path to annotation file 993 :param ax: matplotlib axes 994 :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature) 995 :param color: color for the annotation 996 :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True 997 :param show_x_label: whether to show the x-axis label 998 :return matplotlib axes 999 """ 1000 # validate input 1001 aln, ax, annotation = _validate_input_parameters(aln, ax, annotation) 1002 _validate_color(color) 1003 1004 # ignore features to plot for bed files (here it is written into one feature) 1005 if annotation.ann_type == 'bed': 1006 annotation_dict = annotation.features['region'] 1007 feature_to_plot = 'bed regions' 1008 else: 1009 # try to subset the annotation dict 1010 try: 1011 annotation_dict = annotation.features[feature_to_plot] 1012 except KeyError: 1013 raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.') 1014 1015 # plotting and formating 1016 _add_track_positions(annotation_dict) 1017 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color) 1018 _format_x_axis(aln, ax, show_x_label, show_left=False) 1019 ax.set_ylim(bottom=0.8) 1020 ax.set_yticks([]) 1021 ax.set_yticklabels([]) 1022 ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left') 1023 1024 return ax 1025 1026 1027def sequence_logo(aln:explore.MSA | str, ax:plt.Axes | None = None, color_scheme: str = 'standard', plot_type: str = 'stacked', 1028 show_x_label:bool = False) -> plt.Axes: 1029 """ 1030 Plot sequence logo or stacked area plot (use the first one with appropriate zoom levels). The 1031 logo visualizes the relative frequency of nt or aa characters in the alignment. The char frequency 1032 is scaled to the information content at each position. --> identical to how Geneious calculates it. 1033 1034 :param aln: alignment MSA class or path 1035 :param ax: matplotlib axes 1036 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options 1037 :param plot_type: 'logo' for sequence logo, 'stacked' for stacked area plot 1038 :param show_x_label: whether to show the x-axis label 1039 :return matplotlib axes 1040 """ 1041 1042 aln, ax = _validate_input_parameters(aln, ax) 1043 # calc matrix 1044 matrix = aln.calc_position_matrix('IC') * aln.calc_position_matrix('PPM') 1045 letters_to_plot = list(config.CHAR_COLORS[aln.aln_type]['standard'].keys()) 1046 1047 # plot 1048 if plot_type == 'logo': 1049 for pos in range(matrix.shape[1]): 1050 # sort the positive matrix row values by size 1051 items = [(letters_to_plot[i], matrix[i, pos]) for i in range(len(letters_to_plot)) if matrix[i, pos] > 0] 1052 items.sort(key=lambda x: x[1]) 1053 # plot each position 1054 y_offset = 0 1055 for letter, h in items: 1056 tp = TextPath((aln.zoom[0] - 0.325 if aln.zoom is not None else - 0.325, 0), letter, size=1, prop=FontProperties(weight='bold')) 1057 bb = tp.get_extents() 1058 glyph_height = bb.height if bb.height > 0 else 1e-6 # avoid div by zero 1059 scale_to_1 = 1.0 / glyph_height 1060 1061 transform = (Affine2D() 1062 .scale(1.0, h * scale_to_1) # scale manually by IC and normalize font 1063 .translate(pos, y_offset) 1064 + ax.transData) 1065 1066 patch = PathPatch(tp, transform=transform, 1067 facecolor=config.CHAR_COLORS[aln.aln_type][color_scheme][letter], 1068 edgecolor='none') 1069 ax.add_patch(patch) 1070 y_offset += h 1071 elif plot_type == 'stacked': 1072 y_values = np.zeros(matrix.shape[1]) 1073 x_values = np.arange(0, matrix.shape[1]) if aln.zoom is None else np.arange(aln.zoom[0], aln.zoom[1]) 1074 for row in range(matrix.shape[0]): 1075 y = matrix[row] 1076 ax.fill_between(x_values, 1077 y_values, 1078 y_values + y, 1079 fc=config.CHAR_COLORS[aln.aln_type][color_scheme].get(letters_to_plot[row]), 1080 ec='None', 1081 label=letters_to_plot[row], 1082 step='mid') 1083 1084 # adjust limits & labels 1085 _format_x_axis(aln, ax, show_x_label, show_left=True) 1086 if aln.aln_type == 'AA': 1087 ax.set_ylim(bottom=0, top=5) 1088 else: 1089 ax.set_ylim(bottom=0, top=2) 1090 ax.spines['top'].set_visible(False) 1091 ax.spines['right'].set_visible(False) 1092 ax.set_ylabel('bits') 1093 1094 return ax 1095 1096 1097def consensus_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False, 1098 color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', 1099 show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> plt.Axes: 1100 """ 1101 Plot a consensus sequence as a single-row colored sequence. 1102 1103 :param aln: alignment MSA class or path 1104 :param ax: matplotlib axes 1105 :param threshold: consensus threshold (0-1) or None for majority rule 1106 :param use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus 1107 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None 1108 :param mask_color: color used for mask characters (N/X) 1109 :param ambiguity_color: color used for ambiguous characters 1110 :param show_x_label: whether to show the x-axis label 1111 :param show_name: whether to show the 'consensus' label at y-axis 1112 :param show_sequence: whether to show the sequence at the y-axis 1113 1114 :return: matplotlib axes 1115 """ 1116 1117 # validate input 1118 aln, ax = _validate_input_parameters(aln, ax) 1119 # validate colors 1120 for c in [mask_color, ambiguity_color]: 1121 _validate_color(c) 1122 # validate color scheme 1123 _validate_color_scheme(scheme=color_scheme, aln=aln) 1124 1125 # get consensus 1126 consensus = aln.get_consensus(threshold=threshold, use_ambig_nt=use_ambig_nt) 1127 1128 # prepare color mapping 1129 if color_scheme is not None: 1130 # get mapping from config 1131 char_colors = config.CHAR_COLORS[aln.aln_type][color_scheme] 1132 else: 1133 char_colors = None 1134 1135 # Build polygons and colors for a single PolyCollection 1136 polygons = [] 1137 poly_colors = [] 1138 text_positions = [] 1139 text_chars = [] 1140 text_colors = [] 1141 1142 zoom_offset = aln.zoom[0] if aln.zoom is not None else 0 1143 1144 y_position = len(aln) - 0.4 1145 1146 for pos, char in enumerate(consensus): 1147 x = pos + zoom_offset 1148 # determine color 1149 if char_colors is not None and char in char_colors: 1150 color = char_colors[char] 1151 else: 1152 # ambiguous nucleotide/aminoacid codes 1153 if char in config.AMBIG_CHARS[aln.aln_type]: 1154 color = ambiguity_color 1155 elif char in ['N', 'X']: 1156 color = mask_color 1157 else: 1158 color = basic_color 1159 1160 rect = [ 1161 (x - 0.5, y_position), 1162 (x + 0.5, y_position), 1163 (x + 0.5, y_position+0.8), 1164 (x - 0.5, y_position+0.8), 1165 ] 1166 polygons.append(rect) 1167 poly_colors.append(color) 1168 1169 if show_sequence: 1170 text_positions.append(x) 1171 text_chars.append(char) 1172 # determine readable text color 1173 text_colors.append(_get_contrast_text_color(to_rgba(color))) 1174 1175 # add single PolyCollection 1176 if polygons: 1177 pc = PolyCollection(polygons, facecolors=poly_colors, edgecolors=poly_colors, linewidths=0.5) 1178 ax.add_collection(pc) 1179 1180 # add texts if requested 1181 if show_sequence: 1182 for x, ch, tc in zip(text_positions, text_chars, text_colors): 1183 ax.text(x, y_position+0.4, ch, ha='center', va='center_baseline', c=tc) 1184 1185 # format axis 1186 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 1187 ax.set_ylim(y_position-0.1, y_position + 0.9) 1188 if show_name: 1189 ax.yaxis.set_ticks_position('none') 1190 ax.set_yticks([y_position + 0.4]) 1191 ax.set_yticklabels(['consensus']) 1192 else: 1193 ax.set_yticks([]) 1194 1195 return ax 1196 1197def simplot(aln: explore.MSA | str, ref: str | None, ax: plt.Axes | None = None, colors: str | list | None = None, 1198 window_size: int = 200, step_size: int = 20, distance_calculation: str = 'ged', line_width: int | float = 1, 1199 show_legend: bool = False, show_reference: bool = True, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1), 1200 show_x_label: bool = False) -> plt.Axes: 1201 """ 1202 Calculate binned pairwise distances (similarity) between sequences and a reference sequence 1203 over a stepwise sliding window in the zoomed region of the alignment. This is inspired by simplot that helps to identify 1204 recombination sites. For proper recombination analyses use simplot or simplot++ (https://github.com/Stephane-S/Simplot_PlusPlus). 1205 Each sequence is plotted as a line displaying the similarity to the reference. The reference sequence can be either 1206 set to None (compared to consensus) or to a specific reference id. 1207 1208 :param aln: alignment MSA class or path 1209 :param ax: matplotlib axes 1210 :param ref: reference sequence id or None. For 'None' all computations are compared to a majority consensus 1211 :param colors: color for each sequence. can be a single named color or a list of named colors or a plt.colormap or None (auto coloring) 1212 :param window_size: window size for sliding window 1213 :param step_size: step size for sliding window 1214 :param distance_calculation: distance calculation method. Supported: ghd (global hamming distance), ged (gap excluded distance) and for nt additionally: jc69(Jukes-Cantor 1969) and k2p (Kimura 2-Parameter / K80). For more information see: explore.MSA.calc_pairwise_identity_matrix() 1215 :param line_width: width of the plotted lines 1216 :param show_legend: whether to show the legend 1217 :param show_reference: whether to show the reference id in the right lower corner 1218 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 1219 :param show_x_label: whether to show the x-axis label 1220 1221 :return: matplotlib axes 1222 """ 1223 1224 # validate inputs 1225 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 1226 if not isinstance(window_size, int) or window_size <= 0: 1227 raise ValueError('window_size has to be a positive integer') 1228 if window_size > aln.length: 1229 raise ValueError('window_size can not be larger than the (zoomed) alignment length') 1230 if not isinstance(step_size, int) or step_size <= 0: 1231 raise ValueError('step_size has to be a positive integer') 1232 if step_size > aln.length: 1233 raise ValueError('step_size can not be larger than the (zoomed) alignment length') 1234 if ref is not None and ref not in aln: 1235 raise ValueError(f'Reference {ref} not in alignment') 1236 if distance_calculation not in ['ghd', 'ged', 'jc69', 'k2p']: 1237 raise ValueError(f'Distance calculation method {distance_calculation} not supported. Supported: ghd, ged, jc69, k2p') 1238 if distance_calculation in ['jc69', 'k2p'] and aln.aln_type == 'AA': 1239 raise ValueError(f'Distance calculation method {distance_calculation} only supported for nucleotide alignments') 1240 1241 # get the sequence ids to plot 1242 sequence_ids = [seq_id for seq_id in aln if seq_id != ref] 1243 1244 # validate colors 1245 if colors is not None: 1246 # named color or colormap 1247 if isinstance(colors, str): 1248 # first try potential colormap 1249 try: 1250 cmap = plt.get_cmap(colors) 1251 cmap_colors = cmap(np.linspace(0, 1, len(sequence_ids))) 1252 color_map = {seq_id: color for seq_id, color in zip(sequence_ids, cmap_colors)} 1253 # single color 1254 except ValueError: 1255 _validate_color(colors) 1256 color_map = {seq_id: colors for seq_id in sequence_ids} 1257 # list of colors 1258 elif isinstance(colors, list): 1259 if len(colors) != len(sequence_ids): 1260 raise ValueError('colors list length has to match the number of plotted sequences') 1261 for color in colors: 1262 _validate_color(color) 1263 color_map = {seq_id: color for seq_id, color in zip(sequence_ids, colors)} 1264 else: 1265 raise ValueError('colors has to be either a single named color, a list of named colors, a plt colormap or None (auto)') 1266 else: 1267 color_map = None 1268 1269 # define region and window 1270 region_start = aln.zoom[0] if aln.zoom is not None else 0 1271 half_window_size = window_size // 2 1272 1273 # copy alignment and set reference 1274 aln_tmp = deepcopy(aln) 1275 aln_tmp.reference_id = ref 1276 # reset zoom (later the alignment dictionary will be replaced for a slice of the alignment) 1277 aln_tmp._zoom = None 1278 1279 # remember x positions and distances 1280 x_positions = [] 1281 distance_traces = {seq_id: [] for seq_id in sequence_ids} 1282 1283 for point_to_plot in range(0, aln.length + 1, step_size): 1284 # define windows 1285 left_side = point_to_plot - half_window_size if point_to_plot - half_window_size > 0 else 0 1286 right_side = point_to_plot + half_window_size if point_to_plot + half_window_size < aln.length else aln.length 1287 # slice alignment and replace the original alignment 1288 aln_tmp._alignment = { 1289 seq_id: seq[left_side:right_side] 1290 for seq_id, seq in aln.alignment.items() 1291 } 1292 window_result = aln_tmp.calc_pairwise_distance_to_reference(distance_type=distance_calculation) 1293 value_map = {seq_id: value for seq_id, value in zip(window_result.sequence_ids, window_result.distances)} 1294 1295 x_positions.append(region_start + point_to_plot) 1296 for seq_id in sequence_ids: 1297 distance_traces[seq_id].append(value_map[seq_id]) 1298 1299 for seq_id in sequence_ids: 1300 if color_map is not None: 1301 ax.plot(x_positions, distance_traces[seq_id], 1302 color=color_map[seq_id], 1303 linewidth=line_width, label=seq_id) 1304 else: 1305 ax.plot(x_positions, distance_traces[seq_id], 1306 linewidth=line_width, label=seq_id) 1307 # Format axis 1308 ax.set_ylabel('similarity (%)') 1309 ax.set_ylim(0, 102) 1310 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=True) 1311 # add legend for all sequences on top 1312 if show_legend: 1313 leg1 = ax.legend(frameon=False, loc='lower right', bbox_to_anchor=bbox_to_anchor, ncols=3) 1314 ax.add_artist(leg1) 1315 # add legend for query 1316 if show_reference: 1317 ref_label = ref if ref is not None else 'consensus' 1318 ref_handle = plt.Line2D([0], [0], linewidth=0, label=f'reference: {ref_label}') 1319 leg2 = ax.legend(handles=[ref_handle], frameon=False, bbox_to_anchor=(1, 0), loc='lower right') 1320 ax.add_artist(leg2) 1321 1322 return ax
393def alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_sequence_all: bool = False, show_seq_names: bool = False, 394 custom_seq_names: tuple | list = (), mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', 395 show_mask:bool = True, fancy_gaps:bool = False, show_ambiguities: bool = False, color_scheme: str | None = 'standard', 396 show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1), 397 show_consensus: bool = False) -> plt.Axes: 398 """ 399 400 Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as 401 the identity alignments and similarity alignment function as each square for each character is individually plotted. 402 403 :param aln: alignment MSA class or path 404 :param ax: matplotlib axes 405 :param show_seq_names: whether to show seq names 406 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 407 :param custom_seq_names: custom seq names 408 :param mask_color: color for masked nucleotides/aminoacids 409 :param ambiguity_color: color for ambiguous nucleotides/aminoacids 410 :param basic_color: color that will be used for all normal chars if the colorscheme is None 411 :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch 412 :param fancy_gaps: show gaps with a small black bar 413 :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences 414 :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color. 415 :param show_x_label: whether to show x label 416 :param show_legend: whether to show the legend 417 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 418 :param show_consensus: whether to show the majority consensus sequence (standard-color scheme) 419 :return matplotlib axis 420 """ 421 # Validate aln, ax inputs 422 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 423 # Validate colors 424 for c in [mask_color, ambiguity_color, basic_color]: 425 _validate_color(c) 426 # Validate color scheme 427 _validate_color_scheme(scheme=color_scheme, aln=aln) 428 # create color mapping 429 aln_colors = _create_color_dictionary( 430 aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, 431 different_char_color=None, mask_color=mask_color, ambiguity_color=ambiguity_color 432 ) 433 # Compute identity alignment 434 numerical_alignment = aln.calc_numerical_alignment(encode_ambiguities=show_ambiguities, encode_mask=show_mask) 435 if color_scheme is None: 436 numerical_alignment[np.where(numerical_alignment > 0)] = 0 437 # create the alignment 438 detected_identity_values = _create_alignment( 439 aln=aln, ax=ax, matrix=numerical_alignment, fancy_gaps=fancy_gaps, create_identity_patch=True if color_scheme is None else False, show_gaps=True, 440 show_different_sequence=False, show_sequence_all=show_sequence_all, reference_color=None, aln_colors=aln_colors, 441 values_to_plot = list(aln_colors.keys()) 442 ) 443 # custom legend 444 if show_legend: 445 aln_colors.pop(0) # otherwise legend is wrong (here there is no check if a position is identical or not) 446 _create_legend( 447 color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, 448 detected_identity_values=detected_identity_values, 449 ax=ax, bbox_to_anchor=bbox_to_anchor 450 ) 451 if show_consensus: 452 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=show_sequence_all, 453 color_scheme='standard', basic_color=basic_color, mask_color=mask_color, ambiguity_color=ambiguity_color 454 ) 455 ax.set_ylim(-0.5, len(aln) + 1) 456 else: 457 ax.set_ylim(-0.5, len(aln)) 458 459 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, include_consensus=show_consensus) 460 # configure axis 461 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 462 463 return ax
Plot an alignment with each character colored as defined in the scheme. This is computationally more intensive as the identity alignments and similarity alignment function as each square for each character is individually plotted.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- show_seq_names: whether to show seq names
- show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
- custom_seq_names: custom seq names
- mask_color: color for masked nucleotides/aminoacids
- ambiguity_color: color for ambiguous nucleotides/aminoacids
- basic_color: color that will be used for all normal chars if the colorscheme is None
- show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
- fancy_gaps: show gaps with a small black bar
- show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
- color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
- show_x_label: whether to show x label
- show_legend: whether to show the legend
- bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
- show_consensus: whether to show the majority consensus sequence (standard-color scheme) :return matplotlib axis
467def identity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, show_title: bool = True, show_identity_sequence: bool = False, 468 show_sequence_all: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), 469 reference_color: str = 'lightsteelblue', basic_color: str = 'lightgrey', different_char_color: str = 'peru', 470 mask_color: str = 'dimgrey', ambiguity_color: str = 'black', show_mask:bool = True, show_gaps:bool = True, 471 fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, 472 color_scheme: str | None = None, show_x_label: bool = True, show_legend: bool = False, 473 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1), show_consensus: bool = False) -> plt.Axes: 474 """ 475 Generates an identity alignment plot. 476 :param aln: alignment MSA class or path 477 :param ax: matplotlib axes 478 :param show_title: whether to show title 479 :param show_seq_names: whether to show seq names 480 :param show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues 481 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 482 :param custom_seq_names: custom seq names 483 :param reference_color: color of reference sequence 484 :param basic_color: color for identical nucleotides/aminoacids 485 :param different_char_color: color for different nucleotides/aminoacids 486 :param mask_color: color for masked nucleotides/aminoacids 487 :param ambiguity_color: color for ambiguous nucleotides/aminoacids 488 :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch 489 :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch 490 :param fancy_gaps: show gaps with a small black bar 491 :param show_mismatches: whether to show mismatches otherwise it will be shown as match 492 :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences 493 :param color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color. 494 :param show_x_label: whether to show x label 495 :param show_legend: whether to show the legend 496 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 497 :param show_consensus: whether to show the majority consensus sequence (standard-color scheme) 498 499 :return matplotlib axis 500 """ 501 502 # Both options for gaps work hand in hand 503 if fancy_gaps: 504 show_gaps = True 505 # Validate aln, ax inputs 506 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 507 # Validate colors 508 for c in [reference_color, basic_color, different_char_color, mask_color, ambiguity_color]: 509 _validate_color(c) 510 # Validate color scheme 511 _validate_color_scheme(scheme=color_scheme, aln=aln) 512 # create color mapping 513 aln_colors = _create_color_dictionary( 514 aln=aln, color_scheme=color_scheme, identical_char_color=basic_color, different_char_color=different_char_color, 515 mask_color=mask_color, ambiguity_color=ambiguity_color 516 ) 517 # Compute identity alignment 518 identity_aln = aln.calc_identity_alignment( 519 encode_mask=show_mask, encode_gaps=show_gaps, encode_mismatches=show_mismatches,encode_ambiguities=show_ambiguities, 520 encode_each_mismatch_char=True if color_scheme is not None else False 521 ) 522 # create the alignment 523 detected_identity_values = _create_alignment( 524 aln=aln, ax=ax, matrix=identity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps, 525 show_different_sequence=show_identity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color, 526 aln_colors=aln_colors, values_to_plot=list(aln_colors.keys())[1:] 527 ) 528 # custom legend 529 if show_legend: 530 _create_legend( 531 color_scheme=color_scheme, aln_colors=aln_colors, aln=aln, detected_identity_values=detected_identity_values, 532 ax=ax, bbox_to_anchor=bbox_to_anchor 533 ) 534 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names) 535 # configure axis 536 if show_consensus: 537 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_identity_sequence]), 538 color_scheme='standard', basic_color=basic_color, mask_color=mask_color, 539 ambiguity_color=ambiguity_color 540 ) 541 ax.set_ylim(-0.5, len(aln) + 1) 542 else: 543 ax.set_ylim(-0.5, len(aln)) 544 545 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, 546 include_consensus=show_consensus) 547 if show_title: 548 ax.set_title('identity', loc='left') 549 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 550 551 return ax
Generates an identity alignment plot.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- show_title: whether to show title
- show_seq_names: whether to show seq names
- show_identity_sequence: whether to show sequence for only differences and reference - zoom in to avoid plotting issues
- show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
- custom_seq_names: custom seq names
- reference_color: color of reference sequence
- basic_color: color for identical nucleotides/aminoacids
- different_char_color: color for different nucleotides/aminoacids
- mask_color: color for masked nucleotides/aminoacids
- ambiguity_color: color for ambiguous nucleotides/aminoacids
- show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
- show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
- fancy_gaps: show gaps with a small black bar
- show_mismatches: whether to show mismatches otherwise it will be shown as match
- show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
- color_scheme: color mismatching chars with their unique color. Options for DNA/RNA are: standard, purine_pyrimidine, strong_weak; and for AS: standard, clustal, zappo, hydrophobicity. Will overwrite different_char_color.
- show_x_label: whether to show x label
- show_legend: whether to show the legend
- bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
- show_consensus: whether to show the majority consensus sequence (standard-color scheme)
:return matplotlib axis
554def similarity_alignment(aln: explore.MSA | str, ax: plt.Axes | None = None, matrix_type: str | None = None, show_title: bool = True, 555 show_similarity_sequence: bool = False, show_sequence_all: bool = False, show_seq_names: bool = False, 556 custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', different_char_color: str = 'peru', basic_color: str = 'lightgrey', 557 show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False, 558 cbar_fraction: float = 0.1, show_consensus: bool = False) -> plt.Axes: 559 """ 560 Generates a similarity alignment plot. Importantly the similarity values are normalized! 561 :param aln: alignment MSA class or path 562 :param ax: matplotlib axes 563 :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65 564 :param show_title: whether to show title 565 :param show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues 566 :param show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues 567 :param show_seq_names: whether to show seq names 568 :param custom_seq_names: custom seq names 569 :param reference_color: color of reference sequence 570 :param different_char_color: color for the lowest similarity 571 :param basic_color: basic color for the highest similarity 572 :param show_gaps: whether to show gaps otherwise it will be ignored 573 :param fancy_gaps: show gaps with a small black bar 574 :param show_x_label: whether to show x label 575 :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 576 :param cbar_fraction: fraction of the original ax reserved for the legend 577 :param show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters) 578 :return matplotlib axis 579 """ 580 # Both options for gaps work hand in hand 581 if fancy_gaps: 582 show_gaps = True 583 # Validate aln, ax inputs 584 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 585 # Validate colors 586 for c in [reference_color, different_char_color, basic_color]: 587 _validate_color(c) 588 # Compute similarity alignment 589 similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type) # use normalized values here 590 similarity_aln = similarity_aln.round(2) # round data for good color mapping 591 # create cmap 592 cmap = LinearSegmentedColormap.from_list( 593 "extended", 594 [ 595 (0.0, different_char_color), 596 (1.0, basic_color) 597 ], 598 ) 599 cmap = ScalarMappable(norm=Normalize(vmin=0, vmax=1), cmap=cmap) 600 # create similarity values 601 similarity_values = np.arange(start=0, stop=1, step=0.01) 602 # round it to be absolutely sure that values match with rounded sim alignment 603 similarity_values = list(similarity_values.round(2)) 604 # create the alignment 605 _create_alignment( 606 aln=aln, ax=ax, matrix=similarity_aln, fancy_gaps=fancy_gaps, create_identity_patch=True, show_gaps=show_gaps, 607 show_different_sequence=show_similarity_sequence, show_sequence_all=show_sequence_all, reference_color=reference_color, 608 aln_colors=cmap, values_to_plot=similarity_values, identical_value=1 609 ) 610 # legend 611 if show_cbar: 612 cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 613 cbar.set_ticks([0, 1]) 614 cbar.set_ticklabels(['low', 'high']) 615 616 # format seq names 617 _seq_names(aln, ax, custom_seq_names, show_seq_names) 618 619 # configure axis 620 if show_consensus: 621 consensus_plot(aln=aln, ax=ax, show_x_label=show_x_label, show_name=False, show_sequence=any([show_sequence_all, show_similarity_sequence]), 622 color_scheme='standard', basic_color=basic_color) 623 ax.set_ylim(-0.5, len(aln) + 1) 624 else: 625 ax.set_ylim(-0.5, len(aln)) 626 627 _seq_names(aln=aln, ax=ax, custom_seq_names=custom_seq_names, show_seq_names=show_seq_names, 628 include_consensus=show_consensus) 629 if show_title: 630 ax.set_title('similarity', loc='left') 631 _format_x_axis(aln, ax, show_x_label, show_left=False) 632 633 return ax
Generates a similarity alignment plot. Importantly the similarity values are normalized!
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
- show_title: whether to show title
- show_similarity_sequence: whether to show sequence only for differences and reference - zoom in to avoid plotting issues
- show_sequence_all: whether to show all sequences - zoom in to avoid plotting issues
- show_seq_names: whether to show seq names
- custom_seq_names: custom seq names
- reference_color: color of reference sequence
- different_char_color: color for the lowest similarity
- basic_color: basic color for the highest similarity
- show_gaps: whether to show gaps otherwise it will be ignored
- fancy_gaps: show gaps with a small black bar
- show_x_label: whether to show x label
- show_cbar: whether to show the legend - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
- cbar_fraction: fraction of the original ax reserved for the legend
- show_consensus: whether to show the majority consensus sequence (standard color scheme, no specical handling for special characters) :return matplotlib axis
661def stat_plot(aln: explore.MSA | str, stat_type: str, ax: plt.Axes | None = None, line_color: str = 'burlywood', 662 line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True) -> plt.Axes: 663 """ 664 Generate a plot for the various alignment stats. 665 :param aln: alignment MSA class or path 666 :param ax: matplotlib axes 667 :param stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', gap frequency, 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65) 668 :param line_color: color of the line 669 :param line_width: width of the line 670 :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids 671 :param show_x_label: whether to show the x-axis label 672 :param show_title: whether to show the title 673 :return matplotlib axes 674 """ 675 676 # input check 677 aln, ax = _validate_input_parameters(aln, ax) 678 679 # define possible functions to calc here 680 stat_functions: Dict[str, Callable[[], AlignmentStats | ndarray]] = { 681 'gc': aln.calc_gc, 682 'entropy': aln.calc_entropy, 683 'coverage': aln.calc_coverage, 684 'identity': aln.calc_identity_alignment, 685 'similarity': aln.calc_similarity_alignment, 686 'ts tv score': aln.calc_transition_transversion_score, 687 'gap frequency': aln.calc_gap_frequency 688 } 689 690 if stat_type not in stat_functions: 691 raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys()))) 692 693 _validate_color(line_color) 694 if rolling_average < 1 or rolling_average > aln.length: 695 raise ValueError('rolling_average must be between 1 and length of sequence') 696 697 # generate input data 698 array = stat_functions[stat_type]() 699 if isinstance(array, AlignmentStats): 700 array = array.values 701 # define possible spans for values 702 if stat_type == 'identity': 703 min_value, max_value = -1, 0 704 elif stat_type == 'ts tv score': 705 min_value, max_value = -1, 1 706 else: 707 min_value, max_value = 0, 1 708 if stat_type in ['identity', 'similarity']: 709 # for the mean nan values get handled as the lowest possible number in the matrix 710 array = np.nan_to_num(array, True, min_value) 711 array = np.mean(array, axis=0) 712 data, plot_idx = _moving_average(array, rolling_average, aln.zoom, aln.length) 713 714 # plot the data 715 ax.fill_between( 716 # this add dummy data left and right for better plotting 717 # otherwise only half of the step is shown 718 np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx, 719 np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data, 720 min_value, 721 linewidth = line_width, 722 edgecolor=line_color, 723 step='mid' if rolling_average == 1 else None, 724 facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none' 725 ) 726 if stat_type == 'gc': 727 ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1) 728 729 # format axis 730 ax.set_ylim(min_value, max_value*0.1+max_value) 731 ax.set_yticks([min_value, max_value]) 732 if stat_type == 'gc': 733 ax.set_yticklabels(['0', '100']) 734 elif stat_type == 'ts tv score': 735 ax.set_yticklabels(['tv', 'ts']) 736 else: 737 ax.set_yticklabels(['low', 'high']) 738 739 # show title 740 if show_title: 741 ax.set_title( 742 f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position', 743 loc='left' 744 ) 745 746 _format_x_axis(aln, ax, show_x_label, show_left=True) 747 748 return ax
Generate a plot for the various alignment stats.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- stat_type: 'entropy', 'gc', 'coverage', 'ts tv score', gap frequency, 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
- line_color: color of the line
- line_width: width of the line
- rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
- show_x_label: whether to show the x-axis label
- show_title: whether to show the title :return matplotlib axes
751def variant_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, lollisize: tuple[int, int] | list = (1, 3), 752 color_scheme: str = 'standard', show_x_label: bool = False, show_legend: bool = True, 753 bbox_to_anchor: tuple[float|int, float|int] | list = (1, 1)) -> plt.Axes: 754 """ 755 Plots variants. 756 :param aln: alignment MSA class or path 757 :param ax: matplotlib axes 758 :param lollisize: (stem_size, head_size) 759 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options 760 :param show_x_label: whether to show the x-axis label 761 :param show_legend: whether to show the legend 762 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 763 :return matplotlib axes 764 """ 765 766 # validate input 767 aln, ax = _validate_input_parameters(aln, ax) 768 _validate_color_scheme(color_scheme, aln) 769 if not isinstance(lollisize, tuple) or len(lollisize) != 2: 770 raise ValueError('lollisize must be tuple of length 2 (stem, head)') 771 for _size in lollisize: 772 if not isinstance(_size, float | int) or _size <= 0: 773 raise ValueError('lollisize must be floats greater than zero') 774 775 # define colors 776 colors = config.CHAR_COLORS[aln.aln_type][color_scheme] 777 # get snps 778 snps = aln.get_snps() 779 # define where to plot (each ref type gets a separate line) 780 ref_y_positions, y_pos, detected_var = {}, 0, set() 781 782 # iterate over SNPs 783 for pos, snp_pos in snps.positions.items(): 784 if snp_pos.ref not in ref_y_positions: 785 ref_y_positions[snp_pos.ref] = y_pos 786 y_pos += 1.1 787 788 for alt, (af, _) in snp_pos.alt.items(): 789 x_pos = pos + aln.zoom[0] if aln.zoom is not None else pos 790 y_ref = ref_y_positions[snp_pos.ref] 791 ax.vlines( 792 x=x_pos, 793 ymin=y_ref, 794 ymax=y_ref + af, 795 color=colors[alt], 796 zorder=100, 797 linewidth=lollisize[0] 798 ) 799 ax.plot( 800 x_pos, 801 y_ref + af, 802 color=colors[alt], 803 marker='o', 804 markersize=lollisize[1] 805 ) 806 detected_var.add(alt) 807 808 # plot hlines 809 for y_char in ref_y_positions: 810 ax.hlines( 811 ref_y_positions[y_char], 812 xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5, 813 xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5, 814 color='black', 815 linestyle='-', 816 zorder=0, 817 linewidth=0.75 818 ) 819 # create a custom legend 820 if show_legend: 821 custom_legend = [ 822 ax.add_line( 823 plt.Line2D( 824 [], 825 [], 826 color=colors[char], 827 marker='o', 828 linestyle='', 829 markersize=5 830 ) 831 ) for char in colors if char in detected_var 832 ] 833 ax.legend( 834 custom_legend, 835 [char for char in colors if char in detected_var], # ensures correct sorting 836 loc='lower right', 837 title='variant', 838 bbox_to_anchor=bbox_to_anchor, 839 ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var), 840 frameon=False 841 ) 842 843 # format axis 844 _format_x_axis(aln, ax, show_x_label, show_left=False) 845 ax.spines['left'].set_visible(False) 846 ax.set_yticks([ref_y_positions[x] for x in ref_y_positions]) 847 ax.set_yticklabels(ref_y_positions.keys()) 848 ax.set_ylim(0, y_pos) 849 ax.set_ylabel('reference') 850 851 return ax
Plots variants.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- lollisize: (stem_size, head_size)
- color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
- show_x_label: whether to show the x-axis label
- show_legend: whether to show the legend
- bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html :return matplotlib axes
921def orf_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, min_length: int = 500, non_overlapping_orfs: bool = True, 922 cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, 923 cbar_fraction: float = 0.1) -> plt.Axes: 924 """ 925 Plot conserved ORFs. 926 :param aln: alignment MSA class or path 927 :param ax: matplotlib axes 928 :param min_length: minimum length of orf 929 :param non_overlapping_orfs: whether to consider overlapping orfs 930 :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html 931 :param direction_marker_size: marker size for direction marker, not shown if marker_size == None 932 :param show_x_label: whether to show the x-axis label 933 :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 934 :param cbar_fraction: fraction of the original ax reserved for the colorbar 935 :return matplotlib axes 936 """ 937 938 # normalize colorbar 939 cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap)) 940 # validate input 941 aln, ax = _validate_input_parameters(aln, ax) 942 943 # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only 944 # searched within the zoomed region) 945 aln_temp = deepcopy(aln) 946 aln_temp.zoom = None 947 if non_overlapping_orfs: 948 orf_collection = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length) 949 else: 950 orf_collection = aln_temp.get_conserved_orfs(min_length=min_length) 951 952 # Normalize ORF dataclasses to the annotation dict shape consumed by plotting helpers. 953 annotation_dict = {} 954 for orf_id, orf_data in orf_collection.items(): 955 annotation_dict[orf_id] = { 956 'location': orf_data.location, 957 'strand': orf_data.strand, 958 'conservation': orf_data.conservation, 959 } 960 # filter dict for zoom 961 if aln.zoom is not None: 962 annotation_dict = { 963 key: val 964 for key, val in annotation_dict.items() 965 if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1]) 966 } 967 # add track for plotting 968 _add_track_positions(annotation_dict) 969 # plot 970 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap) 971 972 # legend 973 if show_cbar: 974 cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 975 cbar.set_label('% identity') 976 cbar.set_ticks([0, 100]) 977 978 # format axis 979 _format_x_axis(aln, ax, show_x_label, show_left=False) 980 ax.set_ylim(bottom=0.8) 981 ax.set_yticks([]) 982 ax.set_yticklabels([]) 983 ax.set_title('conserved orfs', loc='left') 984 985 return ax
Plot conserved ORFs.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- min_length: minimum length of orf
- non_overlapping_orfs: whether to consider overlapping orfs
- cmap: color mapping for % identity - see https: //matplotlib.org/stable/users/explain/colors/colormaps.html
- direction_marker_size: marker size for direction marker, not shown if marker_size == None
- show_x_label: whether to show the x-axis label
- show_cbar: whether to show the colorbar - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
- cbar_fraction: fraction of the original ax reserved for the colorbar :return matplotlib axes
988def annotation_plot(aln: explore.MSA | str, annotation: explore.Annotation | str, feature_to_plot: str, ax: plt.Axes | None = None, 989 color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False) -> plt.Axes: 990 """ 991 Plot annotations from bed, gff or gb files. Are automatically mapped to alignment. 992 :param aln: alignment MSA class 993 :param annotation: annotation class | path to annotation file 994 :param ax: matplotlib axes 995 :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature) 996 :param color: color for the annotation 997 :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True 998 :param show_x_label: whether to show the x-axis label 999 :return matplotlib axes 1000 """ 1001 # validate input 1002 aln, ax, annotation = _validate_input_parameters(aln, ax, annotation) 1003 _validate_color(color) 1004 1005 # ignore features to plot for bed files (here it is written into one feature) 1006 if annotation.ann_type == 'bed': 1007 annotation_dict = annotation.features['region'] 1008 feature_to_plot = 'bed regions' 1009 else: 1010 # try to subset the annotation dict 1011 try: 1012 annotation_dict = annotation.features[feature_to_plot] 1013 except KeyError: 1014 raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.') 1015 1016 # plotting and formating 1017 _add_track_positions(annotation_dict) 1018 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color) 1019 _format_x_axis(aln, ax, show_x_label, show_left=False) 1020 ax.set_ylim(bottom=0.8) 1021 ax.set_yticks([]) 1022 ax.set_yticklabels([]) 1023 ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left') 1024 1025 return ax
Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
Parameters
- aln: alignment MSA class
- annotation: annotation class | path to annotation file
- ax: matplotlib axes
- feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
- color: color for the annotation
- direction_marker_size: marker size for direction marker, only relevant if show_direction is True
- show_x_label: whether to show the x-axis label :return matplotlib axes
1028def sequence_logo(aln:explore.MSA | str, ax:plt.Axes | None = None, color_scheme: str = 'standard', plot_type: str = 'stacked', 1029 show_x_label:bool = False) -> plt.Axes: 1030 """ 1031 Plot sequence logo or stacked area plot (use the first one with appropriate zoom levels). The 1032 logo visualizes the relative frequency of nt or aa characters in the alignment. The char frequency 1033 is scaled to the information content at each position. --> identical to how Geneious calculates it. 1034 1035 :param aln: alignment MSA class or path 1036 :param ax: matplotlib axes 1037 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options 1038 :param plot_type: 'logo' for sequence logo, 'stacked' for stacked area plot 1039 :param show_x_label: whether to show the x-axis label 1040 :return matplotlib axes 1041 """ 1042 1043 aln, ax = _validate_input_parameters(aln, ax) 1044 # calc matrix 1045 matrix = aln.calc_position_matrix('IC') * aln.calc_position_matrix('PPM') 1046 letters_to_plot = list(config.CHAR_COLORS[aln.aln_type]['standard'].keys()) 1047 1048 # plot 1049 if plot_type == 'logo': 1050 for pos in range(matrix.shape[1]): 1051 # sort the positive matrix row values by size 1052 items = [(letters_to_plot[i], matrix[i, pos]) for i in range(len(letters_to_plot)) if matrix[i, pos] > 0] 1053 items.sort(key=lambda x: x[1]) 1054 # plot each position 1055 y_offset = 0 1056 for letter, h in items: 1057 tp = TextPath((aln.zoom[0] - 0.325 if aln.zoom is not None else - 0.325, 0), letter, size=1, prop=FontProperties(weight='bold')) 1058 bb = tp.get_extents() 1059 glyph_height = bb.height if bb.height > 0 else 1e-6 # avoid div by zero 1060 scale_to_1 = 1.0 / glyph_height 1061 1062 transform = (Affine2D() 1063 .scale(1.0, h * scale_to_1) # scale manually by IC and normalize font 1064 .translate(pos, y_offset) 1065 + ax.transData) 1066 1067 patch = PathPatch(tp, transform=transform, 1068 facecolor=config.CHAR_COLORS[aln.aln_type][color_scheme][letter], 1069 edgecolor='none') 1070 ax.add_patch(patch) 1071 y_offset += h 1072 elif plot_type == 'stacked': 1073 y_values = np.zeros(matrix.shape[1]) 1074 x_values = np.arange(0, matrix.shape[1]) if aln.zoom is None else np.arange(aln.zoom[0], aln.zoom[1]) 1075 for row in range(matrix.shape[0]): 1076 y = matrix[row] 1077 ax.fill_between(x_values, 1078 y_values, 1079 y_values + y, 1080 fc=config.CHAR_COLORS[aln.aln_type][color_scheme].get(letters_to_plot[row]), 1081 ec='None', 1082 label=letters_to_plot[row], 1083 step='mid') 1084 1085 # adjust limits & labels 1086 _format_x_axis(aln, ax, show_x_label, show_left=True) 1087 if aln.aln_type == 'AA': 1088 ax.set_ylim(bottom=0, top=5) 1089 else: 1090 ax.set_ylim(bottom=0, top=2) 1091 ax.spines['top'].set_visible(False) 1092 ax.spines['right'].set_visible(False) 1093 ax.set_ylabel('bits') 1094 1095 return ax
Plot sequence logo or stacked area plot (use the first one with appropriate zoom levels). The logo visualizes the relative frequency of nt or aa characters in the alignment. The char frequency is scaled to the information content at each position. --> identical to how Geneious calculates it.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- color_scheme: color scheme for characters. see config.CHAR_COLORS for available options
- plot_type: 'logo' for sequence logo, 'stacked' for stacked area plot
- show_x_label: whether to show the x-axis label :return matplotlib axes
1098def consensus_plot(aln: explore.MSA | str, ax: plt.Axes | None = None, threshold: float | None = None, use_ambig_nt: bool = False, 1099 color_scheme: str | None = 'standard', mask_color: str = 'dimgrey', ambiguity_color: str = 'black', basic_color: str = 'lightgrey', 1100 show_x_label: bool = False, show_name: bool = True, show_sequence: bool = False) -> plt.Axes: 1101 """ 1102 Plot a consensus sequence as a single-row colored sequence. 1103 1104 :param aln: alignment MSA class or path 1105 :param ax: matplotlib axes 1106 :param threshold: consensus threshold (0-1) or None for majority rule 1107 :param use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus 1108 :param color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None 1109 :param mask_color: color used for mask characters (N/X) 1110 :param ambiguity_color: color used for ambiguous characters 1111 :param show_x_label: whether to show the x-axis label 1112 :param show_name: whether to show the 'consensus' label at y-axis 1113 :param show_sequence: whether to show the sequence at the y-axis 1114 1115 :return: matplotlib axes 1116 """ 1117 1118 # validate input 1119 aln, ax = _validate_input_parameters(aln, ax) 1120 # validate colors 1121 for c in [mask_color, ambiguity_color]: 1122 _validate_color(c) 1123 # validate color scheme 1124 _validate_color_scheme(scheme=color_scheme, aln=aln) 1125 1126 # get consensus 1127 consensus = aln.get_consensus(threshold=threshold, use_ambig_nt=use_ambig_nt) 1128 1129 # prepare color mapping 1130 if color_scheme is not None: 1131 # get mapping from config 1132 char_colors = config.CHAR_COLORS[aln.aln_type][color_scheme] 1133 else: 1134 char_colors = None 1135 1136 # Build polygons and colors for a single PolyCollection 1137 polygons = [] 1138 poly_colors = [] 1139 text_positions = [] 1140 text_chars = [] 1141 text_colors = [] 1142 1143 zoom_offset = aln.zoom[0] if aln.zoom is not None else 0 1144 1145 y_position = len(aln) - 0.4 1146 1147 for pos, char in enumerate(consensus): 1148 x = pos + zoom_offset 1149 # determine color 1150 if char_colors is not None and char in char_colors: 1151 color = char_colors[char] 1152 else: 1153 # ambiguous nucleotide/aminoacid codes 1154 if char in config.AMBIG_CHARS[aln.aln_type]: 1155 color = ambiguity_color 1156 elif char in ['N', 'X']: 1157 color = mask_color 1158 else: 1159 color = basic_color 1160 1161 rect = [ 1162 (x - 0.5, y_position), 1163 (x + 0.5, y_position), 1164 (x + 0.5, y_position+0.8), 1165 (x - 0.5, y_position+0.8), 1166 ] 1167 polygons.append(rect) 1168 poly_colors.append(color) 1169 1170 if show_sequence: 1171 text_positions.append(x) 1172 text_chars.append(char) 1173 # determine readable text color 1174 text_colors.append(_get_contrast_text_color(to_rgba(color))) 1175 1176 # add single PolyCollection 1177 if polygons: 1178 pc = PolyCollection(polygons, facecolors=poly_colors, edgecolors=poly_colors, linewidths=0.5) 1179 ax.add_collection(pc) 1180 1181 # add texts if requested 1182 if show_sequence: 1183 for x, ch, tc in zip(text_positions, text_chars, text_colors): 1184 ax.text(x, y_position+0.4, ch, ha='center', va='center_baseline', c=tc) 1185 1186 # format axis 1187 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=False) 1188 ax.set_ylim(y_position-0.1, y_position + 0.9) 1189 if show_name: 1190 ax.yaxis.set_ticks_position('none') 1191 ax.set_yticks([y_position + 0.4]) 1192 ax.set_yticklabels(['consensus']) 1193 else: 1194 ax.set_yticks([]) 1195 1196 return ax
Plot a consensus sequence as a single-row colored sequence.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- threshold: consensus threshold (0-1) or None for majority rule
- use_ambig_nt: whether to use ambiguous nucleotide codes when building consensus
- color_scheme: color scheme for characters. see config.CHAR_COLORS for available options or None
- mask_color: color used for mask characters (N/X)
- ambiguity_color: color used for ambiguous characters
- show_x_label: whether to show the x-axis label
- show_name: whether to show the 'consensus' label at y-axis
- show_sequence: whether to show the sequence at the y-axis
Returns
matplotlib axes
1198def simplot(aln: explore.MSA | str, ref: str | None, ax: plt.Axes | None = None, colors: str | list | None = None, 1199 window_size: int = 200, step_size: int = 20, distance_calculation: str = 'ged', line_width: int | float = 1, 1200 show_legend: bool = False, show_reference: bool = True, bbox_to_anchor: tuple[float|int, float|int] | list= (1, 1), 1201 show_x_label: bool = False) -> plt.Axes: 1202 """ 1203 Calculate binned pairwise distances (similarity) between sequences and a reference sequence 1204 over a stepwise sliding window in the zoomed region of the alignment. This is inspired by simplot that helps to identify 1205 recombination sites. For proper recombination analyses use simplot or simplot++ (https://github.com/Stephane-S/Simplot_PlusPlus). 1206 Each sequence is plotted as a line displaying the similarity to the reference. The reference sequence can be either 1207 set to None (compared to consensus) or to a specific reference id. 1208 1209 :param aln: alignment MSA class or path 1210 :param ax: matplotlib axes 1211 :param ref: reference sequence id or None. For 'None' all computations are compared to a majority consensus 1212 :param colors: color for each sequence. can be a single named color or a list of named colors or a plt.colormap or None (auto coloring) 1213 :param window_size: window size for sliding window 1214 :param step_size: step size for sliding window 1215 :param distance_calculation: distance calculation method. Supported: ghd (global hamming distance), ged (gap excluded distance) and for nt additionally: jc69(Jukes-Cantor 1969) and k2p (Kimura 2-Parameter / K80). For more information see: explore.MSA.calc_pairwise_identity_matrix() 1216 :param line_width: width of the plotted lines 1217 :param show_legend: whether to show the legend 1218 :param show_reference: whether to show the reference id in the right lower corner 1219 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 1220 :param show_x_label: whether to show the x-axis label 1221 1222 :return: matplotlib axes 1223 """ 1224 1225 # validate inputs 1226 aln, ax = _validate_input_parameters(aln=aln, ax=ax) 1227 if not isinstance(window_size, int) or window_size <= 0: 1228 raise ValueError('window_size has to be a positive integer') 1229 if window_size > aln.length: 1230 raise ValueError('window_size can not be larger than the (zoomed) alignment length') 1231 if not isinstance(step_size, int) or step_size <= 0: 1232 raise ValueError('step_size has to be a positive integer') 1233 if step_size > aln.length: 1234 raise ValueError('step_size can not be larger than the (zoomed) alignment length') 1235 if ref is not None and ref not in aln: 1236 raise ValueError(f'Reference {ref} not in alignment') 1237 if distance_calculation not in ['ghd', 'ged', 'jc69', 'k2p']: 1238 raise ValueError(f'Distance calculation method {distance_calculation} not supported. Supported: ghd, ged, jc69, k2p') 1239 if distance_calculation in ['jc69', 'k2p'] and aln.aln_type == 'AA': 1240 raise ValueError(f'Distance calculation method {distance_calculation} only supported for nucleotide alignments') 1241 1242 # get the sequence ids to plot 1243 sequence_ids = [seq_id for seq_id in aln if seq_id != ref] 1244 1245 # validate colors 1246 if colors is not None: 1247 # named color or colormap 1248 if isinstance(colors, str): 1249 # first try potential colormap 1250 try: 1251 cmap = plt.get_cmap(colors) 1252 cmap_colors = cmap(np.linspace(0, 1, len(sequence_ids))) 1253 color_map = {seq_id: color for seq_id, color in zip(sequence_ids, cmap_colors)} 1254 # single color 1255 except ValueError: 1256 _validate_color(colors) 1257 color_map = {seq_id: colors for seq_id in sequence_ids} 1258 # list of colors 1259 elif isinstance(colors, list): 1260 if len(colors) != len(sequence_ids): 1261 raise ValueError('colors list length has to match the number of plotted sequences') 1262 for color in colors: 1263 _validate_color(color) 1264 color_map = {seq_id: color for seq_id, color in zip(sequence_ids, colors)} 1265 else: 1266 raise ValueError('colors has to be either a single named color, a list of named colors, a plt colormap or None (auto)') 1267 else: 1268 color_map = None 1269 1270 # define region and window 1271 region_start = aln.zoom[0] if aln.zoom is not None else 0 1272 half_window_size = window_size // 2 1273 1274 # copy alignment and set reference 1275 aln_tmp = deepcopy(aln) 1276 aln_tmp.reference_id = ref 1277 # reset zoom (later the alignment dictionary will be replaced for a slice of the alignment) 1278 aln_tmp._zoom = None 1279 1280 # remember x positions and distances 1281 x_positions = [] 1282 distance_traces = {seq_id: [] for seq_id in sequence_ids} 1283 1284 for point_to_plot in range(0, aln.length + 1, step_size): 1285 # define windows 1286 left_side = point_to_plot - half_window_size if point_to_plot - half_window_size > 0 else 0 1287 right_side = point_to_plot + half_window_size if point_to_plot + half_window_size < aln.length else aln.length 1288 # slice alignment and replace the original alignment 1289 aln_tmp._alignment = { 1290 seq_id: seq[left_side:right_side] 1291 for seq_id, seq in aln.alignment.items() 1292 } 1293 window_result = aln_tmp.calc_pairwise_distance_to_reference(distance_type=distance_calculation) 1294 value_map = {seq_id: value for seq_id, value in zip(window_result.sequence_ids, window_result.distances)} 1295 1296 x_positions.append(region_start + point_to_plot) 1297 for seq_id in sequence_ids: 1298 distance_traces[seq_id].append(value_map[seq_id]) 1299 1300 for seq_id in sequence_ids: 1301 if color_map is not None: 1302 ax.plot(x_positions, distance_traces[seq_id], 1303 color=color_map[seq_id], 1304 linewidth=line_width, label=seq_id) 1305 else: 1306 ax.plot(x_positions, distance_traces[seq_id], 1307 linewidth=line_width, label=seq_id) 1308 # Format axis 1309 ax.set_ylabel('similarity (%)') 1310 ax.set_ylim(0, 102) 1311 _format_x_axis(aln=aln, ax=ax, show_x_label=show_x_label, show_left=True) 1312 # add legend for all sequences on top 1313 if show_legend: 1314 leg1 = ax.legend(frameon=False, loc='lower right', bbox_to_anchor=bbox_to_anchor, ncols=3) 1315 ax.add_artist(leg1) 1316 # add legend for query 1317 if show_reference: 1318 ref_label = ref if ref is not None else 'consensus' 1319 ref_handle = plt.Line2D([0], [0], linewidth=0, label=f'reference: {ref_label}') 1320 leg2 = ax.legend(handles=[ref_handle], frameon=False, bbox_to_anchor=(1, 0), loc='lower right') 1321 ax.add_artist(leg2) 1322 1323 return ax
Calculate binned pairwise distances (similarity) between sequences and a reference sequence over a stepwise sliding window in the zoomed region of the alignment. This is inspired by simplot that helps to identify recombination sites. For proper recombination analyses use simplot or simplot++ (https://github.com/Stephane-S/Simplot_PlusPlus). Each sequence is plotted as a line displaying the similarity to the reference. The reference sequence can be either set to None (compared to consensus) or to a specific reference id.
Parameters
- aln: alignment MSA class or path
- ax: matplotlib axes
- ref: reference sequence id or None. For 'None' all computations are compared to a majority consensus
- colors: color for each sequence. can be a single named color or a list of named colors or a plt.colormap or None (auto coloring)
- window_size: window size for sliding window
- step_size: step size for sliding window
- distance_calculation: distance calculation method. Supported: ghd (global hamming distance), ged (gap excluded distance) and for nt additionally: jc69(Jukes-Cantor 1969) and k2p (Kimura 2-Parameter / K80). For more information see: explore.MSA.calc_pairwise_identity_matrix()
- line_width: width of the plotted lines
- show_legend: whether to show the legend
- show_reference: whether to show the reference id in the right lower corner
- bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
- show_x_label: whether to show the x-axis label
Returns
matplotlib axes