msaexplorer.draw
The draw module
The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a
matplotlib axes
has to be passed to the plotting function.
Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments. The functions will raise the appropriate exception in such a case.
Functions
1r""" 2# The draw module 3 4The draw module lets you draw alignments and statistic plots such as SNPs, ORFs, entropy and much more. For each plot a 5`matplotlib axes` has to be passed to the plotting function. 6 7Importantly some of the plotting features can only be accessed for nucleotide alignments but not for amino acid alignments. 8The functions will raise the appropriate exception in such a case. 9 10## Functions 11 12""" 13 14# built-in 15from itertools import chain 16from typing import Callable, Dict 17from copy import deepcopy 18import os 19 20import matplotlib 21from numpy import ndarray 22 23# MSAexplorer 24from msaexplorer import explore, config 25 26# libs 27import numpy as np 28import matplotlib.pyplot as plt 29import matplotlib.patches as patches 30from matplotlib.cm import ScalarMappable 31from matplotlib.colors import is_color_like, Normalize, to_rgba 32from matplotlib.collections import PatchCollection, PolyCollection 33 34 35# general helper functions 36def _validate_input_parameters(aln: explore.MSA, ax: plt.Axes, annotation: explore.Annotation | None = None): 37 """ 38 Validate MSA class and axis. 39 """ 40 if not isinstance(aln, explore.MSA): 41 raise ValueError('alignment has to be an MSA class. use explore.MSA() to read in alignment') 42 if not isinstance(ax, plt.Axes): 43 raise ValueError('ax has to be an matplotlib axis') 44 if annotation is not None: 45 if not isinstance(annotation, explore.Annotation): 46 raise ValueError('annotation has to be an annotation class. use explore.Annotation() to read in annotation') 47 48 49def _format_x_axis(aln: explore.MSA, ax: plt.Axes, show_x_label: bool, show_left: bool): 50 """ 51 General axis formatting. 52 """ 53 ax.set_xlim( 54 (aln.zoom[0] - 0.5, aln.zoom[0] + aln.length - 0.5) if aln.zoom is not None else (-0.5, aln.length - 0.5) 55 ) 56 ax.spines['top'].set_visible(False) 57 ax.spines['right'].set_visible(False) 58 if show_x_label: 59 ax.set_xlabel('alignment position') 60 if not show_left: 61 ax.spines['left'].set_visible(False) 62 63 64def _find_stretches(row, non_nan_only=False) -> list[tuple[int, int, int]] | list[tuple[int, int]] : 65 """ 66 Finds consecutive stretches of values in an array, with an option to exclude NaN stretches. 67 68 :param row: NumPy array of values 69 :param non_nan_only: If True, only returns stretches of non-NaN values. 70 :return: List of stretches (start, end, value at start) for all values or non-NaN values only. 71 """ 72 if row.size == 0: 73 return [] 74 75 if non_nan_only: 76 # Create a boolean mask for non-NaN values 77 non_nan_mask = ~np.isnan(row) 78 # Find changes in the mask 79 changes = np.diff(non_nan_mask.astype(int)) != 0 80 change_idx = np.nonzero(changes)[0] 81 starts = np.concatenate(([0], change_idx + 1)) 82 ends = np.concatenate((change_idx, [len(row) - 1])) 83 84 # Return only stretches that start with non-NaN values 85 return [(start, end) for start, end in zip(starts, ends) if non_nan_mask[start]] 86 else: 87 # Find change points: where adjacent cells differ. 88 changes = np.diff(row) != 0 89 change_idx = np.nonzero(changes)[0] 90 starts = np.concatenate(([0], change_idx + 1)) 91 ends = np.concatenate((change_idx, [len(row) - 1])) 92 93 return [(start, end, row[start]) for start, end in zip(starts, ends)] 94 95 96def _seq_names(aln: explore.MSA, ax: plt.Axes, custom_seq_names: tuple, show_seq_names: bool): 97 """ 98 Validate custom names and set show names to True. Format axis accordingly. 99 """ 100 if custom_seq_names: 101 show_seq_names = True 102 if not isinstance(custom_seq_names, tuple): 103 raise ValueError('configure your custom names list: custom_names=(name1, name2...)') 104 if len(custom_seq_names) != len(aln.alignment.keys()): 105 raise ValueError('length of sequences not equal to number of custom names') 106 if show_seq_names: 107 ax.yaxis.set_ticks_position('none') 108 ax.set_yticks(np.arange(len(aln.alignment)) + 0.6) 109 if custom_seq_names: 110 ax.set_yticklabels(custom_seq_names[::-1]) 111 else: 112 names = [x.split(' ')[0] for x in list(aln.alignment.keys())[::-1]] 113 ax.set_yticklabels(names) 114 else: 115 ax.set_yticks([]) 116 117 118def _create_identity_patch(row, aln: explore.MSA, col: list, zoom: tuple[int, int], y_position: float | int, reference_color: str, seq_name: str, identity_color: str | ndarray, show_gaps: bool): 119 """ 120 Creates the initial patch. 121 """ 122 if show_gaps: 123 # plot a rectangle for parts that do not have gaps 124 for stretch in _find_stretches(row, True): 125 col.append(patches.Rectangle((stretch[0] + zoom[0] - 0.5, y_position), stretch[1] - stretch[0] + 1, 0.8, 126 facecolor=reference_color if seq_name == aln.reference_id else identity_color 127 ) 128 129 ) 130 # just plot a rectangle 131 else: 132 col.append(patches.Rectangle((zoom[0] - 0.5, y_position), zoom[1] - zoom[0], 0.8, 133 facecolor=reference_color if seq_name == aln.reference_id else identity_color 134 ) 135 ) 136 137 138def _create_polygons(stretches: list, identity_values: list | ndarray, zoom: tuple, y_position: int, polygons: list, aln_colors: dict | ScalarMappable, polygon_colors: list, detected_identity_values: set | None = None): 139 """ 140 create the individual polygons 141 """ 142 143 for start, end, value in stretches: 144 if value not in identity_values: 145 continue 146 if detected_identity_values is not None: 147 detected_identity_values.add(value) 148 width = end + 1 - start 149 # Calculate x coordinates adjusted for zoom and centering 150 x0 = start + zoom[0] - 0.5 151 x1 = x0 + width 152 # Define the rectangle corners 153 rect_coords = [ 154 (x0, y_position), 155 (x1, y_position), 156 (x1, y_position + 0.8), 157 (x0, y_position + 0.8), 158 (x0, y_position) 159 ] 160 polygons.append(rect_coords) 161 if type(aln_colors) != ScalarMappable: 162 polygon_colors.append(aln_colors[value]['color']) 163 else: 164 polygon_colors.append(aln_colors.to_rgba(value)) 165 166 167def _plot_annotation(annotation_dict: dict, ax: plt.Axes, direction_marker_size: int | None, color: str | ScalarMappable): 168 """ 169 Plot annotations 170 :param annotation_dict: dict of annotations 171 :param ax: matplotlib Axes 172 :param direction_marker_size: size of marker 173 :param color: color of annotation (color or scalar) 174 """ 175 for annotation in annotation_dict: 176 for locations in annotation_dict[annotation]['location']: 177 x_value = locations[0] 178 length = locations[1] - locations[0] 179 ax.add_patch( 180 patches.FancyBboxPatch( 181 (x_value, annotation_dict[annotation]['track'] + 1), 182 length, 183 0.8, 184 boxstyle="Round, pad=0", 185 ec="black", 186 fc=color.to_rgba(annotation_dict[annotation]['conservation']) if isinstance(color, ScalarMappable) else color, 187 ) 188 ) 189 if direction_marker_size is not None: 190 if annotation_dict[annotation]['strand'] == '-': 191 marker = '<' 192 else: 193 marker = '>' 194 ax.plot(x_value + length/2, annotation_dict[annotation]['track'] + 1.4, marker=marker, markersize=direction_marker_size, color='white', markeredgecolor='black') 195 196 # plot linked annotations (such as splicing) 197 if len(annotation_dict[annotation]['location']) > 1: 198 y_value = annotation_dict[annotation]['track'] + 1.4 199 start = None 200 for locations in annotation_dict[annotation]['location']: 201 if start is None: 202 start = locations[1] 203 continue 204 ax.plot([start, locations[0]], [y_value, y_value], '--', linewidth=2, color='black') 205 start = locations[1] 206 207 208def _add_track_positions(annotation_dic): 209 # create a dict and sort 210 annotation_dic = dict(sorted(annotation_dic.items(), key=lambda x: x[1]['location'][0][0])) 211 212 # remember for each track the largest stop 213 track_stops = [0] 214 215 for ann in annotation_dic: 216 flattened_locations = list(chain.from_iterable(annotation_dic[ann]['location'])) # flatten list 217 track = 0 218 # check if a start of a gene is smaller than the stop of the current track 219 # -> move to new track 220 while flattened_locations[0] < track_stops[track]: 221 track += 1 222 # if all prior tracks are potentially causing an overlap 223 # create a new track and break 224 if len(track_stops) <= track: 225 track_stops.append(0) 226 break 227 # in the current track remember the stop of the current gene 228 track_stops[track] = flattened_locations[-1] 229 # and indicate the track in the dict 230 annotation_dic[ann]['track'] = track 231 232 return annotation_dic 233 234 235def _get_contrast_text_color(rgba_color): 236 """ 237 compute the brightness of a color 238 """ 239 r, g, b, a = rgba_color 240 brightness = (r * 299 + g * 587 + b * 114) / 1000 241 return 'white' if brightness < 0.4 else 'black' 242 243 244def _plot_sequence_text(aln: explore.MSA, seq_name: str, ref_name: str, values: ndarray, matrix: ndarray, ax:plt.Axes, zoom: tuple, y_position:int, value_to_skip: int, ref_color:str, show_gaps: bool, cmap: None | ScalarMappable = None): 245 """ 246 Plot sequence text - however this will be done even if there is not enough space. 247 Might need some rework in the future. 248 """ 249 x_text = 0 250 if seq_name == ref_name: 251 different_cols = np.any((matrix != value_to_skip) & ~np.isnan(matrix), axis=0) 252 else: 253 different_cols = [False]*aln.length 254 255 for idx, (character, value) in enumerate(zip(aln.alignment[seq_name], values)): 256 if value != value_to_skip and character != '-' or seq_name == ref_name and character != '-' or character == '-'and not show_gaps: 257 # text color 258 if seq_name == ref_name: 259 text_color = _get_contrast_text_color(to_rgba(ref_color)) 260 elif cmap is not None: 261 text_color = _get_contrast_text_color(cmap.to_rgba(value)) 262 else: 263 text_color = 'black' 264 265 ax.text( 266 x=x_text + zoom[0] if zoom is not None else x_text, 267 y=y_position + 0.4, 268 s=character, 269 fontweight='bold' if different_cols[idx] else 'normal', 270 ha='center', 271 va='center', 272 c='grey' if not different_cols[idx] and seq_name == ref_name else text_color, 273 ) 274 x_text += 1 275 276 277def identity_alignment(aln: explore.MSA, ax: plt.Axes, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', show_mask:bool = True, show_gaps:bool = True, fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_mismatching_chars: bool = False, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int]= (1, 1)): 278 """ 279 Generates an identity alignment overview plot. 280 :param aln: alignment MSA class 281 :param ax: matplotlib axes 282 :param show_title: whether to show title 283 :param show_seq_names: whether to show seq names 284 :param show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues 285 :param custom_seq_names: custom seq names 286 :param reference_color: color of reference sequence 287 :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch 288 :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch 289 :param fancy_gaps: show gaps with a small black bar 290 :param show_mismatches: whether to show mismatches otherwise it will be shown as match 291 :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences 292 :param color_mismatching_chars: color mismatching chars with their unique color 293 :param show_x_label: whether to show x label 294 :param show_legend: whether to show the legend 295 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 296 """ 297 298 # Validate inputs and colors 299 _validate_input_parameters(aln, ax) 300 if not is_color_like(reference_color): 301 raise ValueError(f'{reference_color} for reference is not a color') 302 303 # Both options for gaps work hand in hand 304 if fancy_gaps: 305 show_gaps = True 306 307 # Determine zoom 308 zoom = (0, aln.length) if aln.zoom is None else aln.zoom 309 310 # Set up color mapping and identity values 311 aln_colors = config.IDENTITY_COLORS.copy() 312 identity_values = [-1, -2, -3] # -1 = mismatch, -2 = mask, -3 ambiguity 313 if color_mismatching_chars: 314 colors_to_extend = config.CHAR_COLORS[aln.aln_type] 315 identity_values += [x + 1 for x in range(len(colors_to_extend))] # x+1 is needed to allow correct mapping 316 for idx, char in enumerate(colors_to_extend): 317 aln_colors[idx + 1] = {'type': char, 'color': colors_to_extend[char]} 318 319 # Compute identity alignment 320 identity_aln = aln.calc_identity_alignment( 321 encode_mask=show_mask, 322 encode_gaps=show_gaps, 323 encode_mismatches=show_mismatches, 324 encode_ambiguities=show_ambiguities, 325 encode_each_mismatch_char=color_mismatching_chars 326 ) 327 328 # List to store polygons 329 detected_identity_values = {0} 330 polygons, polygon_colors, patch_list = [], [], [] 331 332 for i, seq_name in enumerate(aln.alignment): 333 y_position = len(aln.alignment) - i - 1 334 row = identity_aln[i] 335 336 # plot a line below everything 337 if fancy_gaps: 338 ax.hlines( 339 y_position + 0.4, 340 xmin=zoom[0] - 0.5, 341 xmax=zoom[1] + 0.5, 342 color='black', 343 linestyle='-', 344 zorder=0, 345 linewidth=0.75 346 ) 347 348 # plot the basic shape per sequence with gaps 349 _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, aln_colors[0]['color'], show_gaps) 350 351 # find consecutive stretches 352 stretches = _find_stretches(row) 353 # create polygons per stretch 354 _create_polygons(stretches, identity_values, zoom, y_position, polygons, aln_colors, polygon_colors, detected_identity_values) 355 356 # add sequence text 357 if show_sequence: 358 _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, identity_aln[i], identity_aln, ax, 359 zoom, y_position, 0, reference_color, show_gaps) 360 361 # Create the LineCollection: each segment is drawn in a single call. 362 ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt')) 363 ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors)) 364 365 # custom legend 366 if show_legend: 367 # create it 368 custom_legend = [ 369 ax.add_line( 370 plt.Line2D( 371 [], 372 [], 373 color=aln_colors[x]['color'], marker='s' ,markeredgecolor='grey', linestyle='', markersize=10)) for x in aln_colors if x in detected_identity_values 374 ] 375 # plot it 376 ax.legend( 377 custom_legend, 378 [aln_colors[x]['type'] for x in aln_colors if x in detected_identity_values], 379 loc='lower right', 380 bbox_to_anchor=bbox_to_anchor, 381 ncols=(len(detected_identity_values) + 1) / 2 if aln.aln_type == 'AA' and color_mismatching_chars else len(detected_identity_values), 382 frameon=False 383 ) 384 385 _seq_names(aln, ax, custom_seq_names, show_seq_names) 386 387 # configure axis 388 ax.set_ylim(0, len(aln.alignment)) 389 if show_title: 390 ax.set_title('identity', loc='left') 391 _format_x_axis(aln, ax, show_x_label, show_left=False) 392 393 394def similarity_alignment(aln: explore.MSA, ax: plt.Axes, matrix_type: str | None = None, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', cmap: str = 'twilight_r', show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1): 395 """ 396 Generates a similarity alignment overview plot. Importantly the similarity values are normalized! 397 :param aln: alignment MSA class 398 :param ax: matplotlib axes 399 :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65 400 :param show_title: whether to show title 401 :param show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues 402 :param show_seq_names: whether to show seq names 403 :param custom_seq_names: custom seq names 404 :param reference_color: color of reference sequence 405 :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html 406 :param gap_color: color for gaps 407 :param show_gaps: whether to show gaps otherwise it will be ignored 408 :param fancy_gaps: show gaps with a small black bar 409 :param show_x_label: whether to show x label 410 :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 411 :param cbar_fraction: fraction of the original ax reserved for the legend 412 """ 413 # input check 414 _validate_input_parameters(aln, ax) 415 416 # validate colors 417 if not is_color_like(reference_color): 418 raise ValueError(f'{reference_color} for reference is not a color') 419 420 # Both options for gaps work hand in hand 421 if fancy_gaps: 422 show_gaps = True 423 424 # define zoom to plot 425 if aln.zoom is None: 426 zoom = (0, aln.length) 427 else: 428 zoom = aln.zoom 429 430 # get data 431 similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type) # use normalized values here 432 similarity_aln = similarity_aln.round(2) # round data for good color mapping 433 434 # determine min max values of the underlying matrix and create cmap 435 min_value, max_value = 0, 1 436 cmap = ScalarMappable( 437 norm=Normalize( 438 vmin=min_value, 439 vmax=max_value 440 ), 441 cmap=plt.get_cmap(cmap) 442 ) 443 444 # create similarity values 445 similarity_values = np.arange(start=min_value, stop=max_value, step=0.01) 446 # round it to be absolutely sure that values match with rounded sim alignment 447 similarity_values = similarity_values.round(2) 448 449 # create plot 450 polygons, polygon_colors, patch_list = [], [], [] 451 452 for i, seq_name in enumerate(aln.alignment): 453 y_position = len(aln.alignment) - i - 1 454 row = similarity_aln[i] 455 456 # plot a line below everything 457 if fancy_gaps: 458 ax.hlines( 459 y_position + 0.4, 460 xmin=zoom[0] - 0.5, 461 xmax=zoom[1] + 0.5, 462 color='black', 463 linestyle='-', 464 zorder=0, 465 linewidth=0.75 466 ) 467 468 # plot the basic shape per sequence with gaps 469 _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, 470 cmap.to_rgba(max_value) if seq_name != aln.reference_id else reference_color, 471 show_gaps) 472 473 # find consecutive stretches 474 stretches = _find_stretches(row) 475 # create polygons per stretch 476 _create_polygons(stretches, similarity_values, zoom, y_position, polygons, cmap, polygon_colors) 477 478 # add sequence text 479 if show_sequence: 480 _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, similarity_aln[i], similarity_aln, ax, 481 zoom, y_position, 1, reference_color, show_gaps, cmap) 482 483 # Create the LineCollection: each segment is drawn in a single call. 484 ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt')) 485 ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors)) 486 487 # legend 488 if show_cbar: 489 cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 490 cbar.set_ticks([min_value, max_value]) 491 cbar.set_ticklabels(['low', 'high']) 492 493 # format seq names 494 _seq_names(aln, ax, custom_seq_names, show_seq_names) 495 496 # configure axis 497 ax.set_ylim(0, len(aln.alignment)) 498 if show_title: 499 ax.set_title('similarity', loc='left') 500 _format_x_axis(aln, ax, show_x_label, show_left=False) 501 502 503def stat_plot(aln: explore.MSA, ax: plt.Axes, stat_type: str, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True): 504 """ 505 Generate a plot for the various alignment stats. 506 :param aln: alignment MSA class 507 :param ax: matplotlib axes 508 :param stat_type: 'entropy', 'gc', 'coverage', 'ts/tv', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65) 509 :param line_color: color of the line 510 :param line_width: width of the line 511 :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids 512 :param show_x_label: whether to show the x-axis label 513 :param show_title: whether to show the title 514 """ 515 516 def moving_average(arr, window_size): 517 if window_size > 1: 518 i = 0 519 moving_averages, plotting_idx = [], [] 520 while i < len(arr) + 1: 521 half_window_size = window_size // 2 522 window_left = arr[i - half_window_size : i] if i > half_window_size else arr[0:i] 523 window_right = arr[i: i + half_window_size] if i < len(arr) - half_window_size else arr[i: len(arr)] 524 moving_averages.append((sum(window_left) + sum(window_right)) / (len(window_left) + len(window_right))) 525 plotting_idx.append(i) 526 i += 1 527 528 return np.array(moving_averages), np.array(plotting_idx) if aln.zoom is None else np.array(plotting_idx) + aln.zoom[0] 529 else: 530 return arr, np.arange(aln.zoom[0], aln.zoom[1]) if aln.zoom is not None else np.arange(aln.length) 531 532 # define possible functions to calc here 533 stat_functions: Dict[str, Callable[[], list | ndarray]] = { 534 'gc': aln.calc_gc, 535 'entropy': aln.calc_entropy, 536 'coverage': aln.calc_coverage, 537 'identity': aln.calc_identity_alignment, 538 'similarity': aln.calc_similarity_alignment, 539 'ts tv score': aln.calc_transition_transversion_score 540 } 541 542 if stat_type not in stat_functions: 543 raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys()))) 544 545 # input check 546 _validate_input_parameters(aln, ax) 547 if not is_color_like(line_color): 548 raise ValueError('line color is not a color') 549 550 # validate rolling average 551 if rolling_average < 1 or rolling_average > aln.length: 552 raise ValueError('rolling_average must be between 1 and length of sequence') 553 554 # generate input data 555 array = stat_functions[stat_type]() 556 557 if stat_type == 'identity': 558 min_value, max_value = -1, 0 559 elif stat_type == 'ts tv score': 560 min_value, max_value = -1, 1 561 else: 562 min_value, max_value = 0, 1 563 if stat_type in ['identity', 'similarity']: 564 # for the mean nan values get handled as the lowest possible number in the matrix 565 array = np.nan_to_num(array, True, min_value) 566 array = np.mean(array, axis=0) 567 data, plot_idx = moving_average(array, rolling_average) 568 569 # plot the data 570 ax.fill_between( 571 # this add dummy data left and right for better plotting 572 # otherwise only half of the step is shown 573 np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx, 574 np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data, 575 min_value, 576 linewidth = line_width, 577 edgecolor=line_color, 578 step='mid' if rolling_average == 1 else None, 579 facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none' 580 ) 581 if stat_type == 'gc': 582 ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1) 583 584 # format axis 585 ax.set_ylim(min_value, max_value*0.1+max_value) 586 ax.set_yticks([min_value, max_value]) 587 if stat_type == 'gc': 588 ax.set_yticklabels(['0', '100']) 589 elif stat_type == 'ts tv score': 590 ax.set_yticklabels(['tv', 'ts']) 591 else: 592 ax.set_yticklabels(['low', 'high']) 593 594 # show title 595 if show_title: 596 ax.set_title( 597 f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position', 598 loc='left' 599 ) 600 601 _format_x_axis(aln, ax, show_x_label, show_left=True) 602 603 604def variant_plot(aln: explore.MSA, ax: plt.Axes, lollisize: tuple[int, int] | list[int, int] = (1, 3), show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int] = (1, 1)): 605 """ 606 Plots variants. 607 :param aln: alignment MSA class 608 :param ax: matplotlib axes 609 :param lollisize: (stem_size, head_size) 610 :param show_x_label: whether to show the x-axis label 611 :param show_legend: whether to show the legend 612 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 613 """ 614 615 # validate input 616 _validate_input_parameters(aln, ax) 617 if not isinstance(lollisize, tuple) or len(lollisize) != 2: 618 raise ValueError('lollisize must be tuple of length 2 (stem, head)') 619 for _size in lollisize: 620 if not isinstance(_size, float | int) or _size <= 0: 621 raise ValueError('lollisize must be floats greater than zero') 622 623 # define colors 624 colors = config.CHAR_COLORS[aln.aln_type] 625 # get snps 626 snps = aln.get_snps() 627 # define where to plot (each ref type gets a separate line) 628 ref_y_positions, y_pos, detected_var = {}, 0, set() 629 630 # iterate over snp dict 631 for pos in snps['POS']: 632 for identifier in snps['POS'][pos]: 633 # fill in y pos dict 634 if identifier == 'ref': 635 if snps['POS'][pos]['ref'] not in ref_y_positions: 636 ref_y_positions[snps['POS'][pos]['ref']] = y_pos 637 y_pos += 1.1 638 continue 639 # plot 640 if identifier == 'ALT': 641 for alt in snps['POS'][pos]['ALT']: 642 ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos, 643 ymin=ref_y_positions[snps['POS'][pos]['ref']], 644 ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'], 645 color=colors[alt], 646 zorder=100, 647 linewidth=lollisize[0] 648 ) 649 ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos, 650 ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'], 651 color=colors[alt], 652 marker='o', 653 markersize=lollisize[1] 654 ) 655 detected_var.add(alt) 656 657 # plot hlines 658 for y_char in ref_y_positions: 659 ax.hlines( 660 ref_y_positions[y_char], 661 xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5, 662 xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5, 663 color='black', 664 linestyle='-', 665 zorder=0, 666 linewidth=0.75 667 ) 668 # create a custom legend 669 if show_legend: 670 custom_legend = [ 671 ax.add_line( 672 plt.Line2D( 673 [], 674 [], 675 color=colors[char], 676 marker='o', 677 linestyle='', 678 markersize=5 679 ) 680 ) for char in colors if char in detected_var 681 ] 682 ax.legend( 683 custom_legend, 684 [char for char in colors if char in detected_var], # ensures correct sorting 685 loc='lower right', 686 title='variant', 687 bbox_to_anchor=bbox_to_anchor, 688 ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var), 689 frameon=False 690 ) 691 692 # format axis 693 _format_x_axis(aln, ax, show_x_label, show_left=False) 694 ax.spines['left'].set_visible(False) 695 ax.set_yticks([ref_y_positions[x] for x in ref_y_positions]) 696 ax.set_yticklabels(ref_y_positions.keys()) 697 ax.set_ylim(0, y_pos) 698 ax.set_ylabel('reference') 699 700 701def orf_plot(aln: explore.MSA, ax: plt.Axes, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1): 702 """ 703 Plot conserved ORFs. 704 :param aln: alignment MSA class 705 :param ax: matplotlib axes 706 :param min_length: minimum length of orf 707 :param non_overlapping_orfs: whether to consider overlapping orfs 708 :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html 709 :param direction_marker_size: marker size for direction marker, not shown if marker_size == None 710 :param show_x_label: whether to show the x-axis label 711 :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 712 :param cbar_fraction: fraction of the original ax reserved for the colorbar 713 """ 714 715 # normalize colorbar 716 cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap)) 717 718 # validate input 719 _validate_input_parameters(aln, ax) 720 721 # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only 722 # searched within the zoomed region) 723 aln_temp = deepcopy(aln) 724 aln_temp.zoom = None 725 if non_overlapping_orfs: 726 annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length) 727 else: 728 annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length) 729 730 # filter dict for zoom 731 if aln.zoom is not None: 732 annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])} 733 734 # add track for plotting 735 _add_track_positions(annotation_dict) 736 737 # plot 738 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap) 739 740 # legend 741 if show_cbar: 742 cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 743 cbar.set_label('% identity') 744 cbar.set_ticks([0, 100]) 745 746 # format axis 747 _format_x_axis(aln, ax, show_x_label, show_left=False) 748 ax.set_ylim(bottom=0.8) 749 ax.set_yticks([]) 750 ax.set_yticklabels([]) 751 ax.set_title('conserved orfs', loc='left') 752 753 754def annotation_plot(aln: explore.MSA, annotation: explore.Annotation | str, ax: plt.Axes, feature_to_plot: str, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False): 755 """ 756 Plot annotations from bed, gff or gb files. Are automatically mapped to alignment. 757 :param aln: alignment MSA class 758 :param annotation: annotation class | path to annotation file 759 :param ax: matplotlib axes 760 :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature) 761 :param color: color for the annotation 762 :param show_direction: show strand information 763 :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True 764 :param show_x_label: whether to show the x-axis label 765 """ 766 # helper function 767 def parse_annotation_from_string(path: str, msa: explore.MSA) -> explore.Annotation: 768 """ 769 Parse annotation. 770 :param path: path to annotation 771 :param msa: msa object 772 :return: parsed annotation 773 """ 774 if os.path.exists(path): 775 # reset zoom so the annotation is correctly parsed 776 msa_temp = deepcopy(msa) 777 msa_temp.zoom = None 778 return explore.Annotation(msa_temp, path) 779 else: 780 raise FileNotFoundError() 781 782 # parse from path 783 if type(annotation) is str: 784 annotation = parse_annotation_from_string(annotation, aln) 785 786 # validate input 787 _validate_input_parameters(aln, ax, annotation) 788 if not is_color_like(color): 789 raise ValueError(f'{color} for reference is not a color') 790 791 # ignore features to plot for bed files (here it is written into one feature) 792 if annotation.ann_type == 'bed': 793 annotation_dict = annotation.features['region'] 794 feature_to_plot = 'bed regions' 795 else: 796 # try to subset the annotation dict 797 try: 798 annotation_dict = annotation.features[feature_to_plot] 799 except KeyError: 800 raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.') 801 802 # plotting and formating 803 _add_track_positions(annotation_dict) 804 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color) 805 _format_x_axis(aln, ax, show_x_label, show_left=False) 806 ax.set_ylim(bottom=0.8) 807 ax.set_yticks([]) 808 ax.set_yticklabels([]) 809 ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')
def
identity_alignment( aln: msaexplorer.explore.MSA, ax: matplotlib.axes._axes.Axes, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', show_mask: bool = True, show_gaps: bool = True, fancy_gaps: bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_mismatching_chars: bool = False, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float | int, float | int] | list[float | int, float | int] = (1, 1)):
278def identity_alignment(aln: explore.MSA, ax: plt.Axes, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', show_mask:bool = True, show_gaps:bool = True, fancy_gaps:bool = False, show_mismatches: bool = True, show_ambiguities: bool = False, color_mismatching_chars: bool = False, show_x_label: bool = True, show_legend: bool = False, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int]= (1, 1)): 279 """ 280 Generates an identity alignment overview plot. 281 :param aln: alignment MSA class 282 :param ax: matplotlib axes 283 :param show_title: whether to show title 284 :param show_seq_names: whether to show seq names 285 :param show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues 286 :param custom_seq_names: custom seq names 287 :param reference_color: color of reference sequence 288 :param show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch 289 :param show_gaps: whether to show gaps otherwise it will be shown as match or mismatch 290 :param fancy_gaps: show gaps with a small black bar 291 :param show_mismatches: whether to show mismatches otherwise it will be shown as match 292 :param show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences 293 :param color_mismatching_chars: color mismatching chars with their unique color 294 :param show_x_label: whether to show x label 295 :param show_legend: whether to show the legend 296 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 297 """ 298 299 # Validate inputs and colors 300 _validate_input_parameters(aln, ax) 301 if not is_color_like(reference_color): 302 raise ValueError(f'{reference_color} for reference is not a color') 303 304 # Both options for gaps work hand in hand 305 if fancy_gaps: 306 show_gaps = True 307 308 # Determine zoom 309 zoom = (0, aln.length) if aln.zoom is None else aln.zoom 310 311 # Set up color mapping and identity values 312 aln_colors = config.IDENTITY_COLORS.copy() 313 identity_values = [-1, -2, -3] # -1 = mismatch, -2 = mask, -3 ambiguity 314 if color_mismatching_chars: 315 colors_to_extend = config.CHAR_COLORS[aln.aln_type] 316 identity_values += [x + 1 for x in range(len(colors_to_extend))] # x+1 is needed to allow correct mapping 317 for idx, char in enumerate(colors_to_extend): 318 aln_colors[idx + 1] = {'type': char, 'color': colors_to_extend[char]} 319 320 # Compute identity alignment 321 identity_aln = aln.calc_identity_alignment( 322 encode_mask=show_mask, 323 encode_gaps=show_gaps, 324 encode_mismatches=show_mismatches, 325 encode_ambiguities=show_ambiguities, 326 encode_each_mismatch_char=color_mismatching_chars 327 ) 328 329 # List to store polygons 330 detected_identity_values = {0} 331 polygons, polygon_colors, patch_list = [], [], [] 332 333 for i, seq_name in enumerate(aln.alignment): 334 y_position = len(aln.alignment) - i - 1 335 row = identity_aln[i] 336 337 # plot a line below everything 338 if fancy_gaps: 339 ax.hlines( 340 y_position + 0.4, 341 xmin=zoom[0] - 0.5, 342 xmax=zoom[1] + 0.5, 343 color='black', 344 linestyle='-', 345 zorder=0, 346 linewidth=0.75 347 ) 348 349 # plot the basic shape per sequence with gaps 350 _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, aln_colors[0]['color'], show_gaps) 351 352 # find consecutive stretches 353 stretches = _find_stretches(row) 354 # create polygons per stretch 355 _create_polygons(stretches, identity_values, zoom, y_position, polygons, aln_colors, polygon_colors, detected_identity_values) 356 357 # add sequence text 358 if show_sequence: 359 _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, identity_aln[i], identity_aln, ax, 360 zoom, y_position, 0, reference_color, show_gaps) 361 362 # Create the LineCollection: each segment is drawn in a single call. 363 ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt')) 364 ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors)) 365 366 # custom legend 367 if show_legend: 368 # create it 369 custom_legend = [ 370 ax.add_line( 371 plt.Line2D( 372 [], 373 [], 374 color=aln_colors[x]['color'], marker='s' ,markeredgecolor='grey', linestyle='', markersize=10)) for x in aln_colors if x in detected_identity_values 375 ] 376 # plot it 377 ax.legend( 378 custom_legend, 379 [aln_colors[x]['type'] for x in aln_colors if x in detected_identity_values], 380 loc='lower right', 381 bbox_to_anchor=bbox_to_anchor, 382 ncols=(len(detected_identity_values) + 1) / 2 if aln.aln_type == 'AA' and color_mismatching_chars else len(detected_identity_values), 383 frameon=False 384 ) 385 386 _seq_names(aln, ax, custom_seq_names, show_seq_names) 387 388 # configure axis 389 ax.set_ylim(0, len(aln.alignment)) 390 if show_title: 391 ax.set_title('identity', loc='left') 392 _format_x_axis(aln, ax, show_x_label, show_left=False)
Generates an identity alignment overview plot.
Parameters
- aln: alignment MSA class
- ax: matplotlib axes
- show_title: whether to show title
- show_seq_names: whether to show seq names
- show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues
- custom_seq_names: custom seq names
- reference_color: color of reference sequence
- show_mask: whether to show N or X chars otherwise it will be shown as match or mismatch
- show_gaps: whether to show gaps otherwise it will be shown as match or mismatch
- fancy_gaps: show gaps with a small black bar
- show_mismatches: whether to show mismatches otherwise it will be shown as match
- show_ambiguities: whether to show non-N ambiguities -> only relevant for RNA/DNA sequences
- color_mismatching_chars: color mismatching chars with their unique color
- show_x_label: whether to show x label
- show_legend: whether to show the legend
- bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
def
similarity_alignment( aln: msaexplorer.explore.MSA, ax: matplotlib.axes._axes.Axes, matrix_type: str | None = None, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', cmap: str = 'twilight_r', show_gaps: bool = True, fancy_gaps: bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1):
395def similarity_alignment(aln: explore.MSA, ax: plt.Axes, matrix_type: str | None = None, show_title: bool = True, show_sequence: bool = False, show_seq_names: bool = False, custom_seq_names: tuple | list = (), reference_color: str = 'lightsteelblue', cmap: str = 'twilight_r', show_gaps:bool = True, fancy_gaps:bool = False, show_x_label: bool = True, show_cbar: bool = False, cbar_fraction: float = 0.1): 396 """ 397 Generates a similarity alignment overview plot. Importantly the similarity values are normalized! 398 :param aln: alignment MSA class 399 :param ax: matplotlib axes 400 :param matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65 401 :param show_title: whether to show title 402 :param show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues 403 :param show_seq_names: whether to show seq names 404 :param custom_seq_names: custom seq names 405 :param reference_color: color of reference sequence 406 :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html 407 :param gap_color: color for gaps 408 :param show_gaps: whether to show gaps otherwise it will be ignored 409 :param fancy_gaps: show gaps with a small black bar 410 :param show_x_label: whether to show x label 411 :param show_cbar: whether to show the legend - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 412 :param cbar_fraction: fraction of the original ax reserved for the legend 413 """ 414 # input check 415 _validate_input_parameters(aln, ax) 416 417 # validate colors 418 if not is_color_like(reference_color): 419 raise ValueError(f'{reference_color} for reference is not a color') 420 421 # Both options for gaps work hand in hand 422 if fancy_gaps: 423 show_gaps = True 424 425 # define zoom to plot 426 if aln.zoom is None: 427 zoom = (0, aln.length) 428 else: 429 zoom = aln.zoom 430 431 # get data 432 similarity_aln = aln.calc_similarity_alignment(matrix_type=matrix_type) # use normalized values here 433 similarity_aln = similarity_aln.round(2) # round data for good color mapping 434 435 # determine min max values of the underlying matrix and create cmap 436 min_value, max_value = 0, 1 437 cmap = ScalarMappable( 438 norm=Normalize( 439 vmin=min_value, 440 vmax=max_value 441 ), 442 cmap=plt.get_cmap(cmap) 443 ) 444 445 # create similarity values 446 similarity_values = np.arange(start=min_value, stop=max_value, step=0.01) 447 # round it to be absolutely sure that values match with rounded sim alignment 448 similarity_values = similarity_values.round(2) 449 450 # create plot 451 polygons, polygon_colors, patch_list = [], [], [] 452 453 for i, seq_name in enumerate(aln.alignment): 454 y_position = len(aln.alignment) - i - 1 455 row = similarity_aln[i] 456 457 # plot a line below everything 458 if fancy_gaps: 459 ax.hlines( 460 y_position + 0.4, 461 xmin=zoom[0] - 0.5, 462 xmax=zoom[1] + 0.5, 463 color='black', 464 linestyle='-', 465 zorder=0, 466 linewidth=0.75 467 ) 468 469 # plot the basic shape per sequence with gaps 470 _create_identity_patch(row, aln, patch_list, zoom, y_position, reference_color, seq_name, 471 cmap.to_rgba(max_value) if seq_name != aln.reference_id else reference_color, 472 show_gaps) 473 474 # find consecutive stretches 475 stretches = _find_stretches(row) 476 # create polygons per stretch 477 _create_polygons(stretches, similarity_values, zoom, y_position, polygons, cmap, polygon_colors) 478 479 # add sequence text 480 if show_sequence: 481 _plot_sequence_text(aln, list(aln.alignment.keys())[i], aln.reference_id, similarity_aln[i], similarity_aln, ax, 482 zoom, y_position, 1, reference_color, show_gaps, cmap) 483 484 # Create the LineCollection: each segment is drawn in a single call. 485 ax.add_collection(PatchCollection(patch_list, match_original=True, linewidths='none', joinstyle='miter', capstyle='butt')) 486 ax.add_collection(PolyCollection(polygons, facecolors=polygon_colors, linewidths=0.5, edgecolors=polygon_colors)) 487 488 # legend 489 if show_cbar: 490 cbar = plt.colorbar(cmap, ax=ax, location= 'top', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 491 cbar.set_ticks([min_value, max_value]) 492 cbar.set_ticklabels(['low', 'high']) 493 494 # format seq names 495 _seq_names(aln, ax, custom_seq_names, show_seq_names) 496 497 # configure axis 498 ax.set_ylim(0, len(aln.alignment)) 499 if show_title: 500 ax.set_title('similarity', loc='left') 501 _format_x_axis(aln, ax, show_x_label, show_left=False)
Generates a similarity alignment overview plot. Importantly the similarity values are normalized!
Parameters
- aln: alignment MSA class
- ax: matplotlib axes
- matrix_type: substitution matrix - see config.SUBS_MATRICES, standard: NT - TRANS, AA - BLOSUM65
- show_title: whether to show title
- show_sequence: whether to show sequence for differences and reference - zoom in to avoid plotting issues
- show_seq_names: whether to show seq names
- custom_seq_names: custom seq names
- reference_color: color of reference sequence
- cmap: color mapping for % identity - see https: //matplotlib.org/stable/users/explain/colors/colormaps.html
- gap_color: color for gaps
- show_gaps: whether to show gaps otherwise it will be ignored
- fancy_gaps: show gaps with a small black bar
- show_x_label: whether to show x label
- show_cbar: whether to show the legend - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
- cbar_fraction: fraction of the original ax reserved for the legend
def
stat_plot( aln: msaexplorer.explore.MSA, ax: matplotlib.axes._axes.Axes, stat_type: str, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True):
504def stat_plot(aln: explore.MSA, ax: plt.Axes, stat_type: str, line_color: str = 'burlywood', line_width: int | float = 2, rolling_average: int = 20, show_x_label: bool = False, show_title: bool = True): 505 """ 506 Generate a plot for the various alignment stats. 507 :param aln: alignment MSA class 508 :param ax: matplotlib axes 509 :param stat_type: 'entropy', 'gc', 'coverage', 'ts/tv', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65) 510 :param line_color: color of the line 511 :param line_width: width of the line 512 :param rolling_average: average rolling window size left and right of a position in nucleotides or amino acids 513 :param show_x_label: whether to show the x-axis label 514 :param show_title: whether to show the title 515 """ 516 517 def moving_average(arr, window_size): 518 if window_size > 1: 519 i = 0 520 moving_averages, plotting_idx = [], [] 521 while i < len(arr) + 1: 522 half_window_size = window_size // 2 523 window_left = arr[i - half_window_size : i] if i > half_window_size else arr[0:i] 524 window_right = arr[i: i + half_window_size] if i < len(arr) - half_window_size else arr[i: len(arr)] 525 moving_averages.append((sum(window_left) + sum(window_right)) / (len(window_left) + len(window_right))) 526 plotting_idx.append(i) 527 i += 1 528 529 return np.array(moving_averages), np.array(plotting_idx) if aln.zoom is None else np.array(plotting_idx) + aln.zoom[0] 530 else: 531 return arr, np.arange(aln.zoom[0], aln.zoom[1]) if aln.zoom is not None else np.arange(aln.length) 532 533 # define possible functions to calc here 534 stat_functions: Dict[str, Callable[[], list | ndarray]] = { 535 'gc': aln.calc_gc, 536 'entropy': aln.calc_entropy, 537 'coverage': aln.calc_coverage, 538 'identity': aln.calc_identity_alignment, 539 'similarity': aln.calc_similarity_alignment, 540 'ts tv score': aln.calc_transition_transversion_score 541 } 542 543 if stat_type not in stat_functions: 544 raise ValueError('stat_type must be one of {}'.format(list(stat_functions.keys()))) 545 546 # input check 547 _validate_input_parameters(aln, ax) 548 if not is_color_like(line_color): 549 raise ValueError('line color is not a color') 550 551 # validate rolling average 552 if rolling_average < 1 or rolling_average > aln.length: 553 raise ValueError('rolling_average must be between 1 and length of sequence') 554 555 # generate input data 556 array = stat_functions[stat_type]() 557 558 if stat_type == 'identity': 559 min_value, max_value = -1, 0 560 elif stat_type == 'ts tv score': 561 min_value, max_value = -1, 1 562 else: 563 min_value, max_value = 0, 1 564 if stat_type in ['identity', 'similarity']: 565 # for the mean nan values get handled as the lowest possible number in the matrix 566 array = np.nan_to_num(array, True, min_value) 567 array = np.mean(array, axis=0) 568 data, plot_idx = moving_average(array, rolling_average) 569 570 # plot the data 571 ax.fill_between( 572 # this add dummy data left and right for better plotting 573 # otherwise only half of the step is shown 574 np.concatenate(([plot_idx[0] - 0.5], plot_idx, [plot_idx[-1] + 0.5])) if rolling_average == 1 else plot_idx, 575 np.concatenate(([data[0]], data, [data[-1]])) if rolling_average == 1 else data, 576 min_value, 577 linewidth = line_width, 578 edgecolor=line_color, 579 step='mid' if rolling_average == 1 else None, 580 facecolor=(line_color, 0.6) if stat_type not in ['ts tv score', 'gc'] else 'none' 581 ) 582 if stat_type == 'gc': 583 ax.hlines(0.5, xmin=0, xmax=aln.zoom[0] + aln.length if aln.zoom is not None else aln.length, color='black', linestyles='--', linewidth=1) 584 585 # format axis 586 ax.set_ylim(min_value, max_value*0.1+max_value) 587 ax.set_yticks([min_value, max_value]) 588 if stat_type == 'gc': 589 ax.set_yticklabels(['0', '100']) 590 elif stat_type == 'ts tv score': 591 ax.set_yticklabels(['tv', 'ts']) 592 else: 593 ax.set_yticklabels(['low', 'high']) 594 595 # show title 596 if show_title: 597 ax.set_title( 598 f'{stat_type} (average over {rolling_average} positions)' if rolling_average > 1 else f'{stat_type} for each position', 599 loc='left' 600 ) 601 602 _format_x_axis(aln, ax, show_x_label, show_left=True)
Generate a plot for the various alignment stats.
Parameters
- aln: alignment MSA class
- ax: matplotlib axes
- stat_type: 'entropy', 'gc', 'coverage', 'ts/tv', 'identity' or 'similarity' -> (here default matrices are used NT - TRANS, AA - BLOSUM65)
- line_color: color of the line
- line_width: width of the line
- rolling_average: average rolling window size left and right of a position in nucleotides or amino acids
- show_x_label: whether to show the x-axis label
- show_title: whether to show the title
def
variant_plot( aln: msaexplorer.explore.MSA, ax: matplotlib.axes._axes.Axes, lollisize: tuple[int, int] | list[int, int] = (1, 3), show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float | int, float | int] | list[float | int, float | int] = (1, 1)):
605def variant_plot(aln: explore.MSA, ax: plt.Axes, lollisize: tuple[int, int] | list[int, int] = (1, 3), show_x_label: bool = False, show_legend: bool = True, bbox_to_anchor: tuple[float|int, float|int] | list[float|int, float|int] = (1, 1)): 606 """ 607 Plots variants. 608 :param aln: alignment MSA class 609 :param ax: matplotlib axes 610 :param lollisize: (stem_size, head_size) 611 :param show_x_label: whether to show the x-axis label 612 :param show_legend: whether to show the legend 613 :param bbox_to_anchor: bounding box coordinates for the legend - see: https://matplotlib.org/stable/api/legend_api.html 614 """ 615 616 # validate input 617 _validate_input_parameters(aln, ax) 618 if not isinstance(lollisize, tuple) or len(lollisize) != 2: 619 raise ValueError('lollisize must be tuple of length 2 (stem, head)') 620 for _size in lollisize: 621 if not isinstance(_size, float | int) or _size <= 0: 622 raise ValueError('lollisize must be floats greater than zero') 623 624 # define colors 625 colors = config.CHAR_COLORS[aln.aln_type] 626 # get snps 627 snps = aln.get_snps() 628 # define where to plot (each ref type gets a separate line) 629 ref_y_positions, y_pos, detected_var = {}, 0, set() 630 631 # iterate over snp dict 632 for pos in snps['POS']: 633 for identifier in snps['POS'][pos]: 634 # fill in y pos dict 635 if identifier == 'ref': 636 if snps['POS'][pos]['ref'] not in ref_y_positions: 637 ref_y_positions[snps['POS'][pos]['ref']] = y_pos 638 y_pos += 1.1 639 continue 640 # plot 641 if identifier == 'ALT': 642 for alt in snps['POS'][pos]['ALT']: 643 ax.vlines(x=pos + aln.zoom[0] if aln.zoom is not None else pos, 644 ymin=ref_y_positions[snps['POS'][pos]['ref']], 645 ymax=ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'], 646 color=colors[alt], 647 zorder=100, 648 linewidth=lollisize[0] 649 ) 650 ax.plot(pos + aln.zoom[0] if aln.zoom is not None else pos, 651 ref_y_positions[snps['POS'][pos]['ref']] + snps['POS'][pos]['ALT'][alt]['AF'], 652 color=colors[alt], 653 marker='o', 654 markersize=lollisize[1] 655 ) 656 detected_var.add(alt) 657 658 # plot hlines 659 for y_char in ref_y_positions: 660 ax.hlines( 661 ref_y_positions[y_char], 662 xmin=aln.zoom[0] - 0.5 if aln.zoom is not None else -0.5, 663 xmax=aln.zoom[0] + aln.length + 0.5 if aln.zoom is not None else aln.length + 0.5, 664 color='black', 665 linestyle='-', 666 zorder=0, 667 linewidth=0.75 668 ) 669 # create a custom legend 670 if show_legend: 671 custom_legend = [ 672 ax.add_line( 673 plt.Line2D( 674 [], 675 [], 676 color=colors[char], 677 marker='o', 678 linestyle='', 679 markersize=5 680 ) 681 ) for char in colors if char in detected_var 682 ] 683 ax.legend( 684 custom_legend, 685 [char for char in colors if char in detected_var], # ensures correct sorting 686 loc='lower right', 687 title='variant', 688 bbox_to_anchor=bbox_to_anchor, 689 ncols=len(detected_var)/2 if aln.aln_type == 'AA' else len(detected_var), 690 frameon=False 691 ) 692 693 # format axis 694 _format_x_axis(aln, ax, show_x_label, show_left=False) 695 ax.spines['left'].set_visible(False) 696 ax.set_yticks([ref_y_positions[x] for x in ref_y_positions]) 697 ax.set_yticklabels(ref_y_positions.keys()) 698 ax.set_ylim(0, y_pos) 699 ax.set_ylabel('reference')
Plots variants.
Parameters
- aln: alignment MSA class
- ax: matplotlib axes
- lollisize: (stem_size, head_size)
- show_x_label: whether to show the x-axis label
- show_legend: whether to show the legend
- bbox_to_anchor: bounding box coordinates for the legend - see: https: //matplotlib.org/stable/api/legend_api.html
def
orf_plot( aln: msaexplorer.explore.MSA, ax: matplotlib.axes._axes.Axes, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1):
702def orf_plot(aln: explore.MSA, ax: plt.Axes, min_length: int = 500, non_overlapping_orfs: bool = True, cmap: str = 'Blues', direction_marker_size: int | None = 5, show_x_label: bool = False, show_cbar: bool = False, cbar_fraction: float = 0.1): 703 """ 704 Plot conserved ORFs. 705 :param aln: alignment MSA class 706 :param ax: matplotlib axes 707 :param min_length: minimum length of orf 708 :param non_overlapping_orfs: whether to consider overlapping orfs 709 :param cmap: color mapping for % identity - see https://matplotlib.org/stable/users/explain/colors/colormaps.html 710 :param direction_marker_size: marker size for direction marker, not shown if marker_size == None 711 :param show_x_label: whether to show the x-axis label 712 :param show_cbar: whether to show the colorbar - see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html 713 :param cbar_fraction: fraction of the original ax reserved for the colorbar 714 """ 715 716 # normalize colorbar 717 cmap = ScalarMappable(norm=Normalize(0, 100), cmap=plt.get_cmap(cmap)) 718 719 # validate input 720 _validate_input_parameters(aln, ax) 721 722 # get orfs --> first deepcopy and reset zoom that the orfs are also zoomed in (by default, the orfs are only 723 # searched within the zoomed region) 724 aln_temp = deepcopy(aln) 725 aln_temp.zoom = None 726 if non_overlapping_orfs: 727 annotation_dict = aln_temp.get_non_overlapping_conserved_orfs(min_length=min_length) 728 else: 729 annotation_dict = aln_temp.get_conserved_orfs(min_length=min_length) 730 731 # filter dict for zoom 732 if aln.zoom is not None: 733 annotation_dict = {key:val for key, val in annotation_dict.items() if max(val['location'][0][0], aln.zoom[0]) <= min(val['location'][0][1], aln.zoom[1])} 734 735 # add track for plotting 736 _add_track_positions(annotation_dict) 737 738 # plot 739 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=cmap) 740 741 # legend 742 if show_cbar: 743 cbar = plt.colorbar(cmap,ax=ax, location= 'top', orientation='horizontal', anchor=(1,0), shrink=0.2, pad=2/ax.bbox.height, fraction=cbar_fraction) 744 cbar.set_label('% identity') 745 cbar.set_ticks([0, 100]) 746 747 # format axis 748 _format_x_axis(aln, ax, show_x_label, show_left=False) 749 ax.set_ylim(bottom=0.8) 750 ax.set_yticks([]) 751 ax.set_yticklabels([]) 752 ax.set_title('conserved orfs', loc='left')
Plot conserved ORFs.
Parameters
- aln: alignment MSA class
- ax: matplotlib axes
- min_length: minimum length of orf
- non_overlapping_orfs: whether to consider overlapping orfs
- cmap: color mapping for % identity - see https: //matplotlib.org/stable/users/explain/colors/colormaps.html
- direction_marker_size: marker size for direction marker, not shown if marker_size == None
- show_x_label: whether to show the x-axis label
- show_cbar: whether to show the colorbar - see https: //matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
- cbar_fraction: fraction of the original ax reserved for the colorbar
def
annotation_plot( aln: msaexplorer.explore.MSA, annotation: msaexplorer.explore.Annotation | str, ax: matplotlib.axes._axes.Axes, feature_to_plot: str, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False):
755def annotation_plot(aln: explore.MSA, annotation: explore.Annotation | str, ax: plt.Axes, feature_to_plot: str, color: str = 'wheat', direction_marker_size: int | None = 5, show_x_label: bool = False): 756 """ 757 Plot annotations from bed, gff or gb files. Are automatically mapped to alignment. 758 :param aln: alignment MSA class 759 :param annotation: annotation class | path to annotation file 760 :param ax: matplotlib axes 761 :param feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature) 762 :param color: color for the annotation 763 :param show_direction: show strand information 764 :param direction_marker_size: marker size for direction marker, only relevant if show_direction is True 765 :param show_x_label: whether to show the x-axis label 766 """ 767 # helper function 768 def parse_annotation_from_string(path: str, msa: explore.MSA) -> explore.Annotation: 769 """ 770 Parse annotation. 771 :param path: path to annotation 772 :param msa: msa object 773 :return: parsed annotation 774 """ 775 if os.path.exists(path): 776 # reset zoom so the annotation is correctly parsed 777 msa_temp = deepcopy(msa) 778 msa_temp.zoom = None 779 return explore.Annotation(msa_temp, path) 780 else: 781 raise FileNotFoundError() 782 783 # parse from path 784 if type(annotation) is str: 785 annotation = parse_annotation_from_string(annotation, aln) 786 787 # validate input 788 _validate_input_parameters(aln, ax, annotation) 789 if not is_color_like(color): 790 raise ValueError(f'{color} for reference is not a color') 791 792 # ignore features to plot for bed files (here it is written into one feature) 793 if annotation.ann_type == 'bed': 794 annotation_dict = annotation.features['region'] 795 feature_to_plot = 'bed regions' 796 else: 797 # try to subset the annotation dict 798 try: 799 annotation_dict = annotation.features[feature_to_plot] 800 except KeyError: 801 raise KeyError(f'Feature {feature_to_plot} not found. Use annotation.features.keys() to see available features.') 802 803 # plotting and formating 804 _add_track_positions(annotation_dict) 805 _plot_annotation(annotation_dict, ax, direction_marker_size=direction_marker_size, color=color) 806 _format_x_axis(aln, ax, show_x_label, show_left=False) 807 ax.set_ylim(bottom=0.8) 808 ax.set_yticks([]) 809 ax.set_yticklabels([]) 810 ax.set_title(f'{annotation.locus} ({feature_to_plot})', loc='left')
Plot annotations from bed, gff or gb files. Are automatically mapped to alignment.
Parameters
- aln: alignment MSA class
- annotation: annotation class | path to annotation file
- ax: matplotlib axes
- feature_to_plot: potential feature to plot (not for bed files as it is parsed as one feature)
- color: color for the annotation
- show_direction: show strand information
- direction_marker_size: marker size for direction marker, only relevant if show_direction is True
- show_x_label: whether to show the x-axis label