msaexplorer
MSAexplorer
MSAexplorer is a lightweight toolkit to explore, plot, and export multiple sequence alignments. The API is organized into three modules:
explore: parse alignments/annotations and compute statistics.draw: create matplotlib plots directly fromMSAobjects or file paths.export: serialize computed results (SNPs, stats, FASTA, ORFs, recovery tables).
All examples below are tested with files from example_alignments/.
Quick Start (explore)
from pathlib import Path
from msaexplorer import explore
base = Path('example_alignments')
aln = explore.MSA(str(base / 'DNA.fasta'))
# set reference and zoom window (start inclusive, end exclusive)
aln.reference_id = aln.sequence_ids[0]
aln.zoom = (0, 300)
print(aln.aln_type) # 'DNA'
print(len(aln), aln.length)
print(aln.get_reference_coords())
Biopython Interoperability
from pathlib import Path
from Bio import AlignIO, SeqIO
from msaexplorer import explore
base = Path('example_alignments')
# alignment from Bio.Align.MultipleSeqAlignment
bio_aln = AlignIO.read(str(base / 'DNA.fasta'), 'fasta')
aln = explore.MSA(bio_aln, reference_id=bio_aln[0].id, zoom_range=(0, 200))
# annotation from Bio.SeqIO GenBank iterator
gb_iter = SeqIO.parse(str(base / 'DNA_RNA.gb'), 'genbank')
ann = explore.Annotation(aln, gb_iter)
print(ann.ann_type, list(ann.features.keys())[:3])
Working with Downstream Dataclasses
from pathlib import Path
from msaexplorer import explore
aln = explore.MSA(str(Path('example_alignments') / 'DNA.fasta'), zoom_range=(0, 300))
aln.reference_id = aln.sequence_ids[0]
entropy = aln.calc_entropy() # AlignmentStats
length_stats = aln.calc_length_stats() # LengthStats
dist_to_ref = aln.calc_pairwise_distance_to_reference() # PairwiseDistance
variants = aln.get_snps() # VariantCollection
orfs = aln.get_non_overlapping_conserved_orfs(90) # OrfCollection
print(entropy.stat_name, entropy.positions[:3], entropy.values[:3])
print(length_stats.mean_length, length_stats.std_length)
print(dist_to_ref.reference_id, dist_to_ref.sequence_ids[:2], dist_to_ref.distances[:2])
print(variants.chrom, len(variants))
print(orfs.keys()[:3])
Plotting (draw)
All plotting functions return a matplotlib Axes.
You can either:
- pass an existing axis (best for multi-panel figures), or
- pass only an alignment/path for a one-liner plot.
One-Liner Examples (path input)
from msaexplorer import draw
import matplotlib.pyplot as plt
draw.identity_alignment('example_alignments/DNA.fasta')
plt.show()
draw.stat_plot('example_alignments/DNA.fasta', stat_type='entropy', rolling_average=5)
plt.show()
Multi-Panel Figure with Main Plot Types
# import necessary packages
from pathlib import Path
import matplotlib.pyplot as plt
from msaexplorer import explore, draw
# Example 1
# load DNA alignment
base = Path('example_alignments')
aln = explore.MSA(str(base / 'DNA.fasta'), zoom_range=(0, 100))
# ini figure
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(14, 15), height_ratios=[1, 1, 10])
# plot
draw.stat_plot(aln, ax=ax[0], stat_type='coverage', rolling_average=1, show_title=True)
draw.sequence_logo(aln, ax=ax[1], plot_type='logo')
draw.identity_alignment(aln, ax=ax[2], show_consensus=True, show_seq_names=True, fancy_gaps=True, show_legend=True, color_scheme='standard', show_identity_sequence=True)
plt.tight_layout()
plt.show()
# Example 2
# load AA alignment
aln = explore.MSA(str(base / 'AS.fasta'))
# ini figure
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 5), height_ratios=[1, 2])
draw.stat_plot(aln, ax=ax[0], stat_type='entropy', rolling_average=5, show_title=True)
draw.identity_alignment(aln, ax=ax[1], show_consensus=True, show_seq_names=True, fancy_gaps=True, show_legend=True, color_scheme='hydrophobicity')
plt.tight_layout()
plt.show()
Export (export)
from pathlib import Path
from msaexplorer import explore, export
aln = explore.MSA(str(Path('example_alignments') / 'DNA.fasta'), zoom_range=(0, 300))
aln.reference_id = aln.sequence_ids[0]
variants = aln.get_snps()
entropy = aln.calc_entropy()
consensus = aln.get_consensus()
# return as strings
vcf_text = export.snps(variants, format_type='vcf')
stats_text = export.stats(entropy)
fasta_text = export.fasta(consensus, header='consensus_dna')
# or write to disk (path argument)
# export.snps(variants, format_type='tabular', path='results/snps')
# export.stats(entropy, path='results/entropy.tsv')
Notes
zoomcan be reset withaln.zoom = None.- If no
reference_idis set, methods that need a reference use the consensus. - For API-level details, inspect module/class docstrings in
explore,draw, andexport.
1r""" 2# MSAexplorer 3 4MSAexplorer is a lightweight toolkit to **explore**, **plot**, and **export** multiple sequence alignments. 5The API is organized into three modules: 6 7- `explore`: parse alignments/annotations and compute statistics. 8- `draw`: create matplotlib plots directly from `MSA` objects or file paths. 9- `export`: serialize computed results (SNPs, stats, FASTA, ORFs, recovery tables). 10 11All examples below are tested with files from `example_alignments/`. 12 13## Quick Start (`explore`) 14 15```python 16from pathlib import Path 17from msaexplorer import explore 18 19base = Path('example_alignments') 20aln = explore.MSA(str(base / 'DNA.fasta')) 21 22# set reference and zoom window (start inclusive, end exclusive) 23aln.reference_id = aln.sequence_ids[0] 24aln.zoom = (0, 300) 25 26print(aln.aln_type) # 'DNA' 27print(len(aln), aln.length) 28print(aln.get_reference_coords()) 29``` 30 31### Biopython Interoperability 32 33```python 34from pathlib import Path 35from Bio import AlignIO, SeqIO 36from msaexplorer import explore 37 38base = Path('example_alignments') 39 40# alignment from Bio.Align.MultipleSeqAlignment 41bio_aln = AlignIO.read(str(base / 'DNA.fasta'), 'fasta') 42aln = explore.MSA(bio_aln, reference_id=bio_aln[0].id, zoom_range=(0, 200)) 43 44# annotation from Bio.SeqIO GenBank iterator 45gb_iter = SeqIO.parse(str(base / 'DNA_RNA.gb'), 'genbank') 46ann = explore.Annotation(aln, gb_iter) 47print(ann.ann_type, list(ann.features.keys())[:3]) 48``` 49 50### Working with Downstream Dataclasses 51 52```python 53from pathlib import Path 54from msaexplorer import explore 55 56aln = explore.MSA(str(Path('example_alignments') / 'DNA.fasta'), zoom_range=(0, 300)) 57aln.reference_id = aln.sequence_ids[0] 58 59entropy = aln.calc_entropy() # AlignmentStats 60length_stats = aln.calc_length_stats() # LengthStats 61dist_to_ref = aln.calc_pairwise_distance_to_reference() # PairwiseDistance 62variants = aln.get_snps() # VariantCollection 63orfs = aln.get_non_overlapping_conserved_orfs(90) # OrfCollection 64 65print(entropy.stat_name, entropy.positions[:3], entropy.values[:3]) 66print(length_stats.mean_length, length_stats.std_length) 67print(dist_to_ref.reference_id, dist_to_ref.sequence_ids[:2], dist_to_ref.distances[:2]) 68print(variants.chrom, len(variants)) 69print(orfs.keys()[:3]) 70``` 71 72## Plotting (`draw`) 73 74All plotting functions return a matplotlib `Axes`. 75You can either: 76 771. pass an existing axis (best for multi-panel figures), or 782. pass only an alignment/path for a one-liner plot. 79 80### One-Liner Examples (path input) 81 82```python 83from msaexplorer import draw 84import matplotlib.pyplot as plt 85 86draw.identity_alignment('example_alignments/DNA.fasta') 87plt.show() 88draw.stat_plot('example_alignments/DNA.fasta', stat_type='entropy', rolling_average=5) 89plt.show() 90``` 91 92### Multi-Panel Figure with Main Plot Types 93 94```python 95# import necessary packages 96from pathlib import Path 97import matplotlib.pyplot as plt 98from msaexplorer import explore, draw 99 100# Example 1 101# load DNA alignment 102base = Path('example_alignments') 103aln = explore.MSA(str(base / 'DNA.fasta'), zoom_range=(0, 100)) 104 105# ini figure 106fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(14, 15), height_ratios=[1, 1, 10]) 107# plot 108draw.stat_plot(aln, ax=ax[0], stat_type='coverage', rolling_average=1, show_title=True) 109draw.sequence_logo(aln, ax=ax[1], plot_type='logo') 110draw.identity_alignment(aln, ax=ax[2], show_consensus=True, show_seq_names=True, fancy_gaps=True, show_legend=True, color_scheme='standard', show_identity_sequence=True) 111 112plt.tight_layout() 113plt.show() 114 115# Example 2 116# load AA alignment 117aln = explore.MSA(str(base / 'AS.fasta')) 118# ini figure 119fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 5), height_ratios=[1, 2]) 120 121draw.stat_plot(aln, ax=ax[0], stat_type='entropy', rolling_average=5, show_title=True) 122draw.identity_alignment(aln, ax=ax[1], show_consensus=True, show_seq_names=True, fancy_gaps=True, show_legend=True, color_scheme='hydrophobicity') 123 124plt.tight_layout() 125plt.show() 126``` 127 128## Export (`export`) 129 130```python 131from pathlib import Path 132from msaexplorer import explore, export 133 134aln = explore.MSA(str(Path('example_alignments') / 'DNA.fasta'), zoom_range=(0, 300)) 135aln.reference_id = aln.sequence_ids[0] 136 137variants = aln.get_snps() 138entropy = aln.calc_entropy() 139consensus = aln.get_consensus() 140 141# return as strings 142vcf_text = export.snps(variants, format_type='vcf') 143stats_text = export.stats(entropy) 144fasta_text = export.fasta(consensus, header='consensus_dna') 145 146# or write to disk (path argument) 147# export.snps(variants, format_type='tabular', path='results/snps') 148# export.stats(entropy, path='results/entropy.tsv') 149``` 150 151## Notes 152 153- `zoom` can be reset with `aln.zoom = None`. 154- If no `reference_id` is set, methods that need a reference use the consensus. 155- For API-level details, inspect module/class docstrings in `explore`, `draw`, and `export`. 156 157""" 158 159from importlib.metadata import version, PackageNotFoundError 160 161try: 162 __version__ = version("msaexplorer") 163except PackageNotFoundError: 164 __version__ = "unknown"