msaexplorer

MSAexplorer

MSAexplorer is a lightweight toolkit to explore, plot, and export multiple sequence alignments. The API is organized into three modules:

  • explore: parse alignments/annotations and compute statistics.
  • draw: create matplotlib plots directly from MSA objects or file paths.
  • export: serialize computed results (SNPs, stats, FASTA, ORFs, recovery tables).

All examples below are tested with files from example_alignments/.

Quick Start (explore)

from pathlib import Path
from msaexplorer import explore

base = Path('example_alignments')
aln = explore.MSA(str(base / 'DNA.fasta'))

# set reference and zoom window (start inclusive, end exclusive)
aln.reference_id = aln.sequence_ids[0]
aln.zoom = (0, 300)

print(aln.aln_type)        # 'DNA'
print(len(aln), aln.length)
print(aln.get_reference_coords())

Biopython Interoperability

from pathlib import Path
from Bio import AlignIO, SeqIO
from msaexplorer import explore

base = Path('example_alignments')

# alignment from Bio.Align.MultipleSeqAlignment
bio_aln = AlignIO.read(str(base / 'DNA.fasta'), 'fasta')
aln = explore.MSA(bio_aln, reference_id=bio_aln[0].id, zoom_range=(0, 200))

# annotation from Bio.SeqIO GenBank iterator
gb_iter = SeqIO.parse(str(base / 'DNA_RNA.gb'), 'genbank')
ann = explore.Annotation(aln, gb_iter)
print(ann.ann_type, list(ann.features.keys())[:3])

Working with Downstream Dataclasses

from pathlib import Path
from msaexplorer import explore

aln = explore.MSA(str(Path('example_alignments') / 'DNA.fasta'), zoom_range=(0, 300))
aln.reference_id = aln.sequence_ids[0]

entropy = aln.calc_entropy()                              # AlignmentStats
length_stats = aln.calc_length_stats()                    # LengthStats
dist_to_ref = aln.calc_pairwise_distance_to_reference()   # PairwiseDistance
variants = aln.get_snps()                                 # VariantCollection
orfs = aln.get_non_overlapping_conserved_orfs(90)         # OrfCollection

print(entropy.stat_name, entropy.positions[:3], entropy.values[:3])
print(length_stats.mean_length, length_stats.std_length)
print(dist_to_ref.reference_id, dist_to_ref.sequence_ids[:2], dist_to_ref.distances[:2])
print(variants.chrom, len(variants))
print(orfs.keys()[:3])

Plotting (draw)

All plotting functions return a matplotlib Axes. You can either:

  1. pass an existing axis (best for multi-panel figures), or
  2. pass only an alignment/path for a one-liner plot.

One-Liner Examples (path input)

from msaexplorer import draw
import matplotlib.pyplot as plt

draw.identity_alignment('example_alignments/DNA.fasta')
plt.show()
draw.stat_plot('example_alignments/DNA.fasta', stat_type='entropy', rolling_average=5)
plt.show()

Multi-Panel Figure with Main Plot Types

# import necessary packages
from pathlib import Path
import matplotlib.pyplot as plt
from msaexplorer import explore, draw

# Example 1
# load DNA alignment
base = Path('example_alignments')
aln = explore.MSA(str(base / 'DNA.fasta'), zoom_range=(0, 100))

# ini figure
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(14, 15), height_ratios=[1, 1, 10])
# plot
draw.stat_plot(aln, ax=ax[0], stat_type='coverage', rolling_average=1, show_title=True)
draw.sequence_logo(aln, ax=ax[1], plot_type='logo')
draw.identity_alignment(aln, ax=ax[2], show_consensus=True, show_seq_names=True, fancy_gaps=True, show_legend=True, color_scheme='standard', show_identity_sequence=True)

plt.tight_layout()
plt.show()

# Example 2
# load AA alignment
aln = explore.MSA(str(base / 'AS.fasta'))
# ini figure
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 5), height_ratios=[1, 2])

draw.stat_plot(aln, ax=ax[0], stat_type='entropy', rolling_average=5, show_title=True)
draw.identity_alignment(aln, ax=ax[1], show_consensus=True, show_seq_names=True, fancy_gaps=True, show_legend=True, color_scheme='hydrophobicity')

plt.tight_layout()
plt.show()

Export (export)

from pathlib import Path
from msaexplorer import explore, export

aln = explore.MSA(str(Path('example_alignments') / 'DNA.fasta'), zoom_range=(0, 300))
aln.reference_id = aln.sequence_ids[0]

variants = aln.get_snps()
entropy = aln.calc_entropy()
consensus = aln.get_consensus()

# return as strings
vcf_text = export.snps(variants, format_type='vcf')
stats_text = export.stats(entropy)
fasta_text = export.fasta(consensus, header='consensus_dna')

# or write to disk (path argument)
# export.snps(variants, format_type='tabular', path='results/snps')
# export.stats(entropy, path='results/entropy.tsv')

Notes

  • zoom can be reset with aln.zoom = None.
  • If no reference_id is set, methods that need a reference use the consensus.
  • For API-level details, inspect module/class docstrings in explore, draw, and export.
  1r"""
  2# MSAexplorer
  3
  4MSAexplorer is a lightweight toolkit to **explore**, **plot**, and **export** multiple sequence alignments.
  5The API is organized into three modules:
  6
  7- `explore`: parse alignments/annotations and compute statistics.
  8- `draw`: create matplotlib plots directly from `MSA` objects or file paths.
  9- `export`: serialize computed results (SNPs, stats, FASTA, ORFs, recovery tables).
 10
 11All examples below are tested with files from `example_alignments/`.
 12
 13## Quick Start (`explore`)
 14
 15```python
 16from pathlib import Path
 17from msaexplorer import explore
 18
 19base = Path('example_alignments')
 20aln = explore.MSA(str(base / 'DNA.fasta'))
 21
 22# set reference and zoom window (start inclusive, end exclusive)
 23aln.reference_id = aln.sequence_ids[0]
 24aln.zoom = (0, 300)
 25
 26print(aln.aln_type)        # 'DNA'
 27print(len(aln), aln.length)
 28print(aln.get_reference_coords())
 29```
 30
 31### Biopython Interoperability
 32
 33```python
 34from pathlib import Path
 35from Bio import AlignIO, SeqIO
 36from msaexplorer import explore
 37
 38base = Path('example_alignments')
 39
 40# alignment from Bio.Align.MultipleSeqAlignment
 41bio_aln = AlignIO.read(str(base / 'DNA.fasta'), 'fasta')
 42aln = explore.MSA(bio_aln, reference_id=bio_aln[0].id, zoom_range=(0, 200))
 43
 44# annotation from Bio.SeqIO GenBank iterator
 45gb_iter = SeqIO.parse(str(base / 'DNA_RNA.gb'), 'genbank')
 46ann = explore.Annotation(aln, gb_iter)
 47print(ann.ann_type, list(ann.features.keys())[:3])
 48```
 49
 50### Working with Downstream Dataclasses
 51
 52```python
 53from pathlib import Path
 54from msaexplorer import explore
 55
 56aln = explore.MSA(str(Path('example_alignments') / 'DNA.fasta'), zoom_range=(0, 300))
 57aln.reference_id = aln.sequence_ids[0]
 58
 59entropy = aln.calc_entropy()                              # AlignmentStats
 60length_stats = aln.calc_length_stats()                    # LengthStats
 61dist_to_ref = aln.calc_pairwise_distance_to_reference()   # PairwiseDistance
 62variants = aln.get_snps()                                 # VariantCollection
 63orfs = aln.get_non_overlapping_conserved_orfs(90)         # OrfCollection
 64
 65print(entropy.stat_name, entropy.positions[:3], entropy.values[:3])
 66print(length_stats.mean_length, length_stats.std_length)
 67print(dist_to_ref.reference_id, dist_to_ref.sequence_ids[:2], dist_to_ref.distances[:2])
 68print(variants.chrom, len(variants))
 69print(orfs.keys()[:3])
 70```
 71
 72## Plotting (`draw`)
 73
 74All plotting functions return a matplotlib `Axes`.
 75You can either:
 76
 771. pass an existing axis (best for multi-panel figures), or
 782. pass only an alignment/path for a one-liner plot.
 79
 80### One-Liner Examples (path input)
 81
 82```python
 83from msaexplorer import draw
 84import matplotlib.pyplot as plt
 85
 86draw.identity_alignment('example_alignments/DNA.fasta')
 87plt.show()
 88draw.stat_plot('example_alignments/DNA.fasta', stat_type='entropy', rolling_average=5)
 89plt.show()
 90```
 91
 92### Multi-Panel Figure with Main Plot Types
 93
 94```python
 95# import necessary packages
 96from pathlib import Path
 97import matplotlib.pyplot as plt
 98from msaexplorer import explore, draw
 99
100# Example 1
101# load DNA alignment
102base = Path('example_alignments')
103aln = explore.MSA(str(base / 'DNA.fasta'), zoom_range=(0, 100))
104
105# ini figure
106fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(14, 15), height_ratios=[1, 1, 10])
107# plot
108draw.stat_plot(aln, ax=ax[0], stat_type='coverage', rolling_average=1, show_title=True)
109draw.sequence_logo(aln, ax=ax[1], plot_type='logo')
110draw.identity_alignment(aln, ax=ax[2], show_consensus=True, show_seq_names=True, fancy_gaps=True, show_legend=True, color_scheme='standard', show_identity_sequence=True)
111
112plt.tight_layout()
113plt.show()
114
115# Example 2
116# load AA alignment
117aln = explore.MSA(str(base / 'AS.fasta'))
118# ini figure
119fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 5), height_ratios=[1, 2])
120
121draw.stat_plot(aln, ax=ax[0], stat_type='entropy', rolling_average=5, show_title=True)
122draw.identity_alignment(aln, ax=ax[1], show_consensus=True, show_seq_names=True, fancy_gaps=True, show_legend=True, color_scheme='hydrophobicity')
123
124plt.tight_layout()
125plt.show()
126```
127
128## Export (`export`)
129
130```python
131from pathlib import Path
132from msaexplorer import explore, export
133
134aln = explore.MSA(str(Path('example_alignments') / 'DNA.fasta'), zoom_range=(0, 300))
135aln.reference_id = aln.sequence_ids[0]
136
137variants = aln.get_snps()
138entropy = aln.calc_entropy()
139consensus = aln.get_consensus()
140
141# return as strings
142vcf_text = export.snps(variants, format_type='vcf')
143stats_text = export.stats(entropy)
144fasta_text = export.fasta(consensus, header='consensus_dna')
145
146# or write to disk (path argument)
147# export.snps(variants, format_type='tabular', path='results/snps')
148# export.stats(entropy, path='results/entropy.tsv')
149```
150
151## Notes
152
153- `zoom` can be reset with `aln.zoom = None`.
154- If no `reference_id` is set, methods that need a reference use the consensus.
155- For API-level details, inspect module/class docstrings in `explore`, `draw`, and `export`.
156
157"""
158
159from importlib.metadata import version, PackageNotFoundError
160
161try:
162    __version__ = version("msaexplorer")
163except PackageNotFoundError:
164    __version__ = "unknown"