大家好,今天为大家分享一个有趣的 Python 库 - pyCirclize。
GitHub地址:https://github.com/moshi4/pyCirclize
pyCirclize是一个基于matplotlib实现的圆形可视化Python包。该包旨在让用户能够在Python中轻松、美观地绘制圆形图,例如Circos图和弦图(Chord Diagram)。此外,它还实现了用于生物信息学领域的基因组和系统发育树可视化方法。pyCirclize的设计受到了circlize和pyCircos的启发。
安装
安装pycirclize库非常简单,可以通过pip命令进行安装:
pip install pycirclize
安装完成后,即可开始使用pycirclize库进行圆形图的创建和可视化。
基本功能
1. 绘制Circos图
pycirclize库可以帮助用户快速绘制Circos图:在每个扇区(sector)上添加折线、散点、柱状等轨道(track),并在扇区之间绘制连接带(link)。
# Example 1: Circos plot with line / scatter / bar tracks on every sector,
# plus links between sector regions. The figure is saved as example01.png.
from pycirclize import Circos
import numpy as np

# Fix the RNG seed so the random y values are reproducible.
np.random.seed(0)

# Sector name -> sector size.
sectors = {"A": 10, "B": 15, "C": 12, "D": 20, "E": 15}
circos = Circos(sectors, space=5)

for sector in circos.sectors:
    # Plot sector name
    sector.text(f"Sector: {sector.name}", r=110, size=15)

    # Create x positions & random y values
    x = np.arange(sector.start, sector.end) + 0.5
    y = np.random.randint(0, 100, len(x))

    # Plot lines
    track1 = sector.add_track((80, 100), r_pad_ratio=0.1)
    track1.xticks_by_interval(interval=1)
    track1.axis()  # must be called; a bare `track1.axis` is a no-op
    track1.line(x, y)

    # Plot points
    track2 = sector.add_track((55, 75), r_pad_ratio=0.1)
    track2.axis()
    track2.scatter(x, y)

    # Plot bars
    track3 = sector.add_track((30, 50), r_pad_ratio=0.1)
    track3.axis()
    track3.bar(x, y)

# Plot links; each region is (sector name, start, end).
circos.link(("A", 0, 3), ("B", 15, 12))
circos.link(("B", 0, 3), ("C", 7, 11), color="skyblue")
circos.link(("C", 2, 5), ("E", 15, 12), color="chocolate", direction=1)
circos.link(
    ("D", 3, 5),
    ("D", 18, 15),
    color="lime",
    ec="black",
    lw=0.5,
    hatch="//",
    direction=2,
)
circos.link(("D", 8, 10), ("E", 2, 8), color="violet", ec="red", lw=1.0, ls="dashed")

circos.savefig("example01.png")
输出结果:
2. 基础和弦图
# Example 2: chord diagram built directly from a from-to matrix.
# The figure is saved as example02.png.
from pycirclize import Circos
import pandas as pd

# Create matrix dataframe (3 x 6)
row_names = ["F1", "F2", "F3"]
col_names = ["T1", "T2", "T3", "T4", "T5", "T6"]
matrix_data = [
    [10, 16, 7, 7, 10, 8],
    [4, 9, 10, 12, 12, 7],
    [17, 13, 7, 4, 20, 4],
]
matrix_df = pd.DataFrame(matrix_data, index=row_names, columns=col_names)

# Initialize Circos from matrix for plotting Chord Diagram
circos = Circos.initialize_from_matrix(
    matrix_df,
    space=5,
    cmap="tab10",
    label_kws=dict(size=12),
    link_kws=dict(ec="black", lw=0.5, direction=1),
)

circos.savefig("example02.png")
输出结果:
3. 系统发育树
# Example 3: circular phylogenetic tree with per-group line/label colors
# and a legend placed at the center. The figure is saved as example04.png.
from pycirclize import Circos
from pycirclize.utils import load_example_tree_file, ColorCycler
from matplotlib.lines import Line2D

# Initialize Circos from phylogenetic tree
tree_file = load_example_tree_file("large_example.nwk")
circos, tv = Circos.initialize_from_tree(
    tree_file,
    r_lim=(30, 100),
    leaf_label_size=5,
    line_kws=dict(color="lightgrey", lw=1.0),
)

# Define group-species dict for tree annotation
# In this example, set minimum species list to specify group's MRCA node
group_name2species_list = dict(
    Monotremata=["Tachyglossus_aculeatus", "Ornithorhynchus_anatinus"],
    Marsupialia=["Monodelphis_domestica", "Vombatus_ursinus"],
    Xenarthra=["Choloepus_didactylus", "Dasypus_novemcinctus"],
    Afrotheria=["Trichechus_manatus", "Chrysochloris_asiatica"],
    Euarchontes=["Galeopterus_variegatus", "Theropithecus_gelada"],
    Glires=["Oryctolagus_cuniculus", "Microtus_oregoni"],
    Laurasiatheria=["Talpa_occidentalis", "Mirounga_leonina"],
)

# Set tree line color & label color
ColorCycler.set_cmap("tab10")
# ColorCycler must be *called* to obtain a color; storing the bare class
# would pass a non-color object to matplotlib.
group_name2color = {name: ColorCycler() for name in group_name2species_list}
for group_name, species_list in group_name2species_list.items():
    color = group_name2color[group_name]
    tv.set_node_line_props(species_list, color=color, apply_label_color=True)

# Plot figure & set legend on center
fig = circos.plotfig()
_ = circos.ax.legend(
    handles=[Line2D([], [], label=n, color=c) for n, c in group_name2color.items()],
    labelcolor=group_name2color.values(),
    fontsize=6,
    loc="center",
    bbox_to_anchor=(0.5, 0.5),
)
fig.savefig("example04.png")
输出结果:
4. 雷达图
# Example 4: radar chart comparing three rows of a DataFrame over seven
# columns. The figure is saved as example05.png.
from pycirclize import Circos
import pandas as pd

# Create RPG jobs parameter dataframe (3 jobs, 7 parameters)
df = pd.DataFrame(
    data=[
        [80, 80, 80, 80, 80, 80, 80],
        [90, 20, 95, 95, 30, 30, 80],
        [60, 90, 20, 20, 100, 90, 50],
    ],
    index=["Hero", "Warrior", "Wizard"],
    columns=["HP", "MP", "ATK", "DEF", "SP.ATK", "SP.DEF", "SPD"],
)

# Initialize Circos instance for radar chart plot
circos = Circos.radar_chart(
    df,
    vmax=100,
    marker_size=6,
    grid_interval_ratio=0.2,
)

# Plot figure & set legend on upper right
fig = circos.plotfig()
_ = circos.ax.legend(loc="upper right", fontsize=10)
fig.savefig("example05.png")
输出结果:
5. 绘制基因组图
# Example 5: circular genome plot from a GFF file, with forward/reverse CDS
# arrows and product labels. The figure is saved as example06.png.
from pycirclize import Circos
from pycirclize.parser import Gff
from pycirclize.utils import load_prokaryote_example_file

# Load GFF file
gff_file = load_prokaryote_example_file("enterobacteria_phage.gff")
gff = Gff(gff_file)

# Single sector named after the GFF record and sized by its range.
circos = Circos(sectors={gff.name: gff.range_size})
circos.text("Enterobacteria phage (NC_000902)", size=15)

sector = circos.sectors[0]
cds_track = sector.add_track((90, 100))
cds_track.axis(fc="#EEEEEE", ec="none")

# Plot forward CDS
cds_track.genomic_features(
    gff.extract_features("CDS", target_strand=1),
    plotstyle="arrow",
    r_lim=(95, 100),
    fc="salmon",
)

# Plot reverse CDS
cds_track.genomic_features(
    gff.extract_features("CDS", target_strand=-1),
    plotstyle="arrow",
    r_lim=(90, 95),
    fc="skyblue",
)

# Extract CDS product labels
pos_list, labels = [], []
for feat in gff.extract_features("CDS"):
    # Names now match the attributes they read; the label is placed at the
    # feature midpoint, so the value of `pos` is the same as before.
    start, end = int(str(feat.location.start)), int(str(feat.location.end))
    pos = (start + end) / 2
    label = feat.qualifiers.get("product", [""])[0]
    # Skip features with no product name or a "hypothetical..." one.
    if label == "" or label.startswith("hypothetical"):
        continue
    # Truncate long labels to 20 characters.
    if len(label) > 20:
        label = label[:20] + "..."
    pos_list.append(pos)
    labels.append(label)

# Plot CDS product labels on outer position
cds_track.xticks(
    pos_list,
    labels,
    label_orientation="vertical",
    show_bottom_line=True,
    label_size=6,
    line_kws=dict(ec="grey"),
)

# Plot xticks & intervals on inner position
cds_track.xticks_by_interval(
    interval=5000,
    outer=False,
    show_bottom_line=True,
    label_formatter=lambda v: f"{v / 1000:.1f} Kb",
    label_orientation="vertical",
    line_kws=dict(ec="grey"),
)

fig = circos.plotfig()
fig.savefig("example06.png")
输出结果:
6. 绘制人类基因组图
# Example 6a: human genome (hg38) chromosomes with cytoband tracks.
# The figure is saved as example07.png.
from pycirclize import Circos
from pycirclize.utils import load_eukaryote_example_dataset

# Load hg38 dataset (https://github.com/moshi4/pycirclize-data/tree/main/eukaryote/hg38)
# The third returned item is not needed in this example.
chr_bed_file, cytoband_file, _ = load_eukaryote_example_dataset("hg38")

# Initialize Circos from BED chromosomes
circos = Circos.initialize_from_bed(chr_bed_file, space=3)
circos.text("Homo sapiens (hg38)", size=15)

# Add cytoband tracks from cytoband file
circos.add_cytoband_tracks((95, 100), cytoband_file)

# Plot chromosome name
for sector in circos.sectors:
    sector.text(sector.name, size=10)

fig = circos.plotfig()
fig.savefig("example07.png")
输出结果:
在此基础上,还可以为每条染色体设置颜色和刻度标签,并绘制染色体之间的连接(link):
# Example 6b: human genome (hg38) with per-chromosome colors, Mb tick labels
# and inter-chromosome links. The figure is saved as example08.png.
from pycirclize import Circos
from pycirclize.utils import ColorCycler, load_eukaryote_example_dataset

# Load hg38 dataset (https://github.com/moshi4/pycirclize-data/tree/main/eukaryote/hg38)
chr_bed_file, cytoband_file, chr_links = load_eukaryote_example_dataset("hg38")

# Initialize Circos from BED chromosomes
circos = Circos.initialize_from_bed(chr_bed_file, space=3)
circos.text("Homo sapiens (hg38)", deg=315, r=150, size=12)

# Add cytoband tracks from cytoband file
circos.add_cytoband_tracks((95, 100), cytoband_file)

# Create chromosome color mapping
ColorCycler.set_cmap("hsv")
chr_names = [s.name for s in circos.sectors]
colors = ColorCycler.get_color_list(len(chr_names))
chr_name2color = dict(zip(chr_names, colors))

# Plot chromosome name & xticks
for sector in circos.sectors:
    sector.text(sector.name, r=120, size=10, color=chr_name2color[sector.name])
    sector.get_track("cytoband").xticks_by_interval(
        40000000,
        label_size=8,
        label_orientation="vertical",
        label_formatter=lambda v: f"{v / 1000000:.0f} Mb",
    )

# Plot chromosome link
for link in chr_links:
    region1 = (link.query_chr, link.query_start, link.query_end)
    region2 = (link.ref_chr, link.ref_start, link.ref_end)
    color = chr_name2color[link.query_chr]
    # Only draw links that start on chr1/chr8/chr16 and end on a different
    # chromosome.
    if link.query_chr in ("chr1", "chr8", "chr16") and link.query_chr != link.ref_chr:
        circos.link(region1, region2, color=color)

fig = circos.plotfig()
fig.savefig("example08.png")
输出结果: