序言:
Python 的可視化工具:以下截圖均用於展示圖表實例;如需了解部分對象的輸出結果,可參照我 GitHub 上的代碼,謝謝。
【課程3.6】 基本圖表繪制 plt.plot()
圖表類別:線形圖、柱狀圖、密度圖,以橫縱坐標兩個維度為主
同時可延展出多種其他圖表樣式
plt.plot(kind='line', ax=None, figsize=None, use_index=True, title=None, grid=None, legend=False,
style=None, logx=False, logy=False, loglog=False, xticks=None, yticks=None, xlim=None, ylim=None,
rot=None, fontsize=None, colormap=None, table=False, yerr=None, xerr=None, label=None, secondary_y=False, **kwds)
Series直接生成圖表
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Plot a Series directly: the index is the x axis and the values the y axis.
ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000))
ts = ts.cumsum()  # cumulative sum turns the noise into a random walk
ts.plot(
    kind="line",       # chart type: line, bar, barh, ...
    label="heheheh",   # legend label; a DataFrame uses its column names instead
    # Line style and marker only.  The original "--g." also carried a colour
    # symbol; recent pandas raises ValueError when a style string containing a
    # colour symbol is combined with the `color` keyword.
    style="--.",
    color="red",       # line colour
    alpha=0.4,         # opacity, 0-1
    use_index=True,    # use the index for the tick labels
    rot=45,            # tick label rotation in degrees
    grid=True,         # show the grid (plt.grid() is the usual alternative)
    ylim=[-50, 50],    # y axis limits
    yticks=list(range(-50, 50, 10)),  # y tick positions
    figsize=(8, 4),    # figure size
    title="test",      # figure title
    legend=True,       # show the legend (plt.legend() is the usual alternative)
)
# plt.grid(True, linestyle = "--",color = "gray", linewidth = "0.5",axis = 'x')  # grid
plt.legend()
# The same data can also be drawn with plt.plot().
DataFrame直接生成圖表
# Plot a DataFrame directly: one line per column, sharing the Series' date index.
df = pd.DataFrame(
    np.random.randn(1000, 4),
    index=ts.index,
    columns=list("ABCD"),
).cumsum()

# Keyword options collected in one place and handed to DataFrame.plot().
line_options = dict(
    style="--.",
    alpha=0.4,
    use_index=True,
    rot=45,
    grid=True,
    figsize=(8, 4),
    title="test",
    legend=True,
    subplots=False,      # True would draw each column on its own subplot
    colormap="Greens",
)
df.plot(kind="line", **line_options)
# The same data can also be drawn with plt.plot(df).
【課程3.7】 柱狀圖、堆疊圖
plt.plot(kind='bar/barh') , plt.bar()
柱狀圖與堆疊圖
# Bar charts and stacked bar charts, one per subplot row.
fig, axes = plt.subplots(4, 1, figsize=(10, 10))
s = pd.Series(np.random.randint(0, 10, 16), index=list("abcdefghijklmnop"))
df = pd.DataFrame(np.random.rand(10, 3), columns=["a", "b", "c"])

# The `ax` argument selects which subplot each chart is drawn on.
single_ax, grouped_ax, stacked_ax, horizontal_ax = axes

# Single-series bar chart.
s.plot.bar(ax=single_ax, color="k", grid=True, alpha=0.5)

# Multi-series (grouped) bar chart.
df.plot.bar(ax=grouped_ax, grid=True, colormap="Reds_r")

# Multi-series stacked bar chart: stacked=True piles the columns up.
df.plot.bar(ax=stacked_ax, grid=True, colormap="Blues_r", stacked=True)

# Horizontal stacked bars; kind="barh" and the .plot.barh() accessor are
# two spellings of the same chart.
df.plot(kind="barh", ax=horizontal_ax, grid=True, stacked=True, colormap="BuGn_r")
柱狀圖 plt.bar()
# Bar chart with plt.bar(): positive bars above the axis, negative bars below.
plt.figure(figsize=(10, 4))
x = np.arange(10)
y1 = np.random.rand(10)
y2 = -np.random.rand(10)

plt.bar(x, y1, width=1, facecolor="yellowgreen", edgecolor="white", yerr=y1 * 0.1)
# Error-bar lengths must be non-negative; y2 is negative, so take its
# magnitude (the original passed y2*0.1, which Matplotlib rejects).
plt.bar(x, y2, width=1, facecolor="lightskyblue", edgecolor="white", yerr=np.abs(y2) * 0.1)
# x, height: bar positions and bar heights
# width: bar width
# facecolor / edgecolor: fill colour and outline colour
# bottom: y baseline of each bar; offsetting it is how Gantt-style charts are built
# align: "center" (default) centres each bar on x, "edge" starts it at x
# xerr / yerr: error-bar lengths in the x / y direction

# Annotate each bar with its magnitude, formatted to two decimals.
for i, j in zip(x, y1):
    plt.text(i + 0.3, j - 0.15, "%.2f" % j, color="k")
for i, j in zip(x, y2):
    plt.text(i + 0.3, j + 0.05, "%.2f" % -j, color="k")
外嵌圖表plt.table()
# Embed a data table under a chart with plt.table().
# table(cellText=None, cellColours=None,cellLoc='right', colWidths=None,rowLabels=None, rowColours=None, rowLoc='left',
# colLabels=None, colColours=None, colLoc='center',loc='bottom', bbox=None)
data = [
    [66386, 174296, 75131, 577908, 32015],
    [58230, 381139, 78045, 99308, 160454],
    [89135, 80552, 152558, 497981, 603535],
    [78415, 81858, 150656, 193263, 69638],
    [139361, 331509, 343164, 781380, 52269],
]
columns = ("Freeze", "Wind", "Flood", "Quake", "Hail")
rows = [f"{years} year" for years in (100, 50, 20, 10, 5)]

# The frame carries the same column and row labels as the table below.
df = pd.DataFrame(data, columns=columns, index=rows)
print(df)

# Stacked bar chart that the table is attached to.
df.plot(kind="bar", grid=True, colormap="Blues_r", stacked=True, figsize=(8, 3))

# Header colours: five shades from the lower half of each colormap, reversed.
# BuPu / Reds can be swapped for any other colormap.
shades = np.linspace(0, 0.5, 5)
row_colours = plt.cm.BuPu(shades)[::-1]
col_colours = plt.cm.Reds(shades)[::-1]

plt.table(
    cellText=data,            # table body text
    cellLoc="center",         # text alignment inside each cell
    cellColours=None,
    rowLabels=rows,           # row header labels
    rowColours=row_colours,
    colLabels=columns,        # column header labels
    colColours=col_colours,
    rowLoc="right",           # alignment of the row labels
    loc="bottom",             # table position: left, right, top, bottom
)

# Hide the x tick labels.
plt.xticks([])
【課程3.8】 面積圖、填圖、餅圖
plt.plot.area()
plt.fill(), plt.fill_between()
plt.pie()
面積圖
# Area plots via Series.plot.area() / DataFrame.plot.area().
fig, axes = plt.subplots(2, 1, figsize=(8, 6))
column_names = ["a", "b", "c", "d"]
df1 = pd.DataFrame(np.random.rand(10, 4), columns=column_names)
df2 = pd.DataFrame(np.random.randn(10, 4), columns=column_names)

# Area plots are stacked by default; df1 holds only non-negative values.
df1.plot(kind="area", colormap="Greens_r", alpha=0.5, ax=axes[0])

# Stacking requires each column to be all positive or all negative.
# df2 mixes signs, so it is drawn unstacked.
df2.plot(kind="area", stacked=False, colormap="Set2", alpha=0.5, ax=axes[1])
# NaN values are filled with 0 automatically, so clean missing data first.
填圖
# Filled plots: fill() for a curve's own polygon, fill_between() for two curves.
fig, axes = plt.subplots(2, 1, figsize=(8, 6))
upper_ax, lower_ax = axes

# Damped sine wave and its mirror image, each filled as a polygon.
x = np.linspace(0, 1, 500)
y1 = np.sin(4 * np.pi * x) * np.exp(-5 * x)
y2 = -y1
upper_ax.fill(x, y1, "r", alpha=0.5, label="y1")
upper_ax.fill(x, y2, "g", alpha=0.5, label="y2")
# Equivalent single call: plt.fill(x, y1, "r", x, y2, "g", alpha = 0.5)

# Shade the region between sin(x) and sin(2x).
x = np.linspace(0, 5 * np.pi, 1000)
y1 = np.sin(x)
y2 = np.sin(2 * x)
lower_ax.fill_between(x, y1, y2, color="b", alpha=0.5, label="area")

# Add a legend and a grid to both subplots.
for ax in axes:
    ax.legend()
    ax.grid()
餅圖 plt.pie()
# Pie chart with plt.pie().
# plt.pie(x, explode=None, labels=None, colors=None, autopct=None, pctdistance=0.6, shadow=False, labeldistance=1.1, startangle=None,
# radius=None, counterclock=True, wedgeprops=None, textprops=None, center=(0, 0), frame=False, data=None)
s = pd.Series(3 * np.random.rand(4), index=["a", "b", "c", "d"], name="series")
plt.axis("equal")  # equal aspect ratio so the pie is drawn as a circle
plt.pie(
    s,                              # wedge sizes
    explode=[0.1, 0, 0, 0],         # radial offset of each wedge
    labels=s.index,                 # wedge labels
    colors=["r", "g", "b", "c"],    # wedge colours
    # A string autopct is applied as `fmt % pct`, so it needs a conversion
    # specifier.  The original ".2f%%" had none and raised TypeError.
    autopct="%.2f%%",
    pctdistance=0.6,                # distance of the percentage text from the centre, as a fraction of the radius
    labeldistance=1.2,              # distance of the labels from the centre, as a fraction of the radius
    shadow=True,                    # draw a shadow beneath the pie
    startangle=0,                   # rotation of the first wedge, in degrees
    radius=1.5,                     # pie radius
    frame=False,                    # whether to draw the axes frame
)
print(s)
# counterclock (not set here) chooses counter-clockwise or clockwise wedge order.
【課程3.9】 直方圖
plt.hist(x, bins=10, range=None, normed=False, weights=None, cumulative=False, bottom=None,
histtype='bar', align='mid', orientation='vertical',rwidth=None, log=False, color=None, label=None,
stacked=False, hold=None, data=None, **kwargs)
直方圖+密度圖
# Histogram overlaid with a kernel density estimate.
s = pd.Series(np.random.randn(1000))
s.hist(
    bins=20,                  # number of bins
    histtype="bar",           # bar, barstacked, step, stepfilled
    align="mid",              # bar alignment: "left", "mid", "right"
    orientation="vertical",   # "horizontal" or "vertical"
    alpha=0.5,
    # `normed` was removed from Matplotlib; `density=True` is its replacement.
    # It also puts the bars on the same scale as the KDE curve below.
    density=True,
)
# Density (KDE) curve on the same axes.
s.plot(kind="kde", style="k--")
堆疊直方圖
# Stacked histogram of four normal samples whose means are shifted apart.
plt.figure(num=1)
shifted_right = np.random.randn(1000) + 1
centred = np.random.randn(1000)
shifted_left = np.random.randn(1000) - 1
shifted_far_left = np.random.randn(1000) - 2
df = pd.DataFrame(
    {"a": shifted_right, "b": centred, "c": shifted_left, "d": shifted_far_left},
    columns=["a", "b", "c", "d"],
)

# DataFrame.plot.hist() / Series.plot.hist() draw all columns on one axes.
# stacked=True piles the per-column counts on top of each other.
df.plot(kind="hist", stacked=True, bins=20, colormap="Greens_r", alpha=0.5, grid=True)

# DataFrame.hist() instead produces one histogram per column.
df.hist(bins=50)
【課程3.10】 散點圖、矩陣散點圖
plt.scatter(), pd.scatter_matrix()
plt.scatter()散點圖
# Scatter plot with plt.scatter().
# plt.scatter(x, y, s=20, c=None, marker='o', cmap=None, norm=None, vmin=None, vmax=None,
# alpha=None, linewidths=None, edgecolors=None, data=None, **kwargs)
plt.figure(figsize=(8, 6))
x = np.random.randn(1000)
y = np.random.randn(1000)
plt.scatter(
    x,
    y,
    marker=".",
    # Marker area must be non-negative.  The original randn()*100 was negative
    # for about half the points, which gives NaN marker scales.
    s=np.abs(np.random.randn(1000)) * 100,
    cmap="Reds",     # colormap used to map `c` to colours
    c=y,             # per-point colour values
    alpha=0.8,
)
plt.grid()
# s: marker size
# c: marker colour
# vmin, vmax: scalar limits of the colour scale
# cmap: colormap
pd.scatter_matrix()散點矩陣
# Scatter-plot matrix with pd.plotting.scatter_matrix().
# pd.plotting.scatter_matrix(frame, alpha=0.5, figsize=None, ax=None,
# grid=False, diagonal='hist', marker='.', density_kwds=None, hist_kwds=None, range_padding=0.05, **kwds)
df = pd.DataFrame(np.random.randn(100, 4), columns=["a", "b", "c", "d"])
# The top-level pd.scatter_matrix alias was removed from pandas; the function
# lives in pandas.plotting.
pd.plotting.scatter_matrix(
    df,
    figsize=(10, 6),
    marker="o",
    diagonal="kde",       # "hist" or "kde": per-variable distribution on the diagonal
    alpha=0.5,
    range_padding=0.1,    # axis padding around the data; larger values leave more blank space
)
【課程3.11】 極坐標圖
調用subplot()創建子圖時通過設置projection='polar',便可創建一個極坐標子圖,然后調用plot()在極坐標子圖中繪圖
創建極坐標軸
# Draw the same data on polar axes (left) and Cartesian axes (right).
s = pd.Series(np.arange(20))
theta = np.arange(0, 2 * np.pi, 0.02)
print(s.head())
print(theta[:10])

# projection="polar" makes a subplot polar.
# fig.add_subplot(111, polar = True) is an alternative spelling.
fig = plt.figure(figsize=(8, 4))
ax1 = plt.subplot(1, 2, 1, projection="polar")
ax2 = plt.subplot(1, 2, 2)

# On polar axes the first argument is the angle in radians, the second the radius.
spiral = theta * 3
ax1.plot(theta, spiral, linestyle="--", linewidth=1)
ax1.plot(s, linestyle="--", marker=".", linewidth=2)
ax2.plot(theta, spiral, linestyle="--", linewidth=1)
ax2.plot(s)
plt.grid()
極坐標參數設置
# Polar axes settings: ax1 keeps the defaults, ax2 is customised.
theta = np.arange(0, 2 * np.pi, 0.02)
plt.figure(figsize=(8, 4))
ax1 = plt.subplot(1, 2, 1, projection="polar")
ax2 = plt.subplot(1, 2, 2, projection="polar")
for polar_ax in (ax1, ax2):
    polar_ax.plot(theta, theta / 6, "--", lw=2)

# set_theta_direction(): 1 (default) is counter-clockwise, -1 is clockwise.
ax2.set_theta_direction(-1)

# set_thetagrids(): angular grid lines and labels; the two counts must match.
quarter_turns = np.arange(0.0, 360.0, 90)
ax2.set_thetagrids(quarter_turns, ["a", "b", "c", "d"])
# set_rgrids(): radial grid lines; the radii must be positive.
ax2.set_rgrids(np.arange(0.2, 2, 0.4))

# set_theta_offset(): angular offset, counter-clockwise, in radians.
ax2.set_theta_offset(np.pi / 2)

# set_rlim(): displayed radial range.
ax2.set_rlim(0.2, 1.2)
# set_rmax(): displayed radial maximum.
ax2.set_rmax(2)
# set_rticks(): positions of the radial grid lines.
ax2.set_rticks(np.arange(0.1, 1.5, 0.2))
雷達圖1 - 極坐標的折線圖/填圖 - plt.plot()
# Radar chart 1: line + fill on polar axes.
plt.figure(figsize=(8, 4))
ax1 = plt.subplot(111, projection="polar")
ax1.set_title("radar map\n")  # title
ax1.set_rlim(0, 12)

# Three random series sampled at ten evenly spaced angles.
data1 = np.random.randint(1, 10, 10)
data2 = np.random.randint(1, 10, 10)
data3 = np.random.randint(1, 10, 10)
theta = np.arange(0, 2 * np.pi, 2 * np.pi / 10)

# Draw each series as a dashed outline and shade its interior.
for series_label, values in (("data1", data1), ("data2", data2), ("data3", data3)):
    ax1.plot(theta, values, ".--", label=series_label)
    ax1.fill(theta, values, alpha=0.2)
雷達圖2 - 極坐標的折線圖/填圖 - plt.polar()
# Radar chart 2: closed line + fill drawn with plt.polar().
labels = np.array(["a", "b", "c", "d", "e", "f"])  # one label per spoke
dataLenth = 6  # number of data points / spokes
data1 = np.random.randint(0, 10, dataLenth)
data2 = np.random.randint(0, 10, dataLenth)
angles = np.linspace(0, 2 * np.pi, dataLenth, endpoint=False)  # split the circle evenly

# Close each polygon by repeating its first point at the end.
data1 = np.concatenate((data1, [data1[0]]))
data2 = np.concatenate((data2, [data2[0]]))
angles = np.concatenate((angles, [angles[0]]))

plt.polar(angles, data1, "o-", linewidth=1)  # outline
plt.fill(angles, data1, alpha=0.25)          # fill
plt.polar(angles, data2, "o-", linewidth=1)
plt.fill(angles, data2, alpha=0.25)

# thetagrids() needs exactly one label per grid angle.  `angles` now holds a
# duplicated closing point, so drop it to match the six labels.
plt.thetagrids(angles[:-1] * 180 / np.pi, labels)
plt.ylim(0, 10)  # on polar axes the radial limits are set through ylim
極軸圖 - 極坐標的柱狀圖
# Polar bar chart.
plt.figure(figsize=(8, 4))
ax1 = plt.subplot(111, projection="polar")
ax1.set_title("radar map\n")  # title
ax1.set_rlim(0, 12)

# Ten random bar heights at evenly spaced angles.
data = np.random.randint(1, 10, 10)
theta = np.arange(0, 2 * np.pi, 2 * np.pi / 10)

bar = ax1.bar(theta, data, alpha=0.5)
# Colour each bar by its height.  The loop variable gets its own name so it
# no longer overwrites the `bar` container being iterated.
for r, patch in zip(data, bar):
    patch.set_facecolor(plt.cm.jet(r / 10.))

# Angular grid lines every 90 degrees; the empty label list hides the labels.
plt.thetagrids(np.arange(0.0, 360.0, 90), [])
【課程3.12】 箱型圖
箱型圖:又稱為盒須圖、盒式圖、盒狀圖或箱線圖,是一種用作顯示一組數據分散情況資料的統計圖
包含一組數據的:最大值、最小值、中位數、上四分位數(Q3)、下四分位數(Q1)、異常值
① 中位數 → 一組數據平均分成兩份,中間的數
② 下四分位數Q1 → 是將序列平均分成四份,位置計算有(n+1)/4與(n-1)/4兩種,一般使用(n+1)/4
③ 上四分位數Q3 → 是將序列平均分成四份,位置計算為(1+n)/4*3,例如n=8時為6.75
④ 內限 → T形的盒須就是內限,最大值區間Q3+1.5IQR,最小值區間Q1-1.5IQR (IQR=Q3-Q1)
⑤ 外限 → 內限之外的第二道界限,最大值區間Q3+3IQR,最小值區間Q1-3IQR (IQR=Q3-Q1)
⑥ 異常值 → 內限之外 - 中度異常,外限之外 - 極度異常
plt.plot.box(),plt.boxplot()
plt.plot.box()繪制
# Box plots via DataFrame.plot.box().
fig, axes = plt.subplots(2, 1, figsize=(10, 6))
df = pd.DataFrame(np.random.rand(10, 5), columns=list("ABCDE"))

# Per-element colours of the box plot.
color = {
    "boxes": "DarkGreen",       # box outline
    "whiskers": "DarkOrange",   # vertical lines between the box and the caps
    "medians": "DarkBlue",      # median line
    "caps": "Gray",             # horizontal end caps
}

# Vertical boxes; `color` applies the colour dict above.
df.plot(kind="box", ylim=[0, 1.2], grid=True, color=color, ax=axes[0])

# Horizontal boxes: vert defaults to True, and positions sets where each box
# sits along the axis.
df.plot(
    kind="box",
    vert=False,
    positions=[1, 4, 5, 6, 8],
    ax=axes[1],
    grid=True,
    color=color,
)
plt.boxplot()繪制
# Box plot via DataFrame.boxplot(), then restyle the returned artists.
# plt.boxplot(x, notch=None, sym=None, vert=None, whis=None, positions=None, widths=None, patch_artist=None, bootstrap=None,
# usermedians=None, conf_intervals=None, meanline=None, showmeans=None, showcaps=None, showbox=None, showfliers=None, boxprops=None,
# labels=None, flierprops=None, medianprops=None, meanprops=None, capprops=None, whiskerprops=None, manage_xticks=True, autorange=False,
# zorder=None, hold=None, data=None)
df = pd.DataFrame(np.random.rand(10, 5), columns=list("ABCDE"))
plt.figure(figsize=(10, 4))

boxplot_options = dict(
    sym="o",               # outlier marker shape (see marker styles)
    vert=True,             # vertical boxes
    whis=1.5,              # whisker reach in IQR units (default 1.5); a pair such as [5, 95] pins the whiskers to those percentiles
    patch_artist=True,     # True fills the box between the quartiles
    meanline=False,        # whether the mean is drawn as a line...
    showmeans=True,        # ...and whether it is shown at all
    showbox=True,          # show the box
    showcaps=True,         # show the end caps
    showfliers=True,       # show the outliers
    notch=False,           # notched box
    return_type="dict",    # return the artists as a dict
)
f = df.boxplot(**boxplot_options)
plt.title("boxplot")
print(f)

# Restyle each group of artists in the returned dict:
#   boxes    - the boxes
#   medians  - median lines
#   whiskers - vertical lines from the box to the caps
#   fliers   - outliers
#   caps     - horizontal end caps
#   means    - mean markers (left unstyled here)
for box in f["boxes"]:
    box.set(color="b", linewidth=1)      # box outline colour
    box.set(facecolor="b", alpha=0.5)    # box fill colour
for whisker in f["whiskers"]:
    whisker.set(color="k", linewidth=0.5, linestyle="-")
for cap in f["caps"]:
    cap.set(color="red", linewidth=2)
for median in f["medians"]:
    median.set(color="DarkBlue", linewidth=2)
for flier in f["fliers"]:
    flier.set(marker="o", color="y", alpha=0.5)
plt.boxplot()繪制
# Grouped box plots via DataFrame.boxplot(by=...).
df = pd.DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"])
# Two categorical columns to group by: X splits the rows in half, Y alternates.
df["X"] = pd.Series(list("AAAAABBBBB"))
df["Y"] = pd.Series(list("ABABABABAB"))
print(df.head())

# by: column(s) whose values define the groups.
df.boxplot(by="X")
# column: which data columns get a subplot; here they are grouped by (X, Y) pairs.
df.boxplot(column=["Col1", "Col2"], by=["X", "Y"])