基本操作¶
数组创建¶
In [4]:
import numpy as np # Shift + Enter
In [7]:
# 创建可以将Python,中list列表转换成NumPy数组
l = [1,2,3,4,5]
print(type(l))
# NumPy数组
nd1 = np.array(l) # 输入一部分arr + tab(命令中自动补全,按键) 代码提示,自动补全
print(type(nd1))
print(nd1)
display(nd1) # 显示
<class 'list'> <class 'numpy.ndarray'> [1 2 3 4 5]
array([1, 2, 3, 4, 5])
In [8]:
nd2 = np.zeros(shape = (3,4),dtype = np.int16) # shift + tab提示方法的属性,使用
nd2
Out[8]:
array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]], dtype=int16)
In [9]:
nd3 = np.ones(shape = (3,5),dtype=np.float32)
nd3 # juppyter中执行程序,代码,最后一行,默认就是输出
Out[9]:
array([[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]], dtype=float32)
In [10]:
# 三维数组
nd4 = np.full(shape = (3,4,5),fill_value=3.1415926) # 生成任意指定的数组
nd4
Out[10]:
array([[[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926],
[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926],
[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926],
[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926]],
[[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926],
[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926],
[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926],
[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926]],
[[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926],
[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926],
[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926],
[3.1415926, 3.1415926, 3.1415926, 3.1415926, 3.1415926]]])
In [11]:
nd5 = np.random.randint(0,100,size = 20) # 从0,到100,生成随机数字,int,整数
nd5
Out[11]:
array([29, 55, 81, 12, 39, 37, 37, 89, 54, 37, 92, 78, 83, 41, 11, 32, 11,
86, 91, 98])
In [12]:
nd6 = np.random.rand(3,5) # 生成0~1之间随机数
nd6
Out[12]:
array([[0.67036576, 0.43653046, 0.2979778 , 0.06837794, 0.54090946],
[0.27114164, 0.76483251, 0.12721327, 0.71820577, 0.54516333],
[0.64264774, 0.33046503, 0.36786702, 0.69014896, 0.8171368 ]])
In [13]:
nd7 = np.random.randn(3,5) # 正态分布,平均值是0,标准差是1
display(nd7)
array([[-0.42218425, 0.95115056, -0.41017739, -0.26602147, 0.49654654],
[ 1.71202519, 2.1225116 , -0.81943756, -0.80074587, 1.00700306],
[ 0.86081524, 0.34641142, 1.13366415, -0.63908059, -2.04486065]])
In [14]:
nd8 = np.random.normal(loc = 175,scale = 10,size = (3,5)) # 正态分布,平均值是175,标准差是10
print(nd8)
[[167.97722757 182.16512646 174.03773962 174.96646108 176.93450653] [172.50858374 191.6568226 179.85685052 172.49917994 157.18267321] [177.39827496 180.37539934 173.85067095 151.834833 192.72103757]]
In [15]:
nd9 = np.arange(1,100,step = 10) # 等差数列,左闭右开,100取不到
nd9
Out[15]:
array([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])
In [16]:
nd10 = np.linspace(1,100,num = 19) # 等差数列,左闭右闭,num表示生成等差数列长度
nd10
Out[16]:
array([ 1. , 6.5, 12. , 17.5, 23. , 28.5, 34. , 39.5, 45. ,
50.5, 56. , 61.5, 67. , 72.5, 78. , 83.5, 89. , 94.5,
100. ])
查看数组属性¶
In [17]:
import numpy as np
nd = np.random.randn(5,3)
nd
Out[17]:
array([[-1.53114385, -0.11485466, -0.42988947],
[-0.15424231, -0.65266943, -1.03234285],
[ 0.74381265, -0.21856703, -0.43023787],
[-0.60374027, 1.41289403, 1.33099784],
[ 0.42553665, 1.49022796, -0.82975521]])
In [18]:
# 查看数组形状,返回了形状 shape = (5,3)
nd.shape
Out[18]:
(5, 3)
In [19]:
nd.dtype # 告诉数组的数据类型 float64 位,一位占一个0或者一个1
Out[19]:
dtype('float64')
In [20]:
nd.size # 尺寸,数组可以是多维的,请问,里面共有多少数据 3*5 = 15
Out[20]:
15
In [21]:
nd.ndim # 数组维度
Out[21]:
2
In [22]:
nd.itemsize # 条目 尺寸长度 8 字节
# 数据类型是float64 64位 -----> 1个字节8位-----> 64/8 = 8 字节
Out[22]:
8
文件读写¶
In [23]:
nd1 = np.random.randint(0,100,size = (3,5))
nd2 = np.random.randn(3,5)
display(nd1,nd2)
array([[90, 28, 67, 62, 80],
[20, 31, 87, 92, 93],
[53, 66, 32, 42, 52]])
array([[ 0.73576191, -0.58661205, -0.1080972 , -1.49771413, -0.38173688],
[ 0.00835928, 1.69569423, 0.22548728, -0.80018898, -0.3942879 ],
[ 0.19742953, 0.53589805, 1.10191471, 1.20453664, -1.60685986]])
In [24]:
np.save('./data',nd1) # 把一个数据存到文件中
In [25]:
np.load('./data.npy') # 默认添加npy后缀
Out[25]:
array([[90, 28, 67, 62, 80],
[20, 31, 87, 92, 93],
[53, 66, 32, 42, 52]])
In [26]:
# 多个数据存到一个文件中
np.savez('./data.npz',a = nd1,abc = nd2) # 保存数据是起名:a,abc,称为key,自己命名
In [27]:
data = np.load('./data.npz')
data
Out[27]:
<numpy.lib.npyio.NpzFile at 0x7ff4ddd0e1d0>
In [28]:
data['a'] # 单引号
Out[28]:
array([[90, 28, 67, 62, 80],
[20, 31, 87, 92, 93],
[53, 66, 32, 42, 52]])
In [29]:
data['abc']
Out[29]:
array([[ 0.73576191, -0.58661205, -0.1080972 , -1.49771413, -0.38173688],
[ 0.00835928, 1.69569423, 0.22548728, -0.80018898, -0.3942879 ],
[ 0.19742953, 0.53589805, 1.10191471, 1.20453664, -1.60685986]])
In [30]:
data['www'] # 没有保存,无法获取
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-30-16132569e0e7> in <module> ----> 1 data['www'] # 没有保存,无法获取 /usr/local/lib64/python3.6/site-packages/numpy/lib/npyio.py in __getitem__(self, key) 257 return self.zip.read(key) 258 else: --> 259 raise KeyError("%s is not a file in the archive" % key) 260 261 KeyError: 'www is not a file in the archive'
In [31]:
np.savez_compressed('./data2.npz',x = nd1,y = nd2)
In [32]:
np.load('./data2.npz')['x']
Out[32]:
array([[90, 28, 67, 62, 80],
[20, 31, 87, 92, 93],
[53, 66, 32, 42, 52]])
In [33]:
np.savetxt(fname = './data.txt',# 文件名
X = nd1, # 数据
fmt='%0.2f', # 格式
delimiter=',')# 分隔符
In [34]:
np.savetxt(fname = './data.cvs',# 文件名
X = nd1, # 数据
fmt='%d', # 格式
delimiter=';')# 分隔符
In [35]:
np.loadtxt('./data.cvs',delimiter=';')
Out[35]:
array([[90., 28., 67., 62., 80.],
[20., 31., 87., 92., 93.],
[53., 66., 32., 42., 52.]])
In [36]:
np.loadtxt('./data.txt',delimiter=',')
Out[36]:
array([[90., 28., 67., 62., 80.],
[20., 31., 87., 92., 93.],
[53., 66., 32., 42., 52.]])
数据类型¶
In [37]:
# int8,int16,int32,int64,uint8无符号
# float16,float32,float64
# str字符串类型
# int8 表示 2**8个数字 256个 -128 ~ 127 有符号
# uint8 表示256个数字,无符号,表明只有正数:0 ~ 255
np.array([2,4,7],dtype = np.int8)
Out[37]:
array([2, 4, 7], dtype=int8)
In [38]:
np.array([-3,-7,255,108,0,256],dtype = np.uint8)
Out[38]:
array([253, 249, 255, 108, 0, 0], dtype=uint8)
In [39]:
np.random.randint(0,100,size = 10,dtype = 'int64')
Out[39]:
array([60, 78, 84, 30, 45, 28, 47, 92, 69, 83])
In [40]:
nd = np.random.rand(10,2)
nd
Out[40]:
array([[0.44493108, 0.72024556],
[0.54671827, 0.42103622],
[0.06699006, 0.23754349],
[0.15899371, 0.07287394],
[0.31975899, 0.91496018],
[0.15010473, 0.72895954],
[0.13697461, 0.96091523],
[0.83054823, 0.2960615 ],
[0.1664575 , 0.22952562],
[0.35626474, 0.00976409]])
In [41]:
nd.dtype
Out[41]:
dtype('float64')
In [42]:
np.asarray(nd,dtype = 'float16')
Out[42]:
array([[0.4448 , 0.72 ],
[0.547 , 0.4211 ],
[0.067 , 0.2375 ],
[0.1589 , 0.0729 ],
[0.3198 , 0.915 ],
[0.1501 , 0.729 ],
[0.137 , 0.961 ],
[0.8306 , 0.2961 ],
[0.1665 , 0.2295 ],
[0.3562 , 0.009766]], dtype=float16)
In [43]:
nd.astype(dtype = np.float16)
Out[43]:
array([[0.4448 , 0.72 ],
[0.547 , 0.4211 ],
[0.067 , 0.2375 ],
[0.1589 , 0.0729 ],
[0.3198 , 0.915 ],
[0.1501 , 0.729 ],
[0.137 , 0.961 ],
[0.8306 , 0.2961 ],
[0.1665 , 0.2295 ],
[0.3562 , 0.009766]], dtype=float16)
In [44]:
nd = np.random.randn(1000,3) # 默认数据类型是float64
np.save('./data1',nd)
In [45]:
np.save('./data2',nd.astype('float16'))
In [46]:
nd2 = np.array(list('abcdefghi'))
nd2
Out[46]:
array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'], dtype='<U1')
In [47]:
nd2.dtype
Out[47]:
dtype('<U1')
数组运算¶
基本运算¶
In [48]:
# 加减乘除指数幂运算
nd1 = np.random.randint(0,10,size = 5)
nd2 = np.random.randint(0,10,size = 5)
display(nd1,nd2)
array([8, 3, 5, 8, 0])
array([8, 3, 3, 1, 7])
In [49]:
nd3 = nd1 - nd2 # 返回一个新对象,原来的数组,内容不变!
nd3 # nd3数组操作后,接收的对象
Out[49]:
array([ 0, 0, 2, 7, -7])
In [50]:
nd1 * nd2 # 乘法
Out[50]:
array([64, 9, 15, 8, 0])
In [51]:
nd1 / nd2 # 除法
Out[51]:
array([1. , 1. , 1.66666667, 8. , 0. ])
In [52]:
nd1**nd2 # 幂运算
Out[52]:
array([16777216, 27, 125, 8, 0])
In [53]:
2**3
Out[53]:
8
In [54]:
np.power(2,3) # 表示2的3次幂
Out[54]:
8
In [55]:
np.power(nd1,nd2) # 表示nd1的nd2次幂,对应位置,进行计算
Out[55]:
array([16777216, 27, 125, 8, 0])
In [56]:
np.log(100) # 底数是自然底数e 2.718
Out[56]:
4.605170185988092
In [57]:
np.log10(1000) # 对数运算返回结果是:3
Out[57]:
3.0
In [58]:
np.log2(1024) # 返回结果就是:10
Out[58]:
10.0
逻辑运算¶
In [59]:
display(nd1,nd2)
array([8, 3, 5, 8, 0])
array([8, 3, 3, 1, 7])
In [60]:
nd1 > nd2
Out[60]:
array([False, False, True, True, False])
In [61]:
nd1 < nd2
Out[61]:
array([False, False, False, False, True])
In [62]:
nd1 >= nd2 # 表示nd1数组中的数据,是否大于等于nd2中的对应位置的数据,如果大于等于,放回True
Out[62]:
array([ True, True, True, True, False])
In [63]:
nd1 == nd2 # 两个等号表示逻辑判断,问,是否相等
Out[63]:
array([ True, True, False, False, False])
数组与标量计算¶
In [64]:
nd1
Out[64]:
array([8, 3, 5, 8, 0])
In [65]:
# 数字3,4,5……都是标量
nd1 + 10 # 所有的位置都加了10,广播
Out[65]:
array([18, 13, 15, 18, 10])
nd1 - 1024
In [66]:
nd1 * 256
Out[66]:
array([2048, 768, 1280, 2048, 0])
In [67]:
nd1 / 1024
Out[67]:
array([0.0078125 , 0.00292969, 0.00488281, 0.0078125 , 0. ])
In [68]:
# 数组可以做分母,注意不能有0
1/nd1
/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: divide by zero encountered in true_divide
Out[68]:
array([0.125 , 0.33333333, 0.2 , 0.125 , inf])
-= += *=直接改变原数组¶
In [69]:
1/np.array([1,3,0,5]) # 0不能作为分母计算结果:inf
/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:1: RuntimeWarning: divide by zero encountered in true_divide """Entry point for launching an IPython kernel.
Out[69]:
array([1. , 0.33333333, inf, 0.2 ])
In [70]:
display(nd1,nd2) # 没变化
array([8, 3, 5, 8, 0])
array([8, 3, 3, 1, 7])
In [71]:
nd1 -= 100 # 没有打印输出,说明,改变了原来的数组
In [72]:
nd1
Out[72]:
array([ -92, -97, -95, -92, -100])
In [73]:
nd2 +=100
nd2
Out[73]:
array([108, 103, 103, 101, 107])
In [74]:
nd1 *= 3
nd1
Out[74]:
array([-276, -291, -285, -276, -300])
In [75]:
# nd1 /= 10 数组不支持 /=
复制和视图¶
完全没有复制¶
In [76]:
a = np.random.randint(0,10,size = 5)
b = a # 赋值操作
display(a,b)
array([7, 1, 8, 0, 4])
array([7, 1, 8, 0, 4])
In [77]:
a is b # 返回True说明,赋值操作,a和b一回事
Out[77]:
True
In [78]:
a[0] = 1024 # 改变a那么b也发生了变化
display(a,b)
array([1024, 1, 8, 0, 4])
array([1024, 1, 8, 0, 4])
视图、查看或者浅拷贝¶
In [79]:
a = np.random.randint(0,100,size = 5)
b = a.view() # 视图,查看,浅拷贝
display(a,b)
array([12, 12, 6, 55, 10])
array([12, 12, 6, 55, 10])
In [80]:
a is b # 说明a和b不一样
Out[80]:
False
In [81]:
a.flags.owndata # a数组数据是自己的
Out[81]:
True
In [82]:
b.flags.owndata # b是浅拷贝a的数据,也就是b并不拥有自己的数据
Out[82]:
False
In [83]:
a[0] = 1024
b[1] = 2048 # 无论修改谁,最终结果两个数组都发生了变化
display(a,b)
array([1024, 2048, 6, 55, 10])
array([1024, 2048, 6, 55, 10])
深拷贝¶
In [84]:
a = np.random.randint(-100,0,size = 10)
b = a.copy() # 深拷贝,此时,a和b没有关系了
display(a,b)
array([-88, -25, -93, -3, -9, -77, -6, -72, -47, -39])
array([-88, -25, -93, -3, -9, -77, -6, -72, -47, -39])
In [85]:
display(a is b)
display(a.flags.owndata)
display(b.flags.owndata) # b 对象拥有自己的数据
False
True
True
In [86]:
a[0] = 1024
b[2] = 2048 # 井水不犯河水
display(a,b)
array([1024, -25, -93, -3, -9, -77, -6, -72, -47, -39])
array([ -88, -25, 2048, -3, -9, -77, -6, -72, -47, -39])
In [87]:
a = np.arange(1e8) # 0 ~ 1亿,数据量非常多的
a
Out[87]:
array([0.0000000e+00, 1.0000000e+00, 2.0000000e+00, ..., 9.9999997e+07,
9.9999998e+07, 9.9999999e+07])
In [88]:
b = a[[1,3,5,7,9,99]].copy() # 取出一部分数据,原来的数组,没有了,但是占内存特别大
del a # 删除原来的数组,内存优化
In [89]:
b
Out[89]:
array([ 1., 3., 5., 7., 9., 99.])
索引、切片和迭代¶
基本索引和切片¶
In [90]:
a = np.random.randint(0,30,size = 10)
a
Out[90]:
array([26, 11, 2, 21, 28, 24, 18, 13, 23, 0])
In [91]:
a[3] # 取一个
a[[1,3,5]] # 取多个
Out[91]:
array([11, 21, 24])
In [92]:
a[0:3] # 左闭右开
Out[92]:
array([26, 11, 2])
In [93]:
a[:3] # 如果冒号前面不写,默认从0开始
Out[93]:
array([26, 11, 2])
In [94]:
a[5:9] # 从某个索引开始切片
Out[94]:
array([24, 18, 13, 23])
In [95]:
a[5:] # 冒号后面不写内容,那么默认就是到左后
Out[95]:
array([24, 18, 13, 23, 0])
In [96]:
a[::2] # 两个中取一个
Out[96]:
array([26, 2, 28, 18, 23])
In [97]:
a[3::3] # 从索引3开始,每三个数中,取一个
Out[97]:
array([21, 18, 0])
In [98]:
a[::-1] # 倒着数,数组进行了颠倒
Out[98]:
array([ 0, 23, 13, 18, 24, 28, 21, 2, 11, 26])
In [99]:
a
Out[99]:
array([26, 11, 2, 21, 28, 24, 18, 13, 23, 0])
In [100]:
a[::-2] # 颠倒,两个中取一个
Out[100]:
array([ 0, 13, 24, 21, 11])
In [101]:
a[5::-3]
Out[101]:
array([24, 2])
In [102]:
a[1:7:2] # 从索引1开始到7结束,每两个中取一个
Out[102]:
array([11, 21, 24])
In [103]:
b = np.random.randint(0,30,size = (10,10))
b # 二维数组,多维数据索引和切片和上面的规律一样
Out[103]:
array([[ 2, 24, 24, 4, 24, 10, 18, 10, 7, 20],
[ 7, 27, 25, 18, 19, 28, 16, 22, 14, 18],
[ 4, 22, 18, 1, 2, 26, 28, 23, 22, 11],
[11, 19, 0, 21, 3, 3, 7, 3, 24, 2],
[29, 8, 25, 8, 8, 24, 19, 25, 13, 3],
[24, 24, 1, 27, 14, 26, 17, 11, 28, 24],
[ 4, 18, 16, 19, 4, 2, 13, 9, 21, 0],
[ 6, 16, 11, 4, 23, 7, 23, 6, 20, 18],
[20, 1, 25, 17, 15, 29, 20, 27, 3, 1],
[11, 14, 24, 17, 25, 15, 19, 12, 25, 24]])
In [104]:
b[1]
Out[104]:
array([ 7, 27, 25, 18, 19, 28, 16, 22, 14, 18])
In [105]:
b[[0,3,5]]
Out[105]:
array([[ 2, 24, 24, 4, 24, 10, 18, 10, 7, 20],
[11, 19, 0, 21, 3, 3, 7, 3, 24, 2],
[24, 24, 1, 27, 14, 26, 17, 11, 28, 24]])
In [106]:
b[1,6]
Out[106]:
16
In [107]:
b[3,[2,5,6]] # 多维数组,不怕,我们可以用逗号,分割
Out[107]:
array([0, 3, 7])
In [108]:
b[2:7,1::3] # 行:从2到索引7。列:从1开始,每3个中取一个数字
Out[108]:
array([[22, 2, 23],
[19, 3, 3],
[ 8, 8, 25],
[24, 14, 11],
[18, 4, 9]])
In [109]:
b[-1,-1] # 给-1表示倒着数
Out[109]:
24
In [110]:
b[-2,[-2,-3,-4]]
Out[110]:
array([ 3, 27, 20])
花式索引和索引技巧¶
In [111]:
a = np.arange(20)
a
Out[111]:
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19])
In [112]:
b = a[3:7] # 切片时,返回的数据,不是深拷贝
b
Out[112]:
array([3, 4, 5, 6])
In [113]:
b[0] = 1024
display(a,b)
array([ 0, 1, 2, 1024, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19])
array([1024, 4, 5, 6])
In [114]:
a = np.arange(20)
# 花式索引返回的深拷贝的数据
b = a[[3,4,5,6]] # 花式索引:就是在索引是,给了一个数组
display(a,b)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19])
array([3, 4, 5, 6])
In [115]:
b[0] = 1024
display(a,b)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19])
array([1024, 4, 5, 6])
In [116]:
a = np.random.randint(0,151,size = (100,3)) # 100名学生,参加了3门考试:Python、Math、En
a
Out[116]:
array([[ 13, 21, 141],
[ 25, 2, 96],
[ 91, 145, 85],
[ 90, 9, 80],
[119, 106, 132],
[104, 26, 145],
[ 42, 124, 84],
[ 5, 129, 10],
[ 4, 133, 49],
[100, 101, 68],
[141, 12, 142],
[106, 18, 29],
[117, 71, 79],
[ 75, 91, 48],
[ 11, 89, 105],
[143, 9, 97],
[104, 37, 66],
[122, 141, 45],
[ 92, 60, 45],
[ 4, 99, 122],
[ 74, 61, 68],
[ 31, 67, 59],
[ 73, 62, 87],
[100, 150, 58],
[ 40, 27, 30],
[ 59, 47, 11],
[ 9, 117, 125],
[ 59, 47, 60],
[ 69, 4, 10],
[ 15, 127, 17],
[ 90, 107, 73],
[146, 104, 118],
[ 66, 32, 15],
[ 33, 148, 119],
[ 66, 36, 7],
[ 88, 119, 54],
[127, 22, 99],
[130, 10, 13],
[ 89, 70, 132],
[ 72, 83, 138],
[114, 131, 77],
[ 41, 13, 5],
[ 27, 116, 66],
[ 67, 83, 15],
[ 59, 143, 125],
[ 45, 49, 48],
[ 56, 20, 40],
[118, 35, 73],
[ 60, 32, 77],
[ 79, 108, 117],
[ 3, 101, 5],
[143, 16, 123],
[ 37, 88, 132],
[ 47, 80, 5],
[114, 122, 8],
[ 0, 81, 137],
[135, 98, 94],
[ 35, 37, 23],
[ 39, 93, 38],
[130, 96, 24],
[129, 143, 46],
[ 59, 126, 14],
[ 49, 33, 38],
[ 81, 86, 78],
[149, 111, 139],
[125, 67, 92],
[ 62, 123, 147],
[108, 81, 105],
[ 80, 90, 87],
[ 10, 71, 111],
[135, 136, 54],
[146, 68, 68],
[ 89, 83, 108],
[139, 136, 126],
[ 41, 45, 125],
[ 27, 43, 14],
[ 67, 28, 65],
[ 16, 54, 139],
[ 91, 56, 52],
[ 29, 97, 82],
[ 46, 29, 32],
[ 10, 72, 62],
[133, 138, 124],
[ 44, 86, 52],
[138, 123, 54],
[ 79, 16, 66],
[110, 80, 15],
[ 30, 24, 111],
[ 72, 122, 110],
[110, 21, 105],
[132, 13, 146],
[ 13, 58, 27],
[ 83, 11, 94],
[ 19, 73, 25],
[ 79, 30, 84],
[ 35, 129, 23],
[134, 115, 14],
[108, 20, 87],
[ 73, 122, 43],
[ 14, 67, 110]])
In [117]:
cond = a >= 120 # 逻辑运算
# 根据条件,筛选数据,只要大于120,返回,一门大于120,就会返回这一门
a[cond]
Out[117]:
array([141, 145, 132, 145, 124, 129, 133, 141, 142, 143, 122, 141, 122,
150, 125, 127, 146, 148, 127, 130, 132, 138, 131, 143, 125, 143,
123, 132, 122, 137, 135, 130, 129, 143, 126, 149, 139, 125, 123,
147, 135, 136, 146, 139, 136, 126, 125, 139, 133, 138, 124, 138,
123, 122, 132, 146, 129, 134, 122])
In [118]:
# boolean True = 1;False = 0
# 三门科目的条件进行相乘
# 三个科目都是 大于120的同学
cond2 = cond[:,0]*cond[:,1]*cond[:,2]
a[cond2]
Out[118]:
array([[139, 136, 126],
[133, 138, 124]])
In [119]:
# 大于等于120,小于等于30找到
cond1 = a >=120
cond2 = a <= 30
a[cond2[:,0]*cond2[:,1]*cond2[:,2]]
Out[119]:
array([], shape=(0, 3), dtype=int64)
形状操作¶
数组变形¶
In [120]:
a = np.random.randint(0,10,size = (3,5))
a
Out[120]:
array([[2, 4, 9, 8, 0],
[3, 4, 8, 0, 4],
[8, 5, 1, 4, 4]])
In [121]:
a.reshape(5,3) # 只是改变形状
Out[121]:
array([[2, 4, 9],
[8, 0, 3],
[4, 8, 0],
[4, 8, 5],
[1, 4, 4]])
In [122]:
a.reshape(15,1,1)
Out[122]:
array([[[2]],
[[4]],
[[9]],
[[8]],
[[0]],
[[3]],
[[4]],
[[8]],
[[0]],
[[4]],
[[8]],
[[5]],
[[1]],
[[4]],
[[4]]])
In [128]:
a.reshape(-1,3) # -1表示数据,3自动计算-1 = 5
Out[128]:
array([[2, 4, 9],
[8, 0, 3],
[4, 8, 0],
[4, 8, 5],
[1, 4, 4]])
数组转置¶
In [131]:
# 转置,行变列,列变行
a.T # 矩阵 shape = (5,3)
Out[131]:
array([[2, 3, 8],
[4, 4, 5],
[9, 8, 1],
[8, 0, 4],
[0, 4, 4]])
In [132]:
np.transpose(a,(1,0)) # 行0,列1。默认情况下(0,1)----->调整(1,0)
Out[132]:
array([[2, 3, 8],
[4, 4, 5],
[9, 8, 1],
[8, 0, 4],
[0, 4, 4]])
In [137]:
np.transpose(a,(0,1))
Out[137]:
array([[2, 4, 9, 8, 0],
[3, 4, 8, 0, 4],
[8, 5, 1, 4, 4]])
In [138]:
b = np.random.randint(0,10,size = (3,5,7)) # shape = (0,1,2)
b
Out[138]:
array([[[3, 8, 1, 7, 2, 7, 3],
[3, 9, 8, 0, 4, 6, 2],
[4, 6, 0, 2, 9, 7, 0],
[9, 4, 9, 8, 2, 4, 5],
[3, 6, 3, 6, 8, 8, 1]],
[[0, 0, 1, 5, 2, 9, 6],
[5, 7, 0, 3, 8, 5, 9],
[9, 9, 2, 7, 0, 5, 8],
[9, 6, 5, 6, 0, 4, 4],
[2, 3, 2, 0, 1, 4, 6]],
[[9, 6, 9, 3, 3, 4, 1],
[7, 2, 7, 3, 9, 1, 9],
[2, 3, 5, 0, 5, 1, 8],
[8, 0, 6, 2, 6, 5, 0],
[1, 8, 9, 2, 6, 9, 6]]])
In [139]:
c = np.transpose(b,(2,1,0)) # 就是调整维度结构2和0维度数据对调
c.shape
Out[139]:
(7, 5, 3)
数据堆叠合并¶
In [140]:
nd1 = np.random.randint(0,10,size = (3,5))
nd2 = np.random.randint(0,10,size = (3,5))
display(nd1,nd2)
array([[4, 8, 1, 7, 2],
[3, 5, 4, 5, 5],
[0, 9, 1, 8, 5]])
array([[1, 5, 1, 6, 2],
[2, 3, 0, 1, 0],
[1, 5, 7, 3, 4]])
In [141]:
np.concatenate([nd1,nd2]) # 默认合并行增加
Out[141]:
array([[4, 8, 1, 7, 2],
[3, 5, 4, 5, 5],
[0, 9, 1, 8, 5],
[1, 5, 1, 6, 2],
[2, 3, 0, 1, 0],
[1, 5, 7, 3, 4]])
In [142]:
# 修改axis参数调整数据合并方向
np.concatenate([nd1,nd2],axis = 1) # axis 轴,方向 0 = 行,1 = 列
Out[142]:
array([[4, 8, 1, 7, 2, 1, 5, 1, 6, 2],
[3, 5, 4, 5, 5, 2, 3, 0, 1, 0],
[0, 9, 1, 8, 5, 1, 5, 7, 3, 4]])
In [143]:
np.hstack((nd1,nd2)) # 堆叠,摞起来,增多,合并 h表示水平,列增多
Out[143]:
array([[4, 8, 1, 7, 2, 1, 5, 1, 6, 2],
[3, 5, 4, 5, 5, 2, 3, 0, 1, 0],
[0, 9, 1, 8, 5, 1, 5, 7, 3, 4]])
In [144]:
np.vstack((nd1,nd2,nd2,nd2,nd1)) # v 竖直,行增加
Out[144]:
array([[4, 8, 1, 7, 2],
[3, 5, 4, 5, 5],
[0, 9, 1, 8, 5],
[1, 5, 1, 6, 2],
[2, 3, 0, 1, 0],
[1, 5, 7, 3, 4],
[1, 5, 1, 6, 2],
[2, 3, 0, 1, 0],
[1, 5, 7, 3, 4],
[1, 5, 1, 6, 2],
[2, 3, 0, 1, 0],
[1, 5, 7, 3, 4],
[4, 8, 1, 7, 2],
[3, 5, 4, 5, 5],
[0, 9, 1, 8, 5]])
In [145]:
a = np.random.randint(0,10,size = (3,5))
b = np.random.randint(0,10,size = (3,6))
display(a,b)
np.concatenate([a,b])
array([[7, 4, 6, 9, 0],
[5, 4, 4, 5, 9],
[2, 4, 5, 2, 9]])
array([[5, 8, 9, 5, 9, 3],
[5, 7, 8, 8, 9, 5],
[4, 4, 8, 2, 7, 5]])
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-145-4986daef2954> in <module> 2 b = np.random.randint(0,10,size = (3,6)) 3 display(a,b) ----> 4 np.concatenate([a,b]) <__array_function__ internals> in concatenate(*args, **kwargs) ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 5 and the array at index 1 has size 6
In [146]:
np.concatenate([a,b],axis =1)
Out[146]:
array([[7, 4, 6, 9, 0, 5, 8, 9, 5, 9, 3],
[5, 4, 4, 5, 9, 5, 7, 8, 8, 9, 5],
[2, 4, 5, 2, 9, 4, 4, 8, 2, 7, 5]])
数组拆分¶
In [147]:
a = np.random.randint(0,100,size = (15,10))
a
Out[147]:
array([[69, 6, 25, 39, 60, 10, 60, 26, 42, 35],
[69, 89, 4, 40, 47, 56, 6, 92, 86, 8],
[89, 66, 77, 11, 1, 30, 22, 14, 0, 74],
[31, 65, 50, 9, 32, 78, 64, 40, 33, 9],
[73, 97, 15, 20, 89, 81, 82, 42, 72, 71],
[87, 1, 57, 13, 13, 40, 84, 20, 49, 75],
[71, 1, 8, 46, 89, 57, 46, 7, 72, 42],
[21, 52, 69, 75, 31, 19, 95, 75, 42, 86],
[16, 69, 93, 26, 90, 28, 97, 59, 11, 5],
[61, 27, 28, 42, 47, 96, 12, 22, 0, 72],
[77, 34, 13, 96, 59, 46, 95, 29, 44, 46],
[19, 55, 77, 95, 38, 10, 39, 94, 1, 82],
[83, 1, 12, 86, 91, 76, 83, 27, 98, 8],
[18, 55, 92, 44, 93, 31, 36, 96, 65, 52],
[ 2, 25, 61, 11, 53, 9, 22, 84, 8, 41]])
In [148]:
np.split(a,indices_or_sections=5) # 给数字,表示平均分成多少分
Out[148]:
[array([[69, 6, 25, 39, 60, 10, 60, 26, 42, 35],
[69, 89, 4, 40, 47, 56, 6, 92, 86, 8],
[89, 66, 77, 11, 1, 30, 22, 14, 0, 74]]),
array([[31, 65, 50, 9, 32, 78, 64, 40, 33, 9],
[73, 97, 15, 20, 89, 81, 82, 42, 72, 71],
[87, 1, 57, 13, 13, 40, 84, 20, 49, 75]]),
array([[71, 1, 8, 46, 89, 57, 46, 7, 72, 42],
[21, 52, 69, 75, 31, 19, 95, 75, 42, 86],
[16, 69, 93, 26, 90, 28, 97, 59, 11, 5]]),
array([[61, 27, 28, 42, 47, 96, 12, 22, 0, 72],
[77, 34, 13, 96, 59, 46, 95, 29, 44, 46],
[19, 55, 77, 95, 38, 10, 39, 94, 1, 82]]),
array([[83, 1, 12, 86, 91, 76, 83, 27, 98, 8],
[18, 55, 92, 44, 93, 31, 36, 96, 65, 52],
[ 2, 25, 61, 11, 53, 9, 22, 84, 8, 41]])]
In [149]:
np.split(a,indices_or_sections=2,axis =1) # axis = 1 表示列,平均分成两份
Out[149]:
[array([[69, 6, 25, 39, 60],
[69, 89, 4, 40, 47],
[89, 66, 77, 11, 1],
[31, 65, 50, 9, 32],
[73, 97, 15, 20, 89],
[87, 1, 57, 13, 13],
[71, 1, 8, 46, 89],
[21, 52, 69, 75, 31],
[16, 69, 93, 26, 90],
[61, 27, 28, 42, 47],
[77, 34, 13, 96, 59],
[19, 55, 77, 95, 38],
[83, 1, 12, 86, 91],
[18, 55, 92, 44, 93],
[ 2, 25, 61, 11, 53]]),
array([[10, 60, 26, 42, 35],
[56, 6, 92, 86, 8],
[30, 22, 14, 0, 74],
[78, 64, 40, 33, 9],
[81, 82, 42, 72, 71],
[40, 84, 20, 49, 75],
[57, 46, 7, 72, 42],
[19, 95, 75, 42, 86],
[28, 97, 59, 11, 5],
[96, 12, 22, 0, 72],
[46, 95, 29, 44, 46],
[10, 39, 94, 1, 82],
[76, 83, 27, 98, 8],
[31, 36, 96, 65, 52],
[ 9, 22, 84, 8, 41]])]
In [150]:
# 参数给列表,根据列表中的索引,进行切片
np.split(a,indices_or_sections=[1,5,9]) # 0~1,1~5,5~9,9~
Out[150]:
[array([[69, 6, 25, 39, 60, 10, 60, 26, 42, 35]]),
array([[69, 89, 4, 40, 47, 56, 6, 92, 86, 8],
[89, 66, 77, 11, 1, 30, 22, 14, 0, 74],
[31, 65, 50, 9, 32, 78, 64, 40, 33, 9],
[73, 97, 15, 20, 89, 81, 82, 42, 72, 71]]),
array([[87, 1, 57, 13, 13, 40, 84, 20, 49, 75],
[71, 1, 8, 46, 89, 57, 46, 7, 72, 42],
[21, 52, 69, 75, 31, 19, 95, 75, 42, 86],
[16, 69, 93, 26, 90, 28, 97, 59, 11, 5]]),
array([[61, 27, 28, 42, 47, 96, 12, 22, 0, 72],
[77, 34, 13, 96, 59, 46, 95, 29, 44, 46],
[19, 55, 77, 95, 38, 10, 39, 94, 1, 82],
[83, 1, 12, 86, 91, 76, 83, 27, 98, 8],
[18, 55, 92, 44, 93, 31, 36, 96, 65, 52],
[ 2, 25, 61, 11, 53, 9, 22, 84, 8, 41]])]
In [151]:
np.hsplit(a,indices_or_sections=2) # h水平,列方向上分割成了两份
Out[151]:
[array([[69, 6, 25, 39, 60],
[69, 89, 4, 40, 47],
[89, 66, 77, 11, 1],
[31, 65, 50, 9, 32],
[73, 97, 15, 20, 89],
[87, 1, 57, 13, 13],
[71, 1, 8, 46, 89],
[21, 52, 69, 75, 31],
[16, 69, 93, 26, 90],
[61, 27, 28, 42, 47],
[77, 34, 13, 96, 59],
[19, 55, 77, 95, 38],
[83, 1, 12, 86, 91],
[18, 55, 92, 44, 93],
[ 2, 25, 61, 11, 53]]),
array([[10, 60, 26, 42, 35],
[56, 6, 92, 86, 8],
[30, 22, 14, 0, 74],
[78, 64, 40, 33, 9],
[81, 82, 42, 72, 71],
[40, 84, 20, 49, 75],
[57, 46, 7, 72, 42],
[19, 95, 75, 42, 86],
[28, 97, 59, 11, 5],
[96, 12, 22, 0, 72],
[46, 95, 29, 44, 46],
[10, 39, 94, 1, 82],
[76, 83, 27, 98, 8],
[31, 36, 96, 65, 52],
[ 9, 22, 84, 8, 41]])]
In [152]:
np.vsplit(a,indices_or_sections=[3,7,11]) # v表示竖直,行切片,行分割
Out[152]:
[array([[69, 6, 25, 39, 60, 10, 60, 26, 42, 35],
[69, 89, 4, 40, 47, 56, 6, 92, 86, 8],
[89, 66, 77, 11, 1, 30, 22, 14, 0, 74]]),
array([[31, 65, 50, 9, 32, 78, 64, 40, 33, 9],
[73, 97, 15, 20, 89, 81, 82, 42, 72, 71],
[87, 1, 57, 13, 13, 40, 84, 20, 49, 75],
[71, 1, 8, 46, 89, 57, 46, 7, 72, 42]]),
array([[21, 52, 69, 75, 31, 19, 95, 75, 42, 86],
[16, 69, 93, 26, 90, 28, 97, 59, 11, 5],
[61, 27, 28, 42, 47, 96, 12, 22, 0, 72],
[77, 34, 13, 96, 59, 46, 95, 29, 44, 46]]),
array([[19, 55, 77, 95, 38, 10, 39, 94, 1, 82],
[83, 1, 12, 86, 91, 76, 83, 27, 98, 8],
[18, 55, 92, 44, 93, 31, 36, 96, 65, 52],
[ 2, 25, 61, 11, 53, 9, 22, 84, 8, 41]])]
广播机制¶
In [154]:
arr1 = np.array([0,1,2,3]*3)
print(arr1)
arr1.sort() # 排序,从小到大
arr1 = arr1.reshape(4,3)
arr1
[0 1 2 3 0 1 2 3 0 1 2 3]
Out[154]:
array([[0, 0, 0],
[1, 1, 1],
[2, 2, 2],
[3, 3, 3]])
In [155]:
arr2 = np.array([1,2,3])
display(arr1,arr2) # 形状不对应,依然可以进行运算:NumPy底层,为我们进行了广播
array([[0, 0, 0],
[1, 1, 1],
[2, 2, 2],
[3, 3, 3]])
array([1, 2, 3])
In [156]:
# 行不够,广播行
arr1 + arr2 # arr2 和arr1 中每一行,进行相加:广播机制
Out[156]:
array([[1, 2, 3],
[2, 3, 4],
[3, 4, 5],
[4, 5, 6]])
In [157]:
arr3 = np.array([[1],[2],[3],[4]])
display(arr1,arr3)
array([[0, 0, 0],
[1, 1, 1],
[2, 2, 2],
[3, 3, 3]])
array([[1],
[2],
[3],
[4]])
In [158]:
# 列不够,广播列
arr1 + arr3 # 广播,arr2和arr1中每一列,进行相加:广播
Out[158]:
array([[1, 1, 1],
[3, 3, 3],
[5, 5, 5],
[7, 7, 7]])
In [159]:
# 广播,和复制,意思有点近似
In [164]:
a = np.array([0,1,2,3,4,5,6,7]*3).reshape(3,4,2)
a
Out[164]:
array([[[0, 1],
[2, 3],
[4, 5],
[6, 7]],
[[0, 1],
[2, 3],
[4, 5],
[6, 7]],
[[0, 1],
[2, 3],
[4, 5],
[6, 7]]])
In [165]:
b = np.array([0,1,2,3,4,5,6,7]).reshape(4,2)
b
Out[165]:
array([[0, 1],
[2, 3],
[4, 5],
[6, 7]])
In [166]:
display(a.shape,b.shape)
(3, 4, 2)
(4, 2)
In [167]:
a * b # b形状(4,2),b 广播了三份
Out[167]:
array([[[ 0, 1],
[ 4, 9],
[16, 25],
[36, 49]],
[[ 0, 1],
[ 4, 9],
[16, 25],
[36, 49]],
[[ 0, 1],
[ 4, 9],
[16, 25],
[36, 49]]])
通用函数¶
元素级数字级别的方法¶
In [168]:
# abs、sqrt、square、exp、log、sin、cos、tan,maxinmum、minimum、
# all、any、inner、clip、round、trace、ceil、floor
In [169]:
a = np.array([-1,-3,-5,1,5,8,9])
c = np.random.randint(-5,10,size = 7)
display(a,c)
array([-1, -3, -5, 1, 5, 8, 9])
array([ 8, -4, 6, -1, 6, -1, -5])
In [170]:
np.abs(a) # 求绝对值
Out[170]:
array([1, 3, 5, 1, 5, 8, 9])
In [171]:
b = np.array([1,4,9,16,36,49])
b
Out[171]:
array([ 1, 4, 9, 16, 36, 49])
In [172]:
np.sqrt(b) # 开平发
Out[172]:
array([1., 2., 3., 4., 6., 7.])
In [173]:
np.square(b) # 平方
np.exp(3) # 自然底数e的多少次幂
np.log(20.085536) # 自然底数e对数求解
np.sin(np.pi/2) # 90度sin正弦值
np.cos(0) # 余弦值
np.tan(np.pi/6) # 正切,30度正切值
# 给两个数组,从中选取大的,或者选取小的
np.maximum(a,c) # 从a和c中选取最大的值
np.minimum(a,c) # 选取最小的值
Out[173]:
array([-1, -4, -5, -1, 5, -1, -5])
In [174]:
nd1 = np.array([1,3,0]) # 出现0那么对应False,非零True
nd2 = np.array([-1,-3,4,8])
display(nd1,nd2)
array([1, 3, 0])
array([-1, -3, 4, 8])
In [175]:
nd1.any() # 只要有一个True,返回True
Out[175]:
True
In [176]:
nd1.all() # 所有True,返回True
Out[176]:
False
In [177]:
nd2.all() # 所有的都是True,返回True
Out[177]:
True
In [178]:
a = np.array([1,2,3,4,5])
b = np.array([1,2,3,4,6])
np.inner(a,b) # 1*1 + 2*2 + 3*3 + 4*4 + 5*6 = 60
# 返回的是两个数组的内积,对应位置相乘加和
Out[178]:
60
In [180]:
nd1 = np.random.randint(0,100,size = 30)
nd1
Out[180]:
array([ 0, 22, 96, 6, 95, 62, 65, 95, 82, 67, 6, 56, 27, 79, 80, 40, 22,
84, 97, 74, 80, 18, 43, 60, 82, 59, 50, 86, 48, 90])
In [181]:
np.clip(nd1,10,80) # 数据裁剪,将小于10变成10,将大于80的变成80
Out[181]:
array([10, 22, 80, 10, 80, 62, 65, 80, 80, 67, 10, 56, 27, 79, 80, 40, 22,
80, 80, 74, 80, 18, 43, 60, 80, 59, 50, 80, 48, 80])
In [182]:
nd2 = np.random.randn(20)
nd2
Out[182]:
array([ 0.49134829, 0.29010687, 0.2460869 , 0.61815082, -0.94788053,
-0.46327909, -0.86474008, 0.11453481, 0.3857616 , -1.49319832,
0.9761417 , 0.84428886, -1.72183057, 0.77589501, -0.33813559,
0.08415212, -1.04590919, -1.25153568, 0.45393534, 0.64955645])
In [183]:
nd2.round(2)
Out[183]:
array([ 0.49, 0.29, 0.25, 0.62, -0.95, -0.46, -0.86, 0.11, 0.39,
-1.49, 0.98, 0.84, -1.72, 0.78, -0.34, 0.08, -1.05, -1.25,
0.45, 0.65])
In [184]:
np.ceil(np.array([2.7,2.1,2.05])) # 天花板,向上取整
Out[184]:
array([3., 3., 3.])
In [185]:
np.floor(np.array([2.99,2.9999,2.1])) # 向下取整
Out[185]:
array([2., 2., 2.])
In [190]:
a = np.random.randint(0,10,size = (3,3))
a
Out[190]:
array([[3, 5, 4],
[3, 9, 5],
[5, 2, 3]])
In [191]:
np.trace(a) # 就算对角线上的和
Out[191]:
15
where函数¶
In [192]:
import numpy as np
nd1 = np.array([1,3,5,7,9])
nd2 = np.array([2,4,6,8,10])
cond = np.array([True,False,False,True,True])
In [193]:
np.where(cond,nd1,nd2) # 条件如果是True,那么返回nd1中数据,如果Flase返回nd2中数据
Out[193]:
array([1, 4, 6, 7, 9])
In [194]:
a = np.random.randint(0,100,size = 50)
display(a) # 数据展示
np.where(a > 50,a,a + 20) # 大于50,返回多少;不然返回-100
array([66, 12, 60, 11, 98, 94, 60, 90, 8, 35, 26, 8, 38, 88, 90, 43, 48,
72, 17, 19, 44, 34, 36, 17, 60, 87, 53, 53, 5, 91, 24, 11, 71, 29,
14, 96, 21, 40, 73, 24, 35, 6, 83, 99, 12, 86, 71, 40, 60, 54])
Out[194]:
array([66, 32, 60, 31, 98, 94, 60, 90, 28, 55, 46, 28, 58, 88, 90, 63, 68,
72, 37, 39, 64, 54, 56, 37, 60, 87, 53, 53, 25, 91, 44, 31, 71, 49,
34, 96, 41, 60, 73, 44, 55, 26, 83, 99, 32, 86, 71, 60, 60, 54])
In [195]:
# 如果分数50~59分之间,自动加10分
a = np.random.randint(0,100,size = 50)
a
Out[195]:
array([30, 64, 79, 93, 26, 0, 68, 76, 48, 57, 49, 94, 94, 18, 59, 52, 8,
8, 80, 8, 35, 74, 88, 48, 10, 30, 18, 56, 29, 37, 14, 67, 3, 86,
79, 44, 53, 65, 63, 76, 88, 34, 68, 8, 36, 36, 99, 54, 5, 61])
In [196]:
cond = (a >=50) & (a < 60) # 与运算
np.where(cond,a + 10,a)
Out[196]:
array([30, 64, 79, 93, 26, 0, 68, 76, 48, 67, 49, 94, 94, 18, 69, 62, 8,
8, 80, 8, 35, 74, 88, 48, 10, 30, 18, 66, 29, 37, 14, 67, 3, 86,
79, 44, 63, 65, 63, 76, 88, 34, 68, 8, 36, 36, 99, 64, 5, 61])
排序¶
In [197]:
a = np.random.randint(0,100,size = 20)
a
Out[197]:
array([41, 46, 64, 22, 62, 8, 53, 5, 91, 35, 49, 11, 37, 54, 5, 76, 97,
81, 22, 61])
In [198]:
b = np.sort(a) # 打印输出,那么,原数组,没有改变
b
Out[198]:
array([ 5, 5, 8, 11, 22, 22, 35, 37, 41, 46, 49, 53, 54, 61, 62, 64, 76,
81, 91, 97])
In [199]:
a.sort() # 没有输出,说明,原数组上进行了排序
In [200]:
a
Out[200]:
array([ 5, 5, 8, 11, 22, 22, 35, 37, 41, 46, 49, 53, 54, 61, 62, 64, 76,
81, 91, 97])
In [201]:
index = a.argsort() # 返回排序的索引
display(index)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19])
In [202]:
# 根据索引,花式索引
a[index][::-1]
Out[202]:
array([97, 91, 81, 76, 64, 62, 61, 54, 53, 49, 46, 41, 37, 35, 22, 22, 11,
8, 5, 5])
集合操作¶
In [203]:
a = np.random.randint(0,30,size = 15)
b = np.random.randint(0,30,size = 15)
display(a,b)
array([29, 14, 27, 17, 24, 21, 4, 9, 7, 13, 2, 19, 23, 5, 21])
array([17, 0, 0, 22, 14, 10, 20, 14, 23, 25, 4, 7, 29, 10, 24])
In [204]:
np.intersect1d(a,b) # 交集:a和b中都有
Out[204]:
array([ 4, 7, 14, 17, 23, 24, 29])
In [205]:
np.union1d(a,b) # 并集:a和b中的所有,合并
Out[205]:
array([ 0, 2, 4, 5, 7, 9, 10, 13, 14, 17, 19, 20, 21, 22, 23, 24, 25,
27, 29])
In [206]:
np.setdiff1d(a,b) # 差集,a中有,b中没有
Out[206]:
array([ 2, 5, 9, 13, 19, 21, 27])
数学和统计函数¶
min、max、mean、median、sum、std、var、cumsum、cumprod、argmin、argmax、argwhere、cov、corrcoef
In [207]:
a = np.random.randint(0,100,size = (3,5))
a
Out[207]:
array([[69, 86, 40, 29, 24],
[65, 3, 15, 43, 9],
[32, 38, 24, 80, 61]])
In [208]:
a.min()
Out[208]:
3
In [209]:
a.max(axis = 0) # axis 轴,方向。axis = 0 行,axis = 1列
Out[209]:
array([69, 86, 40, 80, 61])
In [210]:
a.max(axis = 1)
Out[210]:
array([86, 65, 80])
In [211]:
a.mean() # 平均值
Out[211]:
41.2
In [212]:
np.median(a) # 中位数
Out[212]:
38.0
In [213]:
a.sum() # 求和
Out[213]:
618
In [214]:
a.std() # 标准差
Out[214]:
24.854778212649574
In [215]:
a.var() # 方差,数据内部波动
Out[215]:
617.76
In [216]:
a.cumsum() # 累计和
Out[216]:
array([ 69, 155, 195, 224, 248, 313, 316, 331, 374, 383, 415, 453, 477,
557, 618])
In [217]:
b = np.array([1,2,3,4,5,6,7])
b.cumprod() # 累乘和
Out[217]:
array([ 1, 2, 6, 24, 120, 720, 5040])
In [218]:
a.argmin() # 返回最小值的索引
Out[218]:
6
In [219]:
a.argmax() # 返回最大值的索引
Out[219]:
1
In [220]:
a
Out[220]:
array([[69, 86, 40, 29, 24],
[65, 3, 15, 43, 9],
[32, 38, 24, 80, 61]])
In [221]:
index = np.argwhere((a > 50) | (a < 20)) # 返回就是符合条件的索引
index
Out[221]:
array([[0, 0],
[0, 1],
[1, 0],
[1, 1],
[1, 2],
[1, 4],
[2, 3],
[2, 4]])
In [222]:
for i,j in index:
print(a[i,j])
69 86 65 3 15 9 80 61
In [223]:
# cov 协方差(属性之间进行计算),方差概念类似(数据内部,属性内部计算)
# 举例子:一个男生受女生欢迎程度,和这名男生萎缩程度,是否成正比,什么关系
np.cov(a)
Out[223]:
array([[ 718.3, 27.5, -359. ],
[ 27.5, 686. , 49.5],
[-359. , 49.5, 530. ]])
In [224]:
np.corrcoef(a) # 相关性系数,1(正相关) ~ -1(负相关)
# 0 表示,没有关系
Out[224]:
array([[ 1. , 0.03917578, -0.58184001],
[ 0.03917578, 1. , 0.08209283],
[-0.58184001, 0.08209283, 1. ]])
线性代数¶
In [225]:
A = np.random.randint(0,10,size = (3,3))
B = np.random.randint(0,10,size = (3,4))
display(A,B) # A矩阵行长度必须和B列长度一致,不然报错
array([[3, 8, 9],
[7, 8, 3],
[7, 4, 2]])
array([[2, 7, 3, 0],
[8, 8, 9, 3],
[9, 5, 8, 3]])
In [226]:
A.dot(B) # dot矩阵乘法 ,点乘
Out[226]:
array([[151, 130, 153, 51],
[105, 128, 117, 33],
[ 64, 91, 73, 18]])
In [227]:
np.dot(A,B) # 模块提供的方法
Out[227]:
array([[151, 130, 153, 51],
[105, 128, 117, 33],
[ 64, 91, 73, 18]])
In [228]:
A @ B # 邮件中@符号,表示矩阵运算
Out[228]:
array([[151, 130, 153, 51],
[105, 128, 117, 33],
[ 64, 91, 73, 18]])
In [229]:
C = np.random.randint(0,10,size = (4,5))
C
Out[229]:
array([[7, 0, 4, 4, 1],
[8, 5, 5, 4, 1],
[2, 8, 3, 3, 0],
[6, 7, 7, 5, 4]])
In [230]:
A.dot(C) # 形状不对应,无法进行矩阵乘法
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-230-f96e5078725a> in <module> ----> 1 A.dot(C) # 形状不对应,无法进行矩阵乘法 ValueError: shapes (3,3) and (4,5) not aligned: 3 (dim 1) != 4 (dim 0)
In [231]:
# B.shape = (3,4);C.shape = (4,5)
B.dot(C)
Out[231]:
array([[ 76, 59, 52, 45, 9],
[156, 133, 120, 106, 28],
[137, 110, 106, 95, 26]])
In [232]:
# C.shape = (4,5);B.shape = (3,4)
C.dot(B) # 矩阵乘法不满足交换律!!!
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-232-0ff241fbe642> in <module> 1 # C.shape = (4,5);B.shape = (3,4) ----> 2 C.dot(B) # 矩阵乘法不满足交换律!!! ValueError: shapes (4,5) and (3,4) not aligned: 5 (dim 1) != 3 (dim 0)
实战-用NumPy分析鸢尾花花萼属性各项指标
案列:读取iris数据集中的花萼长度数据(已保存为csv格式)
并对其进行排序、去重,并求出和、累积和、均值、标准差、方差、最小值、最大值。
In [233]:
iris = np.loadtxt('./iris.csv') # 花萼长度
iris
Out[233]:
array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,
4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,
5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,
5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. , 7. , 6.4,
6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. , 6.1, 5.6,
6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7,
6. , 5.7, 5.5, 5.5, 5.8, 6. , 5.4, 6. , 6.7, 6.3, 5.6, 5.5, 5.5,
6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3,
6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5,
7.7, 7.7, 6. , 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2,
7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. , 6.9, 6.7, 6.9, 5.8,
6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9])
In [234]:
iris = np.sort(iris) # 排序操作
iris
Out[234]:
array([4.3, 4.4, 4.4, 4.4, 4.5, 4.6, 4.6, 4.6, 4.6, 4.7, 4.7, 4.8, 4.8,
4.8, 4.8, 4.8, 4.9, 4.9, 4.9, 4.9, 4.9, 4.9, 5. , 5. , 5. , 5. ,
5. , 5. , 5. , 5. , 5. , 5. , 5.1, 5.1, 5.1, 5.1, 5.1, 5.1, 5.1,
5.1, 5.1, 5.2, 5.2, 5.2, 5.2, 5.3, 5.4, 5.4, 5.4, 5.4, 5.4, 5.4,
5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.6, 5.6, 5.6, 5.6, 5.6, 5.6,
5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 5.8, 5.8, 5.8, 5.8, 5.8,
5.8, 5.8, 5.9, 5.9, 5.9, 6. , 6. , 6. , 6. , 6. , 6. , 6.1, 6.1,
6.1, 6.1, 6.1, 6.1, 6.2, 6.2, 6.2, 6.2, 6.3, 6.3, 6.3, 6.3, 6.3,
6.3, 6.3, 6.3, 6.3, 6.4, 6.4, 6.4, 6.4, 6.4, 6.4, 6.4, 6.5, 6.5,
6.5, 6.5, 6.5, 6.6, 6.6, 6.7, 6.7, 6.7, 6.7, 6.7, 6.7, 6.7, 6.7,
6.8, 6.8, 6.8, 6.9, 6.9, 6.9, 6.9, 7. , 7.1, 7.2, 7.2, 7.2, 7.3,
7.4, 7.6, 7.7, 7.7, 7.7, 7.7, 7.9])
In [235]:
# 去除重复数据
iris = np.unique(iris)
iris
Out[235]:
array([4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5. , 5.1, 5.2, 5.3, 5.4, 5.5,
5.6, 5.7, 5.8, 5.9, 6. , 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8,
6.9, 7. , 7.1, 7.2, 7.3, 7.4, 7.6, 7.7, 7.9])
In [236]:
np.sum(iris) # 求和
Out[236]:
210.39999999999998
In [237]:
np.mean(iris) # 求平均值
Out[237]:
6.011428571428571
In [238]:
np.cumsum(iris) # 累加和
Out[238]:
array([ 4.3, 8.7, 13.2, 17.8, 22.5, 27.3, 32.2, 37.2, 42.3,
47.5, 52.8, 58.2, 63.7, 69.3, 75. , 80.8, 86.7, 92.7,
98.8, 105. , 111.3, 117.7, 124.2, 130.8, 137.5, 144.3, 151.2,
158.2, 165.3, 172.5, 179.8, 187.2, 194.8, 202.5, 210.4])
In [239]:
np.median(iris) # 中位数
Out[239]:
6.0
In [240]:
np.var(iris) # 方差
Out[240]:
1.0587265306122449
In [241]:
np.std(iris) # 标准差
Out[241]:
1.0289443768310533
In [242]:
iris.min() # 最小值
Out[242]:
4.3
In [243]:
iris.max() # 最大值
Out[243]:
7.9
In [244]:
np.max(iris)
Out[244]:
7.9
In [245]:
x=y=z=1
In [246]:
x,y,z=1,2,3
In [247]:
t = bool(None)
t
Out[247]:
False
In [248]:
n=round(99.9)
n
Out[248]:
100
In [ ]: