1. numpy 的基本用法

numpy 的下载

! conda install numpy
Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.
! conda install pandas

numpy 基本属性

  • 创建 numpy
  • 维度、形状、大小
import numpy as np
# create the numpy array
array = np.array([
    [1, 2, 3],
    [4, 5, 6]
])

print(array)
# 查看他的 dim, shape, size
print('number of dimensions : ', array.ndim)
print('shape of array : ', array.shape)
print('size : ', array.size)
[[1 2 3]
 [4 5 6]]
number of dimensions :  2
shape of array :  (2, 3)
size :  6
import numpy as np
X = np.arange(20).reshape(4, 5)
X.shape, X.size
((4, 5), 20)

numpy 之创建 array

  • 注意类型 dtype
  • 注意 numpy 和 torch 在创建数组 / 张量时写法的区别
  • 32 位和 64 位(例如 float32 和 float64)是在精度和速度之间取舍

一维 array

import numpy as np
a = np.array([2, 23, 4], dtype=np.float32)
print(a)
print(a.dtype)
[ 2. 23.  4.]
float32
import torch
a = torch.tensor([2, 23, 4], dtype=torch.float32)
print(a)
print(a.dtype)
tensor([ 2., 23.,  4.])
torch.float32

多维 array

a = np.array([
    [1, 2, 3],
    [4, 5, 6]
])
print(a)
[[1 2 3]
 [4 5 6]]

使用 np.zeros(), np.ones(), np.random.randn(), np.empty(), np.arange(), np.linspace()
np.random.random()

print(np.zeros((3, 3), dtype=np.int32))
print(np.ones((3, 4, 5)))
print(np.empty((3, 4)))
print(np.empty((3, 4)).dtype)
[[0 0 0]
 [0 0 0]
 [0 0 0]]
[[[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]]

 [[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]]

 [[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]]]
[[1.22847503e-311 2.47032823e-322 0.00000000e+000 0.00000000e+000]
 [1.22651357e-311 1.31370903e-076 6.81858644e-091 8.75703250e+169]
 [1.52513173e-052 1.10983758e-047 3.99910963e+252 6.17134893e-062]]
float64
x = np.arange(12, dtype=np.float32)
X = x.reshape((3, 4)) # 没有 , dtype=np.int32
print(id(x) == id(X))
print(x)
print(X)
False
[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11.]
[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]]
x = np.linspace(0, 10, 100).reshape(5, 20)
print(x)
print(x.dtype)
[[ 0.          0.1010101   0.2020202   0.3030303   0.4040404   0.50505051
   0.60606061  0.70707071  0.80808081  0.90909091  1.01010101  1.11111111
   1.21212121  1.31313131  1.41414141  1.51515152  1.61616162  1.71717172
   1.81818182  1.91919192]
 [ 2.02020202  2.12121212  2.22222222  2.32323232  2.42424242  2.52525253
   2.62626263  2.72727273  2.82828283  2.92929293  3.03030303  3.13131313
   3.23232323  3.33333333  3.43434343  3.53535354  3.63636364  3.73737374
   3.83838384  3.93939394]
 [ 4.04040404  4.14141414  4.24242424  4.34343434  4.44444444  4.54545455
   4.64646465  4.74747475  4.84848485  4.94949495  5.05050505  5.15151515
   5.25252525  5.35353535  5.45454545  5.55555556  5.65656566  5.75757576
   5.85858586  5.95959596]
 [ 6.06060606  6.16161616  6.26262626  6.36363636  6.46464646  6.56565657
   6.66666667  6.76767677  6.86868687  6.96969697  7.07070707  7.17171717
   7.27272727  7.37373737  7.47474747  7.57575758  7.67676768  7.77777778
   7.87878788  7.97979798]
 [ 8.08080808  8.18181818  8.28282828  8.38383838  8.48484848  8.58585859
   8.68686869  8.78787879  8.88888889  8.98989899  9.09090909  9.19191919
   9.29292929  9.39393939  9.49494949  9.5959596   9.6969697   9.7979798
   9.8989899  10.        ]]
float64

numpy 的基础运算形式1

  • 加减乘除四则运算
  • 次方,np.log(), np.exp(), np.sin(), np.tan(), np.dot() 矩阵运算
  • np.dot(X, Y) = X.dot(Y)
  • np.min(), np.max(), np.sum(),或者是 a.min() 这样的方法形式;都支持 axis 参数,相当于 torch 里面的 dim
import numpy as np
a = np.array([10, 20, 30, 40])
b = np.arange(1, 5)
print(a)
print(b)
print(a + b)
print(a - b)
print(a * b)
print(a / b)
print(a ** b)
print(np.log(a))
print(np.sin(a))
print(np.exp(a))
[10 20 30 40]
[1 2 3 4]
[11 22 33 44]
[ 9 18 27 36]
[ 10  40  90 160]
[10. 10. 10. 10.]
[     10     400   27000 2560000]
[2.30258509 2.99573227 3.40119738 3.68887945]
[-0.54402111  0.91294525 -0.98803162  0.74511316]
[2.20264658e+04 4.85165195e+08 1.06864746e+13 2.35385267e+17]
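print(np.sin(180))  # 注意:np.sin 的参数是弧度,这里算的是 sin(180 rad),不是 sin(180°)
# print(np.sin(pi))  # pi 没有定义,会报 NameError(见下方输出);应写成 np.sin(np.pi) 或 np.sin(np.deg2rad(180))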
-0.8011526357338304



---------------------------------------------------------------------------

NameError                                 Traceback (most recent call last)

~\AppData\Local\Temp/ipykernel_3544/373536527.py in <module>
      1 print(np.sin(180))
----> 2 print(np.sin(pi))


NameError: name 'pi' is not defined
print(b < 3)
print(b == 3)
print(b.sum())
[ True  True False False]
[False False  True False]
10
X = np.array([
    [1, 2],
    [2, 5]
])
Y = np.arange(1, 5).reshape((2, 2))
print(X)
print(Y)
print(X * Y)
print(np.dot(X, Y))
print(X.dot(Y))
[[1 2]
 [2 5]]
[[1 2]
 [3 4]]
[[ 1  4]
 [ 6 20]]
[[ 7 10]
 [17 24]]
[[ 7 10]
 [17 24]]
a = np.random.random((4, 6))
print(a)
print(np.sum(a), a.sum())
print(np.max(a), a.max())
print(np.min(a), a.min())
print(np.sum(a, axis=0), a.sum(axis=0))
print(np.max(a, axis=0), a.max(axis=0))
print(np.min(a, axis=0), a.min(axis=0))
print(np.sum(a, axis=1), a.sum(axis=1))
print(np.max(a, axis=1), a.max(axis=1))
print(np.min(a, axis=1), a.min(axis=1))
[[0.23205598 0.45702942 0.7222314  0.44218721 0.81785802 0.23866066]
 [0.58904727 0.97667908 0.92215784 0.53217002 0.08417314 0.15867867]
 [0.08002107 0.15370827 0.91751546 0.25644443 0.19251754 0.60799192]
 [0.54797931 0.51718364 0.09511536 0.05141212 0.26126018 0.21689572]]
10.070973737257972 10.070973737257972
0.9766790781521906 0.9766790781521906
0.05141211685368341 0.05141211685368341
[1.44910363 2.10460041 2.65702006 1.28221378 1.35580888 1.22222698] [1.44910363 2.10460041 2.65702006 1.28221378 1.35580888 1.22222698]
[0.58904727 0.97667908 0.92215784 0.53217002 0.81785802 0.60799192] [0.58904727 0.97667908 0.92215784 0.53217002 0.81785802 0.60799192]
[0.08002107 0.15370827 0.09511536 0.05141212 0.08417314 0.15867867] [0.08002107 0.15370827 0.09511536 0.05141212 0.08417314 0.15867867]
[2.9100227  3.26290602 2.20819869 1.68984633] [2.9100227  3.26290602 2.20819869 1.68984633]
[0.81785802 0.97667908 0.91751546 0.54797931] [0.81785802 0.97667908 0.91751546 0.54797931]
[0.23205598 0.08417314 0.08002107 0.05141212] [0.23205598 0.08417314 0.08002107 0.05141212]
import numpy as np
X = np.arange(12).reshape((3, 4))
print(X.sum(axis=0))
[12 15 18 21]

numpy 的基础运算2

  • np.argmax(A),A.argmax(),axis : Returns the indices of the maximum values along an axis.
  • np.argmin()
  • np.median()
  • np.average()
  • np.cumsum(A) cumulation_sum 累加
  • np.diff(A) 相邻元素作差(默认沿最后一个轴),所以注意原本 3 X 4 --> 3 X 3
  • np.nonzero(A) 返回非零元素的位置;返回的是一个元组,元组的长度和数组的维度数一样,每一项是对应维度上的索引数组
  • np.sort(A, axis=-1) 注意参数,axis=-1 表示按照最后一个维度排序,axis=None 表示先进行 flatten 再进行 sort
  • np.transpose(A) = A.T 转置
  • np.clip(A, 5, 9) 相当于把 5 和 9 作为下、上两个阈值:小于 5 的变为 5,大于 9 的变为 9
import numpy as np
A = np.arange(1, 25).reshape(2, 3, 4)
print(A)
print('\n\n')
print(np.argmin(A, axis=0))
print('\n\n')
print(np.argmax(A, axis=0))
print('\n\n')
print(A.argmax(axis=0))
print('\n\n')
print(np.mean(A))
print('\n\n')
print(np.max(A))
print('\n\n')
print('\n\n')
[[[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]

 [[13 14 15 16]
  [17 18 19 20]
  [21 22 23 24]]]


[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]


[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


12.5


24






print(A.argmax())
print(A.argmin())
print(A.sum())
print(np.median(A))
print(np.average(A))
print(np.cumsum(A))
print(np.diff(A))
print(np.sort(A))
23
0
300
12.5
12.5
[  1   3   6  10  15  21  28  36  45  55  66  78  91 105 120 136 153 171
 190 210 231 253 276 300]
[[[1 1 1]
  [1 1 1]
  [1 1 1]]

 [[1 1 1]
  [1 1 1]
  [1 1 1]]]
[[[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]

 [[13 14 15 16]
  [17 18 19 20]
  [21 22 23 24]]]
A = np.arange(0, 18).reshape(2, 3, 3)
print(A)
print(np.nonzero(A))
[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]]
(array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64), array([0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2], dtype=int64), array([1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], dtype=int64))
A = np.random.random((2, 3, 4))
print(A)
print('\n\n')
print(np.sort(A))
print('\n\n')
print(np.sort(A, axis=None))
[[[0.99333036 0.19783928 0.31423449 0.699383  ]
  [0.41002294 0.67969562 0.92446068 0.3547926 ]
  [0.85395562 0.59284106 0.68725208 0.6029742 ]]

 [[0.03314733 0.13441676 0.29763166 0.36992482]
  [0.51003511 0.13022063 0.21054679 0.97082345]
  [0.89280455 0.1308477  0.04494078 0.80925873]]]


[[[0.19783928 0.31423449 0.699383   0.99333036]
  [0.3547926  0.41002294 0.67969562 0.92446068]
  [0.59284106 0.6029742  0.68725208 0.85395562]]

 [[0.03314733 0.13441676 0.29763166 0.36992482]
  [0.13022063 0.21054679 0.51003511 0.97082345]
  [0.04494078 0.1308477  0.80925873 0.89280455]]]


[0.03314733 0.04494078 0.13022063 0.1308477  0.13441676 0.19783928
 0.21054679 0.29763166 0.31423449 0.3547926  0.36992482 0.41002294
 0.51003511 0.59284106 0.6029742  0.67969562 0.68725208 0.699383
 0.80925873 0.85395562 0.89280455 0.92446068 0.97082345 0.99333036]
  • np.sort(A, axis=-1) 注意参数,axis=-1 表示按照最后一个维度排序,axis=None 表示先进行 flatten 再进行 sort
  • np.transpose(A) = A.T 转置
  • np.clip(A, 5, 9) 相当于把 5 和 9 作为下、上两个阈值:小于 5 的变为 5,大于 9 的变为 9
A = np.arange(18).reshape((2, 3, 3))
print(A)
print('\n\n')
print(A.clip(4, 12))
[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]]


[[[ 4  4  4]
  [ 4  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 12 12]
  [12 12 12]]]

numpy 的索引

  • A[1,2] == A[1][2]
  • :, 0:3, ::2
  • 进行 for 循环
  • A.flatten() 返回展平后的一维数组(ndarray,是原数组的副本),不是列表
  • A.flat 这是迭代器
import numpy as np
A = np.arange(3, 15)
print(A)
print(A[3])
[ 3  4  5  6  7  8  9 10 11 12 13 14]
6
A = np.arange(3, 15).reshape(3, 4)
print(A[2])
print(A[2,:])
print(A[:,1])
print(A[0, 0])
print(A[::2,:])
print(A[::2][:])
[11 12 13 14]
[11 12 13 14]
[ 4  8 12]
3
[[ 3  4  5  6]
 [11 12 13 14]]
[[ 3  4  5  6]
 [11 12 13 14]]
for row in A:
    print(row)
[3 4 5 6]
[ 7  8  9 10]
[11 12 13 14]
for col in A.T:
    print(col)
[ 3  7 11]
[ 4  8 12]
[ 5  9 13]
[ 6 10 14]
A = np.arange(3, 15).reshape((3, 4))
print(A)
print(A.flatten())
for item in A.flatten():
    print(item, end=' ')
print('\n')
for item in A.flat:
    print(item, end=' ')

[[ 3  4  5  6]
 [ 7  8  9 10]
 [11 12 13 14]]
[ 3  4  5  6  7  8  9 10 11 12 13 14]
3 4 5 6 7 8 9 10 11 12 13 14 

3 4 5 6 7 8 9 10 11 12 13 14 

numpy array 的合并

  • np.vstack() vertical
  • np.hstack() horizontal
  • 向量(一维数组)转置和矩阵转置不同:一维数组做 transpose 之后形状不变,没有办法直接得到列向量
  • A = A[:, np.newaxis] 可以把形状 (3,) 的向量变成 (3, 1) 的矩阵,这个维度的变化非常有意思!
  • np.concatenate 可以一次合并多个数组,并且用 axis 指定沿哪个维度拼接,其实就是结果的 shape 中发生变化的那一维
import numpy as np
# A B is a vector
A = np.array([1, 1, 1])
B = np.array([2, 2, 2])

C = np.vstack((A, B)) # vertical stack
D = np.hstack((A, B)) # horizontal stack
print(A.shape, B.shape)
print(C.shape)
print(C)
print(D.shape)
print(D)
(3,) (3,)
(2, 3)
[[1 1 1]
 [2 2 2]]
(6,)
[1 1 1 2 2 2]
import numpy as np
# A, B is a matrix
A = np.array([1, 1, 1]).reshape(1, 3)
B = np.array([2, 2, 2]).reshape(1, 3)

C = np.vstack((A, B)) # vertical stack
D = np.hstack((A, B)) # horizontal stack
print(A.shape, B.shape)
print(C.shape)
print(C)
print(D.shape)
print(D)

(1, 3) (1, 3)
(2, 3)
[[1 1 1]
 [2 2 2]]
(1, 6)
[[1 1 1 2 2 2]]
import numpy as np
A = np.array([1, 1, 1])
B = np.array([2, 2, 2])
# turn vector to matrix
A = A[:, np.newaxis]
B = B[:, np.newaxis]
C = np.concatenate((A, B), axis=0)
D = np.concatenate((A, B), axis=1)
print(A)
print(B)
print(C)
print(D)
[[1]
 [1]
 [1]]
[[2]
 [2]
 [2]]
[[1]
 [1]
 [1]
 [2]
 [2]
 [2]]
[[1 2]
 [1 2]
 [1 2]]
import numpy as np
A = np.array([1, 1, 1])
B = np.array([2, 2, 2])
# turn vector to matrix
A = A[:, np.newaxis]
B = B[:, np.newaxis]
print(A)
print(B)
C = A[np.newaxis, :]
D = B[np.newaxis, :]
print(C)
print(D)
E = A[:, np.newaxis]
F = B[:, np.newaxis]
print(E)
print(F)
[[1]
 [1]
 [1]]
[[2]
 [2]
 [2]]
[[[1]
  [1]
  [1]]]
[[[2]
  [2]
  [2]]]
[[[1]]

 [[1]]

 [[1]]]
[[[2]]

 [[2]]

 [[2]]]

numpy array 分割

  • np.split(A, num, axis) 只能进行等量的分割(不能均分时会报错)
  • np.array_split(A, num, axis) 可以进行不等量的分割,前面的会多一些
  • np.vsplit() 相当于 axis=0 的 np.split(),所以不需要 axis 参数了
  • np.hsplit() 对二维及以上的数组相当于 axis=1 的 np.split(),所以不需要 axis 参数了
import numpy as np
A = np.arange(12).reshape(3, 4)

print(np.split(A, 3, axis=0))
print(np.split(A, 2, axis=1))
# print(np.split(A, 3, axis=1)),np.split() 必须要均分

print(np.array_split(A, 3, axis=1))
A = np.arange(15).reshape(3, -1)
print(np.array_split(A, 3, axis=1))

[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]
[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2,  3],
       [ 6,  7],
       [10, 11]])]
[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2],
       [ 6],
       [10]]), array([[ 3],
       [ 7],
       [11]])]
[array([[ 0,  1],
       [ 5,  6],
       [10, 11]]), array([[ 2,  3],
       [ 7,  8],
       [12, 13]]), array([[ 4],
       [ 9],
       [14]])]
import numpy as np
A = np.arange(12).reshape(3, 4)
print(np.vsplit(A, 3))
print(np.hsplit(A, 2))
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]
[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2,  3],
       [ 6,  7],
       [10, 11]])]

numpy array copy

  • b = a.copy(),仅仅是获取数值,deep copy,指向的内存单元不同了;而直接赋值 b = a 只是给同一个数组多起了一个名字,修改其中一个,其余的都会跟着变(见下面的例子)
import numpy as np
a = np.arange(4)
# print(a)
b = a
c = a
d = b
a[0] = 11
print(b)
print(c)
print(id(b) == id(c))
[11  1  2  3]
[11  1  2  3]
True
d[0:3] = 2
a, b, c, d
(array([2, 2, 2, 3]),
 array([2, 2, 2, 3]),
 array([2, 2, 2, 3]),
 array([2, 2, 2, 3]))
Author:luckylight(xyg) Date:2021/11/16
上一篇:【Python数据分析-7】:Numpy常用操作-Numpy基础与创建


下一篇:Python的torch.einsum方法