Numpy tutorial#
Array manipulation in Python
Reference#
Scipy official reference: http://docs.scipy.org/
Scipy Lecture notes: http://www.scipy-lectures.org/
Python Data Science Handbook: https://jakevdp.github.io/PythonDataScienceHandbook/
Python course .eu: https://www.python-course.eu/numpy.php
Call the function#
np.func(a, x, y)
is the same as a.func(x, y)
.
Convention#
The common short name for numpy
is np
.
import numpy as np
np.__version__
'1.26.1'
Creating an array from a sequence#
array(seq)
, asarray(seq)
#
seq
can be a tuple, list, or a numpy array.
asarray()
does not make a new copy if seq is already a numpy array.
Creating a 1D numpy array from a list.
Array basics#
a.ndim: number of dimensions
a.shape: tuple of lengths for each dimension
a.size: total number of elements (product of shape); equals len(a) only for 1D arrays, since len(a) is the length of the first dimension
a.dtype: data type
a[i]: accessing the i-th element of a 1D array (indices start from 0)
a[i, j]: accessing the element at the i-th row, j-th column (2D array, indices start from 0)
a[:, j]: accessing the j-th column (2D array)
a[i, :]: accessing the i-th row (2D array)
a.T: transpose of a
a.copy(): make a copy of a that does not share memory
a.reshape(shape): reshape the array if the new shape is compatible (i.e. the same total size)
a = np.array([1, 9, 8, 7])
a
array([1, 9, 8, 7])
a.ndim
1
a.shape
(4,)
a.size
4
a.dtype
dtype('int64')
a[3]
7
For complex numbers, the suffix j
marks the imaginary part.
np.array([1+2j, 3+4j, 5+6*1j]).dtype
dtype('complex128')
Creating a multidimensional array from a nested list, with complex numbers
b = np.asarray([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0+1j]])
b
array([[0.+0.j, 1.+0.j, 2.+0.j],
[3.+0.j, 4.+0.j, 5.+1.j]])
b.ndim
2
b.shape
(2, 3)
b.dtype
dtype('complex128')
b[1, :]
array([3.+0.j, 4.+0.j, 5.+1.j])
b[:, 0]
array([0.+0.j, 3.+0.j])
b.T
array([[0.+0.j, 3.+0.j],
[1.+0.j, 4.+0.j],
[2.+0.j, 5.+1.j]])
b.reshape((3, 2))
array([[0.+0.j, 1.+0.j],
[2.+0.j, 3.+0.j],
[4.+0.j, 5.+1.j]])
b.reshape((1, -1)) # -1 means calculate this dimension automatically
array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j, 4.+0.j, 5.+1.j]])
Creating an array from a function#
arange(start, stop, step)
linspace(start, stop, num, endpoint=True)
logspace(start, stop, num, endpoint=True)
ones((d1, d2, ...))
ones_like(arr)
zeros((d1, d2, ...))
zeros_like(arr)
full((d1, d2, ...), val)
eye(k)
diag(seq)
fromfunction(f, (d1, d2, ...))
fromiter(iter, dtype)
meshgrid(x1, x2, ...)
np.arange(10, 0, -1)
array([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])
np.linspace(0, 1, 5)
array([0. , 0.25, 0.5 , 0.75, 1. ])
np.linspace(0, 1, 5, endpoint=False)
array([0. , 0.2, 0.4, 0.6, 0.8])
np.logspace(-10.0, 10.0, 11)
array([1.e-10, 1.e-08, 1.e-06, 1.e-04, 1.e-02, 1.e+00, 1.e+02, 1.e+04,
1.e+06, 1.e+08, 1.e+10])
np.ones((3, 3))
array([[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]])
a = np.arange(5.0)
print(np.ones_like(a))
[1. 1. 1. 1. 1.]
np.full((3, 4), 42)
array([[42, 42, 42, 42],
[42, 42, 42, 42],
[42, 42, 42, 42]])
np.full_like(a, 69)
array([69., 69., 69., 69., 69.])
np.eye(3)
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
np.diag([4, 5, 6, 7])
array([[4, 0, 0, 0],
[0, 5, 0, 0],
[0, 0, 6, 0],
[0, 0, 0, 7]])
np.fromfunction(lambda i, j: i >= j, (4, 4))
array([[ True, False, False, False],
[ True, True, False, False],
[ True, True, True, False],
[ True, True, True, True]])
np.fromiter((x*x for x in range(5)) , dtype=np.float64)
array([ 0., 1., 4., 9., 16.])
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
# sparse=True to save some memory
xx, yy = np.meshgrid(x, y, sparse=True)
print('xx =', xx, sep='\n')
print('yy =', yy, sep='\n')
plt.contourf(x,y, np.sin(xx**2 + yy**2) / (xx**2 + yy**2))
plt.colorbar()
xx =
[[-5. -4.8989899 -4.7979798 -4.6969697 -4.5959596 -4.49494949
-4.39393939 -4.29292929 -4.19191919 -4.09090909 -3.98989899 -3.88888889
-3.78787879 -3.68686869 -3.58585859 -3.48484848 -3.38383838 -3.28282828
-3.18181818 -3.08080808 -2.97979798 -2.87878788 -2.77777778 -2.67676768
-2.57575758 -2.47474747 -2.37373737 -2.27272727 -2.17171717 -2.07070707
-1.96969697 -1.86868687 -1.76767677 -1.66666667 -1.56565657 -1.46464646
-1.36363636 -1.26262626 -1.16161616 -1.06060606 -0.95959596 -0.85858586
-0.75757576 -0.65656566 -0.55555556 -0.45454545 -0.35353535 -0.25252525
-0.15151515 -0.05050505 0.05050505 0.15151515 0.25252525 0.35353535
0.45454545 0.55555556 0.65656566 0.75757576 0.85858586 0.95959596
1.06060606 1.16161616 1.26262626 1.36363636 1.46464646 1.56565657
1.66666667 1.76767677 1.86868687 1.96969697 2.07070707 2.17171717
2.27272727 2.37373737 2.47474747 2.57575758 2.67676768 2.77777778
2.87878788 2.97979798 3.08080808 3.18181818 3.28282828 3.38383838
3.48484848 3.58585859 3.68686869 3.78787879 3.88888889 3.98989899
4.09090909 4.19191919 4.29292929 4.39393939 4.49494949 4.5959596
4.6969697 4.7979798 4.8989899 5. ]]
yy =
[[-5. ]
[-4.8989899 ]
[-4.7979798 ]
[-4.6969697 ]
[-4.5959596 ]
[-4.49494949]
[-4.39393939]
[-4.29292929]
[-4.19191919]
[-4.09090909]
[-3.98989899]
[-3.88888889]
[-3.78787879]
[-3.68686869]
[-3.58585859]
[-3.48484848]
[-3.38383838]
[-3.28282828]
[-3.18181818]
[-3.08080808]
[-2.97979798]
[-2.87878788]
[-2.77777778]
[-2.67676768]
[-2.57575758]
[-2.47474747]
[-2.37373737]
[-2.27272727]
[-2.17171717]
[-2.07070707]
[-1.96969697]
[-1.86868687]
[-1.76767677]
[-1.66666667]
[-1.56565657]
[-1.46464646]
[-1.36363636]
[-1.26262626]
[-1.16161616]
[-1.06060606]
[-0.95959596]
[-0.85858586]
[-0.75757576]
[-0.65656566]
[-0.55555556]
[-0.45454545]
[-0.35353535]
[-0.25252525]
[-0.15151515]
[-0.05050505]
[ 0.05050505]
[ 0.15151515]
[ 0.25252525]
[ 0.35353535]
[ 0.45454545]
[ 0.55555556]
[ 0.65656566]
[ 0.75757576]
[ 0.85858586]
[ 0.95959596]
[ 1.06060606]
[ 1.16161616]
[ 1.26262626]
[ 1.36363636]
[ 1.46464646]
[ 1.56565657]
[ 1.66666667]
[ 1.76767677]
[ 1.86868687]
[ 1.96969697]
[ 2.07070707]
[ 2.17171717]
[ 2.27272727]
[ 2.37373737]
[ 2.47474747]
[ 2.57575758]
[ 2.67676768]
[ 2.77777778]
[ 2.87878788]
[ 2.97979798]
[ 3.08080808]
[ 3.18181818]
[ 3.28282828]
[ 3.38383838]
[ 3.48484848]
[ 3.58585859]
[ 3.68686869]
[ 3.78787879]
[ 3.88888889]
[ 3.98989899]
[ 4.09090909]
[ 4.19191919]
[ 4.29292929]
[ 4.39393939]
[ 4.49494949]
[ 4.5959596 ]
[ 4.6969697 ]
[ 4.7979798 ]
[ 4.8989899 ]
[ 5. ]]
<matplotlib.colorbar.Colorbar at 0x7f3be0932290>
Random#
The new API: https://numpy.org/doc/stable/reference/random/index.html?highlight=random#module-numpy.random
from numpy.random import default_rng
rng = default_rng()
rng.random()
0.2783722963595373
# Uniform [0, 1)
rng.random((4, 3))
array([[0.58747773, 0.51991983, 0.046182 ],
[0.10955226, 0.88656638, 0.08084073],
[0.662097 , 0.5731594 , 0.1294832 ],
[0.84042674, 0.46414268, 0.41211316]])
# Integers
rng.integers(1, 7, (10, 20))
array([[4, 2, 5, 3, 4, 3, 6, 1, 4, 4, 4, 4, 1, 6, 3, 6, 1, 2, 6, 3],
[2, 5, 6, 6, 4, 3, 6, 5, 6, 6, 2, 2, 3, 5, 3, 5, 5, 1, 5, 1],
[5, 6, 2, 1, 4, 3, 5, 2, 5, 6, 4, 3, 4, 4, 4, 4, 1, 5, 5, 5],
[6, 4, 3, 1, 4, 3, 3, 2, 6, 2, 2, 6, 2, 6, 4, 5, 2, 4, 2, 5],
[2, 3, 6, 2, 5, 2, 5, 2, 6, 2, 2, 6, 1, 4, 1, 3, 4, 3, 5, 4],
[6, 1, 1, 5, 3, 5, 6, 5, 6, 1, 4, 5, 3, 4, 5, 4, 4, 3, 2, 6],
[1, 1, 3, 2, 1, 4, 6, 2, 6, 5, 3, 2, 5, 3, 5, 6, 2, 6, 6, 1],
[4, 4, 5, 1, 3, 1, 4, 2, 3, 6, 6, 3, 6, 4, 4, 2, 4, 6, 4, 5],
[4, 5, 1, 4, 4, 4, 2, 2, 1, 6, 5, 5, 1, 1, 6, 1, 2, 4, 3, 3],
[1, 6, 4, 4, 6, 3, 6, 4, 1, 1, 5, 1, 5, 6, 3, 5, 5, 6, 3, 6]])
# Standard normal distribution
rng.standard_normal(10)
array([-0.72433067, -0.27357598, 0.43790871, 0.63502424, 1.1102297 ,
-0.51704628, 0.04863523, 0.35622056, 1.30529302, 0.15792639])
# Random choice
choices = np.array(["one", "two"])
# Select by index
choices[rng.integers(0, 2, (3, 4))]
array([['one', 'one', 'one', 'one'],
['two', 'two', 'two', 'one'],
['one', 'one', 'two', 'two']], dtype='<U3')
# Or choice function, prob weights supported
rng.choice(choices, size=(5, 3), p=[0.3, 0.7])
array([['two', 'two', 'two'],
['one', 'two', 'one'],
['one', 'two', 'two'],
['one', 'two', 'two'],
['two', 'two', 'two']], dtype='<U3')
Selecting elements#
a = np.arange(10)
a
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# a[idx]
a[0], a[3], a[7]
(0, 3, 7)
# a[[indices]]
a[[0, 3, 7]]
array([0, 3, 7])
# a[condition]
# Selection using an array of true/false values (boolean mask)
a[a<5]
array([0, 1, 2, 3, 4])
# Slice: a[start:end:step]
a[1::2]
array([1, 3, 5, 7, 9])
# Reverse
a[::-1]
array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
# Mutating the elements
a[0] = 1000
a
array([1000, 1, 2, 3, 4, 5, 6, 7, 8, 9])
Indexing for 2D / 3D arrays#
In 2D, the first dimension corresponds to rows, the second to columns. Numpy is row-major by default, as in C-styled arrays.
a[i, j]
for the element from ith row and jth column.
b = np.arange(25).reshape((5,5))
b
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24]])
# Each index is separated by comma
b[2, 3]
13
# Slices share the same underlying object of the original.
c = b[1::2, 1::2]
c
array([[ 6, 8],
[16, 18]])
c[0, 0] = 666 # Mutates b !!!
print("After mutating:")
b
After mutating:
array([[ 0, 1, 2, 3, 4],
[ 5, 666, 7, 8, 9],
[ 10, 11, 12, 13, 14],
[ 15, 16, 17, 18, 19],
[ 20, 21, 22, 23, 24]])
np.may_share_memory(c, b)
True
# Use copy to prevent unwanted overwriting
a = np.arange(10)
c = a[::2].copy()
c[0] = 12
a
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# combining assignment and slicing
a = np.arange(10)
a[5:] = 10
a
array([ 0, 1, 2, 3, 4, 10, 10, 10, 10, 10])
b = np.arange(5)
a[5:] = b[::-1]
a
array([0, 1, 2, 3, 4, 4, 3, 2, 1, 0])
Numerical operations on arrays#
Element-wise (broadcasting) operations by default.
Some math functions can be found in numpy (e.g. sin, cos); search for them with
np.lookfor(desc.)
Others can be found in the scipy documentation.
a = np.arange(10)
a
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
a+1
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
a-3
array([-3, -2, -1, 0, 1, 2, 3, 4, 5, 6])
a*2
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
a/4
array([0. , 0.25, 0.5 , 0.75, 1. , 1.25, 1.5 , 1.75, 2. , 2.25])
2**a
array([ 1, 2, 4, 8, 16, 32, 64, 128, 256, 512])
With an array: Only if dimension sizes are compatible: either the same or 1.
a = np.array([[1, 2, 3, 4],
[5, 6, 7, 8]])
b = rng.random((2, 4))
a+b
array([[1.69820387, 2.17553438, 3.23806583, 4.86068711],
[5.84009213, 6.79565407, 7.3630058 , 8.42174073]])
a-b
array([[0.30179613, 1.82446562, 2.76193417, 3.13931289],
[4.15990787, 5.20434593, 6.6369942 , 7.57825927]])
a*b
array([[0.69820387, 0.35106876, 0.71419748, 3.44274844],
[4.20046063, 4.77392443, 2.54104057, 3.37392582]])
a/b
array([[ 1.43224642, 11.39377951, 12.60155665, 4.64744964],
[ 5.9517282 , 7.54096563, 19.28343866, 18.96900034]])
np.sin(b)
array([[0.64284289, 0.17463433, 0.23582345, 0.75829068],
[0.74470461, 0.71432149, 0.35508575, 0.40934927]])
a = np.array([1, 2, 3, 4])
b = np.array([4, 2, 2, 4])
a == b
array([False, True, False, True])
a = np.arange(1, 10)
b = np.arange(1, 8).reshape((-1, 1))
a
array([1, 2, 3, 4, 5, 6, 7, 8, 9])
b
array([[1],
[2],
[3],
[4],
[5],
[6],
[7]])
# Broadcasting: A(1*M) * B(N*1) = C(N * M)
a*b
array([[ 1, 2, 3, 4, 5, 6, 7, 8, 9],
[ 2, 4, 6, 8, 10, 12, 14, 16, 18],
[ 3, 6, 9, 12, 15, 18, 21, 24, 27],
[ 4, 8, 12, 16, 20, 24, 28, 32, 36],
[ 5, 10, 15, 20, 25, 30, 35, 40, 45],
[ 6, 12, 18, 24, 30, 36, 42, 48, 54],
[ 7, 14, 21, 28, 35, 42, 49, 56, 63]])
Matrix multiplication
dot(a, b)
, a@b
a = rng.random((5, 5))
b = rng.random((5, 5))
# Element-wise multiplication
a*b
array([[0.78868342, 0.49118797, 0.08852474, 0.39031773, 0.1036044 ],
[0.32554843, 0.84162837, 0.33406562, 0.41390956, 0.01483335],
[0.14103514, 0.4394715 , 0.61674883, 0.32026644, 0.25262 ],
[0.30810766, 0.11214607, 0.43462416, 0.00689956, 0.30660707],
[0.70384478, 0.69272969, 0.05537662, 0.6562527 , 0.68798726]])
# Matrix multiplication
a@b
array([[1.68196594, 1.84647145, 1.28724573, 1.50478122, 1.70571853],
[1.65859451, 1.90927827, 1.41927731, 1.46326817, 1.63331052],
[1.63787964, 1.94050331, 1.60760857, 1.38089309, 1.66037035],
[2.06133206, 2.15748522, 1.22029498, 1.82453664, 2.15058793],
[2.46568113, 2.70205421, 1.50635695, 2.09120754, 2.41339118]])
# Matrix multiplication
np.dot(a,b)
array([[1.68196594, 1.84647145, 1.28724573, 1.50478122, 1.70571853],
[1.65859451, 1.90927827, 1.41927731, 1.46326817, 1.63331052],
[1.63787964, 1.94050331, 1.60760857, 1.38089309, 1.66037035],
[2.06133206, 2.15748522, 1.22029498, 1.82453664, 2.15058793],
[2.46568113, 2.70205421, 1.50635695, 2.09120754, 2.41339118]])
# No need to transpose 1D array for `dot(a, b)`
a = rng.random((5, 5))
b = rng.random(5)
c = rng.random(5)
# Matrix x vector
np.dot(a,b)
array([0.90748978, 1.14805149, 1.18529568, 1.24239126, 0.52249168])
# Vector x vector (inner product, gives a scalar)
np.dot(c,b)
1.0679300588160907
Combining Arrays#
This one will give you headaches.
concatenate((a, b), axis=n)
stack((a,b), axis=n)
The former joins arrays in the existing axis; the latter creates a new axis.
a = np.arange(0, 10)
b = np.arange(0, 10) + 10
# along the row (1st axis), existing axis
np.concatenate((a, b), axis=0)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19])
# along a new 2nd axis: a and b become the columns
np.stack((a, b), axis=1)
array([[ 0, 10],
[ 1, 11],
[ 2, 12],
[ 3, 13],
[ 4, 14],
[ 5, 15],
[ 6, 16],
[ 7, 17],
[ 8, 18],
[ 9, 19]])
Reduction#
sum(v, axis=n)
, cumsum(v, axis=n)
a = np.arange(0, 6).reshape((2, 3))
a
array([[0, 1, 2],
[3, 4, 5]])
np.sum(a)
15
np.sum(a, axis=1)
array([ 3, 12])
np.sum(a, axis=0)
array([3, 5, 7])
np.cumsum(a)
array([ 0, 1, 3, 6, 10, 15])
np.cumsum(a, axis=1)
array([[ 0, 1, 3],
[ 3, 7, 12]])
np.cumsum(a, axis=0)
array([[0, 1, 2],
[3, 5, 7]])
amin(v, axis=n)
amax(v, axis=n)
minimum(a, b)
maximum(a, b)
argmin(v, axis=n)
argmax(v, axis=n)
np.amin(a)
0
np.argmin(a)
0
np.amax(a)
5
np.argmax(a)
5
b = (rng.standard_normal((2, 3)) + 1) * 5
b
array([[10.31301003, 12.1457774 , 8.54297976],
[ 3.22853517, -0.46068757, 11.74965212]])
np.minimum(a, b)
array([[ 0. , 1. , 2. ],
[ 3. , -0.46068757, 5. ]])
np.maximum(a, b)
array([[10.31301003, 12.1457774 , 8.54297976],
[ 3.22853517, 4. , 11.74965212]])
np.all([True, True, False])
False
np.any([True, True, False])
True