Numpy tutorial

Numpy tutorial#

Array manipulation in Python

Reference#

Numpy official reference: https://numpy.org/doc/stable/
Scipy official reference: https://docs.scipy.org/doc/scipy/
Python Data Science Handbook: https://jakevdp.github.io/PythonDataScienceHandbook/
Python course EU: https://www.python-course.eu/numpy.php

Call the function#

For many functions, np.func(a, x, y) is equivalent to the method call a.func(x, y) (e.g. np.sum(a, axis=0) and a.sum(axis=0)).

Convention#

The common short name for numpy is np.

import numpy as np
np.__version__

'2.2.4'

Creating an array from a sequence#

`array(seq)`, `asarray(seq)`#

seq can be a tuple, list, or a numpy array.

asarray() does not make a new copy if seq is already a numpy array.

Creating a 1D numpy array from a list.

Array basics#

a.ndim : number of dimensions
a.shape: Tuple of lengths for each dimension
a.size : total number of elements (product of shape); equals len(a) only for 1D arrays
a.dtype: data type
a[i] : accessing i th element in the 1D array (start from 0)
a[i, j]: accessing element i th row, j th column (2D array, start from 0)
a[:, j]: accessing j th column (2D array)
a[i, :]: accessing i th row (2D array)
a.T : Transpose of a
a.copy(): make a copy of a that does not share memory
a.reshape(shape): reshape the array if the new size is compatible (i.e. the same total size)

a = np.array([1, 9, 8, 7])
a

array([1, 9, 8, 7])

a.ndim

a.shape

(4,)

a.size

a.dtype

dtype('int64')

a[3]

np.int64(7)

In numpy (as in plain Python), the suffix j denotes the imaginary unit of a complex number, e.g. 1+2j.

np.array([1+2j, 3+4j, 5+6*1j]).dtype

dtype('complex128')

Creating a multidimensional array from a nested list, with complex numbers.

b = np.asarray([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0+1j]])
b

array([[0.+0.j, 1.+0.j, 2.+0.j],
       [3.+0.j, 4.+0.j, 5.+1.j]])

b.ndim

b.shape

(2, 3)

b.dtype

dtype('complex128')

b[1, :]

array([3.+0.j, 4.+0.j, 5.+1.j])

b[:, 0]

array([0.+0.j, 3.+0.j])

b.T

array([[0.+0.j, 3.+0.j],
       [1.+0.j, 4.+0.j],
       [2.+0.j, 5.+1.j]])

b.reshape((3, 2))

array([[0.+0.j, 1.+0.j],
       [2.+0.j, 3.+0.j],
       [4.+0.j, 5.+1.j]])

b.reshape((1, -1)) # -1 means the size of that dimension is calculated automatically

array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j, 4.+0.j, 5.+1.j]])

Creating a labelled (structured) array.

dtype = [('label1',int),('label2',int)]
a = np.array([(3,4)], dtype=dtype)
a['label1']

array([3])

Creating an array from a function#

arange(start, stop, step)
linspace(start, stop, num, endpoint=True)
logspace(start, stop, num, endpoint=True)
ones((d1, d2, ...))
ones_like(arr)
zeros((d1, d2, ...))
zeros_like(arr)
full((d1, d2, ...), val)
eye(k)
diag(seq)
fromfunction(f, (d1, d2, ...))
fromiter(iter, dtype)
meshgrid(x1, x2, ...)

np.arange(10, 0, -1)

array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1])

np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

np.linspace(0, 1, 5, endpoint=False)

array([0. , 0.2, 0.4, 0.6, 0.8])

np.logspace(-10.0, 10.0, 11)

array([1.e-10, 1.e-08, 1.e-06, 1.e-04, 1.e-02, 1.e+00, 1.e+02, 1.e+04,
       1.e+06, 1.e+08, 1.e+10])

np.ones((3, 3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

a = np.arange(5.0)
print(np.ones_like(a))

[1. 1. 1. 1. 1.]

np.full((3, 4), 42)

array([[42, 42, 42, 42],
       [42, 42, 42, 42],
       [42, 42, 42, 42]])

np.full_like(a, 69)

array([69., 69., 69., 69., 69.])

np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

np.diag([4, 5, 6, 7])

array([[4, 0, 0, 0],
       [0, 5, 0, 0],
       [0, 0, 6, 0],
       [0, 0, 0, 7]])

np.fromfunction(lambda i, j: i >= j, (4, 4))

array([[ True, False, False, False],
       [ True,  True, False, False],
       [ True,  True,  True, False],
       [ True,  True,  True,  True]])

np.fromiter((x*x for x in range(5)) , dtype=np.float64)

array([ 0.,  1.,  4.,  9., 16.])

import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
# sparse=True to save some memory
xx, yy = np.meshgrid(x, y, sparse=True)
print('xx =', xx, sep='\n')
print('yy =', yy, sep='\n')
plt.contourf(x,y, np.sin(xx**2 + yy**2) / (xx**2 + yy**2))
plt.colorbar()

xx =
[[-5.         -4.8989899  -4.7979798  -4.6969697  -4.5959596  -4.49494949
  -4.39393939 -4.29292929 -4.19191919 -4.09090909 -3.98989899 -3.88888889
  -3.78787879 -3.68686869 -3.58585859 -3.48484848 -3.38383838 -3.28282828
  -3.18181818 -3.08080808 -2.97979798 -2.87878788 -2.77777778 -2.67676768
  -2.57575758 -2.47474747 -2.37373737 -2.27272727 -2.17171717 -2.07070707
  -1.96969697 -1.86868687 -1.76767677 -1.66666667 -1.56565657 -1.46464646
  -1.36363636 -1.26262626 -1.16161616 -1.06060606 -0.95959596 -0.85858586
  -0.75757576 -0.65656566 -0.55555556 -0.45454545 -0.35353535 -0.25252525
  -0.15151515 -0.05050505  0.05050505  0.15151515  0.25252525  0.35353535
   0.45454545  0.55555556  0.65656566  0.75757576  0.85858586  0.95959596
   1.06060606  1.16161616  1.26262626  1.36363636  1.46464646  1.56565657
   1.66666667  1.76767677  1.86868687  1.96969697  2.07070707  2.17171717
   2.27272727  2.37373737  2.47474747  2.57575758  2.67676768  2.77777778
   2.87878788  2.97979798  3.08080808  3.18181818  3.28282828  3.38383838
   3.48484848  3.58585859  3.68686869  3.78787879  3.88888889  3.98989899
   4.09090909  4.19191919  4.29292929  4.39393939  4.49494949  4.5959596
   4.6969697   4.7979798   4.8989899   5.        ]]
yy =
[[-5.        ]
 [-4.8989899 ]
 [-4.7979798 ]
 [-4.6969697 ]
 [-4.5959596 ]
 [-4.49494949]
 [-4.39393939]
 [-4.29292929]
 [-4.19191919]
 [-4.09090909]
 [-3.98989899]
 [-3.88888889]
 [-3.78787879]
 [-3.68686869]
 [-3.58585859]
 [-3.48484848]
 [-3.38383838]
 [-3.28282828]
 [-3.18181818]
 [-3.08080808]
 [-2.97979798]
 [-2.87878788]
 [-2.77777778]
 [-2.67676768]
 [-2.57575758]
 [-2.47474747]
 [-2.37373737]
 [-2.27272727]
 [-2.17171717]
 [-2.07070707]
 [-1.96969697]
 [-1.86868687]
 [-1.76767677]
 [-1.66666667]
 [-1.56565657]
 [-1.46464646]
 [-1.36363636]
 [-1.26262626]
 [-1.16161616]
 [-1.06060606]
 [-0.95959596]
 [-0.85858586]
 [-0.75757576]
 [-0.65656566]
 [-0.55555556]
 [-0.45454545]
 [-0.35353535]
 [-0.25252525]
 [-0.15151515]
 [-0.05050505]
 [ 0.05050505]
 [ 0.15151515]
 [ 0.25252525]
 [ 0.35353535]
 [ 0.45454545]
 [ 0.55555556]
 [ 0.65656566]
 [ 0.75757576]
 [ 0.85858586]
 [ 0.95959596]
 [ 1.06060606]
 [ 1.16161616]
 [ 1.26262626]
 [ 1.36363636]
 [ 1.46464646]
 [ 1.56565657]
 [ 1.66666667]
 [ 1.76767677]
 [ 1.86868687]
 [ 1.96969697]
 [ 2.07070707]
 [ 2.17171717]
 [ 2.27272727]
 [ 2.37373737]
 [ 2.47474747]
 [ 2.57575758]
 [ 2.67676768]
 [ 2.77777778]
 [ 2.87878788]
 [ 2.97979798]
 [ 3.08080808]
 [ 3.18181818]
 [ 3.28282828]
 [ 3.38383838]
 [ 3.48484848]
 [ 3.58585859]
 [ 3.68686869]
 [ 3.78787879]
 [ 3.88888889]
 [ 3.98989899]
 [ 4.09090909]
 [ 4.19191919]
 [ 4.29292929]
 [ 4.39393939]
 [ 4.49494949]
 [ 4.5959596 ]
 [ 4.6969697 ]
 [ 4.7979798 ]
 [ 4.8989899 ]
 [ 5.        ]]

<matplotlib.colorbar.Colorbar at 0x7f85e02941a0>

_images/e9cbdd7996c0a68f8b7f838e97c922817ce6693fb537659f2ee13c7a8728ebfc.png

Random#

The new API: https://numpy.org/doc/stable/reference/random/index.html?highlight=random#module-numpy.random

from numpy.random import default_rng
rng = default_rng()

rng.random()

0.16254066878111328

# Uniform [0, 1)
rng.random((4, 3))

array([[0.47371834, 0.1426538 , 0.91251011],
       [0.7839131 , 0.99346187, 0.19459953],
       [0.95983776, 0.5296666 , 0.95648684],
       [0.28578261, 0.62607749, 0.71721577]])

# Integers
rng.integers(1, 7, (10, 20))

array([[6, 5, 1, 2, 6, 1, 1, 6, 6, 2, 4, 2, 2, 3, 2, 5, 3, 6, 1, 3],
       [4, 6, 5, 4, 4, 4, 6, 5, 3, 5, 3, 1, 3, 3, 3, 3, 3, 1, 1, 4],
       [4, 3, 6, 4, 3, 6, 3, 3, 2, 3, 2, 3, 1, 6, 1, 4, 6, 1, 3, 5],
       [6, 3, 2, 6, 1, 1, 5, 5, 1, 5, 6, 6, 6, 1, 1, 4, 2, 5, 4, 4],
       [4, 3, 1, 3, 5, 6, 1, 3, 6, 1, 1, 5, 1, 2, 6, 2, 4, 1, 3, 3],
       [1, 3, 6, 2, 4, 1, 2, 5, 3, 1, 3, 1, 5, 3, 1, 6, 2, 1, 1, 2],
       [1, 2, 5, 3, 2, 6, 5, 6, 2, 6, 6, 6, 2, 5, 2, 2, 2, 5, 4, 2],
       [6, 5, 1, 1, 4, 4, 1, 4, 5, 2, 1, 6, 1, 6, 3, 2, 6, 4, 4, 4],
       [6, 5, 1, 1, 2, 3, 4, 4, 5, 1, 3, 2, 2, 6, 3, 2, 1, 3, 2, 5],
       [1, 5, 3, 4, 6, 1, 2, 4, 4, 2, 5, 3, 3, 6, 3, 4, 1, 2, 1, 3]])

# Standard normal distribution (mean 0, standard deviation 1)
rng.standard_normal(10)

array([ 8.94944799e-04,  3.32425801e-02, -1.24122549e+00,  5.23751872e-01,
        2.20360062e-01,  1.35153325e+00, -1.22623284e-01, -1.15118664e+00,
        5.27489627e-01,  1.09999138e+00])

# Random choice
choices = np.array(["one", "two"])
# Select by index
choices[rng.integers(0, 2, (3, 4))]

array([['two', 'one', 'one', 'two'],
       ['two', 'one', 'one', 'one'],
       ['two', 'two', 'two', 'two']], dtype='<U3')

# Or choice function, prob weights supported
rng.choice(choices, size=(5, 3), p=[0.3, 0.7])

array([['one', 'two', 'two'],
       ['one', 'two', 'two'],
       ['two', 'two', 'two'],
       ['two', 'one', 'one'],
       ['one', 'two', 'two']], dtype='<U3')

Selecting elements#

a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# a[idx]
a[0], a[3], a[7]

(np.int64(0), np.int64(3), np.int64(7))

# a[[indices]]
a[[0, 3, 7]]

array([0, 3, 7])

# a[condition]
# Selection with a boolean mask (an array of True/False values)
a[a<5]

array([0, 1, 2, 3, 4])

# Slice: a[start:end:step]
a[1::2]

array([1, 3, 5, 7, 9])

# Reverse
a[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

# Mutating the elements
a[0] = 1000
a

array([1000,    1,    2,    3,    4,    5,    6,    7,    8,    9])

Indexing for 2D / 3D arrays#

In 2D, the first dimension corresponds to rows, the second to columns. Numpy is row-major by default, as in C-style arrays.

a[i, j] for the element from ith row and jth column.

b = np.arange(25).reshape((5,5))
b

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

# Each index is separated by comma
b[2, 3]

np.int64(13)

# Slices are views: they share the same underlying memory as the original array.
c = b[1::2, 1::2]
c

array([[ 6,  8],
       [16, 18]])

c[0, 0] = 666  # Mutates b !!!
print("After mutating:")
b

After mutating:

array([[  0,   1,   2,   3,   4],
       [  5, 666,   7,   8,   9],
       [ 10,  11,  12,  13,  14],
       [ 15,  16,  17,  18,  19],
       [ 20,  21,  22,  23,  24]])

np.may_share_memory(c, b)

True

# Use copy to prevent unwanted overwriting
a = np.arange(10)
c = a[::2].copy()
c[0] = 12
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# combining assignment and slicing
a = np.arange(10)
a[5:] = 10
a

array([ 0,  1,  2,  3,  4, 10, 10, 10, 10, 10])

b = np.arange(5)
a[5:] = b[::-1]
a

array([0, 1, 2, 3, 4, 4, 3, 2, 1, 0])

Numerical operations on arrays#

Operations are element-wise (with broadcasting) by default.
Many math functions are available in numpy (e.g. sin, cos); search the NumPy reference documentation to find them (np.lookfor was removed in NumPy 2.0).
Others can be found in the SciPy documentation.

a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

a+1

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

a-3

array([-3, -2, -1,  0,  1,  2,  3,  4,  5,  6])

a*2

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

a/4

array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  , 2.25])

2**a

array([  1,   2,   4,   8,  16,  32,  64, 128, 256, 512])

With another array: the operation works only if the shapes are compatible, i.e. each pair of corresponding dimension sizes is either equal or one of them is 1.

a = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8]])
b = rng.random((2, 4))

a+b

array([[1.76666567, 2.52415105, 3.14545656, 4.58201141],
       [5.37544696, 6.29206026, 7.15904329, 8.57593922]])

a-b

array([[0.23333433, 1.47584895, 2.85454344, 3.41798859],
       [4.62455304, 5.70793974, 6.84095671, 7.42406078]])

a*b

array([[0.76666567, 1.04830211, 0.43636968, 2.32804565],
       [1.87723479, 1.75236156, 1.113303  , 4.60751375]])

a/b

array([[ 1.30434953,  3.81569395, 20.62471445,  6.87271747],
       [13.31746044, 20.54370555, 44.01317505, 13.89035465]])

np.sin(b)

array([[0.69373762, 0.50047821, 0.14494418, 0.5497053 ],
       [0.36668839, 0.28792585, 0.15837364, 0.54462274]])

a = np.array([1, 2, 3, 4])
b = np.array([4, 2, 2, 4])
a == b

array([False,  True, False,  True])

a = np.arange(1, 10)
b = np.arange(1, 8).reshape((-1, 1))

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7]])

# Broadcasting: A(1*M) * B(N*1) = C(N * M)
a*b

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9],
       [ 2,  4,  6,  8, 10, 12, 14, 16, 18],
       [ 3,  6,  9, 12, 15, 18, 21, 24, 27],
       [ 4,  8, 12, 16, 20, 24, 28, 32, 36],
       [ 5, 10, 15, 20, 25, 30, 35, 40, 45],
       [ 6, 12, 18, 24, 30, 36, 42, 48, 54],
       [ 7, 14, 21, 28, 35, 42, 49, 56, 63]])

Matrix multiplication

dot(a, b), a@b

a = rng.random((5, 5))
b = rng.random((5, 5))

# Element-wise multiplication
a*b

array([[0.73493789, 0.04637113, 0.05456267, 0.35526117, 0.00954979],
       [0.04769284, 0.03231905, 0.73216169, 0.28095914, 0.0773994 ],
       [0.34888542, 0.45080264, 0.08431408, 0.7504242 , 0.01500193],
       [0.1114805 , 0.1814476 , 0.0441196 , 0.46793799, 0.02086455],
       [0.24268148, 0.46454168, 0.1581651 , 0.21492264, 0.14497576]])

# Matrix multiplication
a@b

array([[1.93132242, 1.22425588, 0.67298767, 1.66265467, 0.53803589],
       [2.1561166 , 1.36591355, 1.02412271, 2.03524358, 0.60691846],
       [1.5321007 , 0.83515489, 0.72314571, 1.35194973, 0.43535381],
       [1.67576889, 1.11432675, 0.57182972, 1.58127803, 0.35543646],
       [2.29212916, 1.16744227, 0.97129464, 1.96208581, 0.56532602]])

# Matrix multiplication
np.dot(a,b)

array([[1.93132242, 1.22425588, 0.67298767, 1.66265467, 0.53803589],
       [2.1561166 , 1.36591355, 1.02412271, 2.03524358, 0.60691846],
       [1.5321007 , 0.83515489, 0.72314571, 1.35194973, 0.43535381],
       [1.67576889, 1.11432675, 0.57182972, 1.58127803, 0.35543646],
       [2.29212916, 1.16744227, 0.97129464, 1.96208581, 0.56532602]])

# No need to transpose 1D array for `dot(a, b)`
a = rng.random((5, 5))
b = rng.random(5)
c = rng.random(5)

# Matrix x vector
np.dot(a,b)

array([0.55356915, 0.69629176, 0.70801912, 0.73457396, 0.54977172])

# Vector . vector (inner product, returns a scalar)
np.dot(c,b)

np.float64(0.6002889812298224)

Combining Arrays#

This one will give you headaches.

concatenate((a, b), axis=n)
stack((a,b), axis=n)

The former joins arrays in the existing axis; the latter creates a new axis.

a = np.arange(0, 10)
b = np.arange(0, 10) + 10
# Join along the existing first axis (axis 0)
np.concatenate((a, b), axis=0)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

# Stack along a new second axis (axis 1): a and b become the columns
np.stack((a, b), axis=1)

array([[ 0, 10],
       [ 1, 11],
       [ 2, 12],
       [ 3, 13],
       [ 4, 14],
       [ 5, 15],
       [ 6, 16],
       [ 7, 17],
       [ 8, 18],
       [ 9, 19]])

Reduction#

sum(v, axis=n), cumsum(v, axis=n)

a = np.arange(0, 6).reshape((2, 3))
a

array([[0, 1, 2],
       [3, 4, 5]])

np.sum(a)

np.int64(15)

np.sum(a, axis=1)

array([ 3, 12])

np.sum(a, axis=0)

array([3, 5, 7])

np.cumsum(a)

array([ 0,  1,  3,  6, 10, 15])

np.cumsum(a, axis=1)

array([[ 0,  1,  3],
       [ 3,  7, 12]])

np.cumsum(a, axis=0)

array([[0, 1, 2],
       [3, 5, 7]])

amin(v, axis=n)
amax(v, axis=n)
minimum(a, b)
maximum(a, b)
argmin(v, axis=n)
argmax(v, axis=n)

np.amin(a)

np.int64(0)

np.argmin(a)

np.int64(0)

np.amax(a)

np.int64(5)

np.argmax(a)

np.int64(5)

b = (rng.standard_normal((2, 3)) + 1) * 5
b

array([[ 8.01205506,  8.11089468,  9.70623476],
       [ 2.93734788,  4.70766494, -2.79205315]])

np.minimum(a, b)

array([[ 0.        ,  1.        ,  2.        ],
       [ 2.93734788,  4.        , -2.79205315]])

np.maximum(a, b)

array([[8.01205506, 8.11089468, 9.70623476],
       [3.        , 4.70766494, 5.        ]])

np.all([True, True, False])

np.False_

np.any([True, True, False])

np.True_