Theme: Programming Fundamentals
Topic: OO Design
Keywords: objects, classes, design
Presenter | James Powell james@dutc.io |
Date | Friday, December 18, 2020 |
Time | 12:00 PM PST |
# Prints a short greeting to standard output.
print("Let's go!")
Attendees asked us:
collections.namedtuple?
xarray.DataArray?
async/await?
collections.namedtuple?
collections.namedtuple?
tuple?
dict?
xarray.DataArray?
numpy.ndarray?
pandas.Series or pandas.DataFrame?
async/await?
numpy?
Why pandas?
Why xarray?
numpy? What does it offer?
pandas? What does it offer?
xarray? What does it offer?

Genetic algorithm/evolutionary programming:
# Two example parent chromosomes used to illustrate crossover.
left = [0, 1, 0, 1, 1]
right = [1, 1, 1, 0, 0]
# Sketch of a single-point crossover with the split after the first gene:
# the child is left[:1] followed by right[1:].
#    .                  <- split point
#   [0,            ]    <- prefix taken from `left`
#   [   1, 1, 0, 0]     <- suffix taken from `right`
#   [0, 1, 1, 0, 0]     <- resulting child
from random import choice
def random_chromosome(size):
    """Return a list of ``size`` booleans, each chosen independently at random."""
    return [choice([True, False]) for _ in range(size)]


# Demo: build one chromosome of five genes and show it.
c = random_chromosome(5)
print(f'{c = }')
from random import choice
def random_chromosome(length):
    """Return a list of ``length`` booleans, each chosen independently at random."""
    return [choice([True, False]) for _ in range(length)]


def random_population(size, length):
    """Return a list of ``size`` random chromosomes, each ``length`` genes long."""
    return [random_chromosome(length) for _ in range(size)]


# Demo: ten chromosomes of five genes each.
p = random_population(10, 5)
print(f'{p = }')
from random import choice
from statistics import mean
def random_chromosome(length):
    """Return a list of ``length`` booleans, each chosen independently at random."""
    return [choice([True, False]) for _ in range(length)]


def random_population(size, length):
    """Return a list of ``size`` random chromosomes, each ``length`` genes long."""
    return [random_chromosome(length) for _ in range(size)]


def fitness(chromosome):
    """Return the fraction of genes that are True.

    Raises statistics.StatisticsError for an empty chromosome, because the
    mean of no values is undefined.
    """
    return mean(chromosome)


# Demo: score a single random chromosome.
c = random_chromosome(5)
print(f'{c = }')
print(f'{fitness(c) = }')
from random import choice, choices
from statistics import mean
from itertools import count, islice
def random_chromosome(length):
    """Return a list of ``length`` booleans, each chosen independently at random."""
    return [choice([True, False]) for _ in range(length)]


def random_population(length):
    """Return an endless generator of random chromosomes of ``length`` genes.

    The stream never terminates; callers take a finite prefix, e.g. with
    itertools.islice.
    """
    return (random_chromosome(length) for _ in count())


def fitness(chromosome):
    """Return the fraction of genes that are True."""
    return mean(chromosome)
def select(population):
    """Return an endless generator of fitness-weighted mating pairs.

    Each item is a two-element list drawn (with replacement) from
    ``population``, where a chromosome's chance of being drawn is
    proportional to its fitness.
    """
    probs = [fitness(c) for c in population]
    if not any(probs):
        # random.choices cannot sample when every weight is zero (it raises
        # ValueError on current Python versions); with no fitness signal at
        # all, fall back to a uniform draw.
        probs = None
    return (choices(population, weights=probs, k=2) for _ in count())
# Demo: take ten chromosomes from the endless stream, show them, then show
# the first three mating pairs that selection produces.
p = [*islice(random_population(5), 10)]
for c in p:
    print(f'{c = }')
for left, right in islice(select(p), 3):
    print(f'{left = }')
    print(f'{right = }')
    print()
from random import choice, choices
from statistics import mean
from itertools import count, islice
from time import perf_counter_ns
from contextlib import contextmanager
@contextmanager
def timed(heading):
    """Context manager that prints how long its body took.

    heading: label printed in front of the measurement (left-aligned in a
    20-character column) so several timings can be told apart.

    The report is printed from a ``finally`` clause, so it appears even when
    the body raises.
    """
    start = perf_counter_ns()
    try:
        yield
    finally:
        stop = perf_counter_ns()
        # round to the nearest microsecond, then convert nanoseconds to seconds
        print(f'{heading:<20} \N{greek capital letter delta}t: {round(stop - start, -3)/1e9}s')
def random_chromosome(length):
    """Return a list of ``length`` booleans, each chosen independently at random."""
    return [choice([True, False]) for _ in range(length)]


def random_population(length):
    """Return an endless generator of random chromosomes of ``length`` genes."""
    return (random_chromosome(length) for _ in count())


def fitness(chromosome):
    """Return the fraction of genes that are True."""
    return mean(chromosome)
def select(population):
    """Return an endless generator of fitness-weighted mating pairs.

    Each item is a two-element list drawn (with replacement) from
    ``population``, where a chromosome's chance of being drawn is
    proportional to its fitness.
    """
    probs = [fitness(c) for c in population]
    if not any(probs):
        # random.choices cannot sample when every weight is zero (it raises
        # ValueError on current Python versions); with no fitness signal at
        # all, fall back to a uniform draw.
        probs = None
    return (choices(population, weights=probs, k=2) for _ in count())
def crossover(left, right):
    """Return a child made by single-point crossover of two parents.

    A cut position is chosen at random within the shorter parent; the child
    is the head of one parent followed by the tail of the other, and which
    parent supplies the head is decided by a coin flip. The parents are not
    modified.

    Raises IndexError if either parent is empty (there is no position to
    choose from).
    """
    pos = choice(range(min(len(left), len(right))))
    if choice([True, False]):
        return [*left[:pos], *right[pos:]]
    return [*right[:pos], *left[pos:]]
def mutate(child):
    """Return a copy of ``child`` with exactly one randomly chosen gene flipped.

    The input list is left untouched; callers must use the return value.
    Raises IndexError for an empty chromosome.
    """
    pos = choice(range(len(child)))
    return [*child[:pos], not child[pos], *child[pos+1:]]
def breed(pairs):
    """Yield one mutated child for every (left, right) pair of parents.

    pairs: iterable of two-element sequences of chromosomes; it may be
    endless, in which case this generator is endless as well.
    """
    for left, right in pairs:
        child = crossover(left, right)
        # mutate() returns a new list rather than changing its argument, so
        # its result has to be the thing that is yielded.
        yield mutate(child)
if __name__ == '__main__':
    NUM_STEPS, POP_SIZE = 500, 100

    # { length: [ ... ] } -- per chromosome length, the list of successive
    # populations, starting with a random one
    groups = {length: [] for length in {5, 10, 25, 50}}
    for length, hist in groups.items():
        pop = [*islice(random_population(length), POP_SIZE)]
        hist.append(pop)

    # evolve every group for NUM_STEPS generations, each bred from the latest
    with timed('Pure Python'):
        for hist in groups.values():
            for _ in range(NUM_STEPS):
                new_pop = [*islice(breed(select(hist[-1])), POP_SIZE)]
                hist.append(new_pop)

    # plot the mean fitness of each generation, one line per chromosome length
    from matplotlib.pyplot import plot, show, legend
    for length, hist in groups.items():
        xs = range(len(hist))
        ys = [mean([fitness(c) for c in pop]) for pop in hist]
        plot(xs, ys, label=f'{length = }')
    legend(loc='lower right')
    show()
from time import perf_counter_ns
from contextlib import contextmanager
@contextmanager
def timed(heading):
    """Context manager that prints how long its body took.

    heading: label printed in front of the measurement (left-aligned in a
    20-character column).

    The report is printed from a ``finally`` clause, so it appears even when
    the body raises.
    """
    start = perf_counter_ns()
    try:
        yield
    finally:
        stop = perf_counter_ns()
        # round to the nearest microsecond, then convert nanoseconds to seconds
        print(f'{heading:<20} \N{greek capital letter delta}t: {round(stop - start, -3)/1e9}s')
from random import gauss
def dot(x, y):
    """Return the sum of the pairwise products of ``x`` and ``y``.

    Elements are paired with zip, so surplus elements of the longer input
    are ignored; two empty inputs give 0.
    """
    total = 0
    for a, b in zip(x, y):
        total += a * b
    return total
# Pure Python: build two lists of a million Gaussian samples, then time
# their dot product.
with timed('Pure Python create'):
    xs = [gauss(0, 1) for _ in range(1_000_000)]
    ys = [gauss(0, 1) for _ in range(1_000_000)]
with timed('Pure Python compute'):
    dot(xs, ys)

# numpy: the same two steps with arrays, for comparison.
from numpy.random import normal
with timed('numpy create'):
    xs = normal(size=1_000_000)
    ys = normal(size=1_000_000)
with timed('numpy compute'):
    xs.dot(ys)
from pandas import DataFrame, IndexSlice
from numpy.random import choice
from numpy import arange
from time import perf_counter_ns
from contextlib import contextmanager
@contextmanager
def timed(heading):
    """Context manager that prints how long its body took.

    heading: label printed in front of the measurement (left-aligned in a
    20-character column).

    The report is printed from a ``finally`` clause, so it appears even when
    the body raises.
    """
    start = perf_counter_ns()
    try:
        yield
    finally:
        stop = perf_counter_ns()
        # round to the nearest microsecond, then convert nanoseconds to seconds
        print(f'{heading:<20} \N{greek capital letter delta}t: {round(stop - start, -3)/1e9}s')
from collections import namedtuple
class Step(namedtuple('Step', 'pop mates xover mutation children')):
    """Record of one breeding step: its inputs, intermediates and offspring."""

    @classmethod
    def from_children(cls, children):
        """Build a Step that carries only ``children``; other fields are None.

        Used for the starting generation, which has no parents.
        """
        return cls(None, None, None, None, children)
def random_chromosome(length):
    """Return a 1-D boolean array of ``length`` random genes."""
    return choice([True, False], size=length)


def random_population(size, length):
    """Return a ``(size, length)`` boolean array: one random chromosome per row."""
    return choice([True, False], size=(size, length))
def breed(pop, size):
    """Produce the next generation from ``pop`` and return it as a Step.

    pop:  2-D boolean array, one chromosome per row.
    size: number of children to produce. It has to equal the number of rows
          in ``pop``, because the masks below are drawn with ``pop.shape``.

    Reads the module-level MUTATION_RATE.
    """
    # pick the mating pairs: row fitness (fraction of True genes) becomes the
    # selection probability
    p = pop.mean(axis=1)
    total = p.sum()
    # A population with no True gene at all has zero total fitness; dividing
    # would give NaN probabilities, so select uniformly in that case.
    p = p / total if total > 0 else None
    mates = choice(arange(size), size=(size, 2), p=p)
    left, right = pop[mates[:, 0]], pop[mates[:, 1]]
    # pick indices for left, right pair & cross over
    #   mask = [0 1 1 0 1]
    #          [0 0 1 1 1]
    # Sorting each row turns a random mask into "False... then True...", i.e.
    # a single crossover point per child.
    xover_mask = choice([True, False], size=pop.shape)
    xover_mask.sort(axis=1)
    # True with probability MUTATION_RATE; those genes get flipped by the XOR
    mutation_mask = choice([True, False], p=(MUTATION_RATE, 1 - MUTATION_RATE), size=pop.shape)
    # on boolean arrays * acts as AND and + as OR: take `left` where the mask
    # is True and `right` elsewhere
    children = ((left * xover_mask) + (right * ~xover_mask)) ^ mutation_mask
    return Step(pop, mates, xover_mask, mutation_mask, children)
if __name__ == '__main__':
    MUTATION_RATE = 1e-4
    NUM_STEPS, POP_SIZE = 100, 100

    # { length: [ ... ] } -- per chromosome length, the list of Steps so far
    groups = {length: [] for length in {5, 10, 25, 50}}
    for length, hist in groups.items():
        pop = random_population(POP_SIZE, length)
        step = Step.from_children(pop)
        hist.append(step)

    # evolve every group, always breeding from the newest children
    with timed('numpy'):
        for hist in groups.values():
            for _ in range(NUM_STEPS):
                new_step = breed(hist[-1].children, POP_SIZE)
                hist.append(new_step)

    # plotting is switched off; flip the flag to see fitness per generation
    if (show_graph := False):
        from matplotlib.pyplot import plot, show, legend
        for length, hist in groups.items():
            xs = range(len(hist))
            ys = [step.children.mean() for step in hist]
            plot(xs, ys, label=f'{length = }')
        legend(loc='lower right')
        show()

    # one row per (length, step) holding the mean fitness of that generation
    df = DataFrame([(length, idx, step.children.mean()) for length, hist in groups.items()
                                                        for idx, step in enumerate(hist)])
    df.columns = 'length', 'step', 'fitness'
    df = df.set_index(['length', 'step']).sort_index()

    class Analysis(namedtuple('Analysis', 'raw size sliced increasing largest')):
        """Derived views of the fitness table for one chromosome length."""

        @classmethod
        def from_df(cls, raw, size):
            """Slice ``raw`` down to ``size`` and keep the rising stretches.

            ``increasing`` holds the rows at which fitness has gone up for
            three steps in a row. ``largest`` is left as None.
            """
            sliced = raw.loc[IndexSlice[size, :]]
            increasing = sliced[(sliced.diff() > 0).rolling(3, min_periods=1).sum() == 3].dropna()
            return cls(raw, size, sliced, increasing, None)

    results = {length: Analysis.from_df(df, length) for length in groups}
    print(results[5].increasing)
from numpy.random import normal
# A 3x3 array of normally distributed samples.
xs = normal(size=(3,3))
print(xs)
# Address of the array's data buffer, followed by the metadata numpy uses to
# interpret that buffer.
print(f'{xs.__array_interface__["data"][0] = :#_x}')
print(f'{xs.dtype = }')
print(f'{xs.shape = }')
print(f'{xs.strides = }')
# Slice off the last column and print the same details for the result, so
# its data address, shape and strides can be compared with those of xs.
ys = xs[:, 2:]
print(f'{ys.__array_interface__["data"][0] = :#_x}')
print(f'{ys.shape = }')
print(f'{ys.strides = }')
from numpy.random import randint
from time import perf_counter_ns
from contextlib import contextmanager
@contextmanager
def timed(heading):
    """Context manager that prints how long its body took.

    heading: label printed in front of the measurement (left-aligned in a
    20-character column) so several timings can be told apart.

    The report is printed from a ``finally`` clause, so it appears even when
    the body raises.
    """
    start = perf_counter_ns()
    try:
        yield
    finally:
        stop = perf_counter_ns()
        # round to the nearest microsecond, then convert nanoseconds to seconds
        print(f'{heading:<20} \N{greek capital letter delta}t: {round(stop - start, -3)/1e9}s')
# Two ways of squaring the negative entries and cubing the rest.
xs = randint(-1000, 1000, size=50_000_000)
ys = xs.copy()
with timed('attempt i'):
    # Work out which entries are negative once, before anything is modified:
    # after squaring, no entry is negative any more, so a second `ys >= 0`
    # test would select (and cube) the freshly squared values as well.
    negative = ys < 0
    ys[negative] **= 2
    ys[~negative] **= 3
with timed('attempt ii'):
    # branch-free variant: compute both powers everywhere, blend with the mask
    mask = xs < 0
    zs = (xs ** 2) * mask + (xs ** 3) * ~mask
class A:
    """Minimal class with an explicit, empty initializer."""

    def __init__(self):
        pass
from pandas import Series
# A Series labelled with the strings 'a', 'b', 'c'.
s = Series([1, 2, 3], index=[*'abc'])
print(s)
# NOTE(review): plain [] with an integer on a string-labelled Series has
# historically been treated as a positional lookup; newer pandas releases
# deprecate that fallback -- confirm against the pandas version in use.
print(s[0])
print(s['a'])
# The same values, now labelled with the integers 2, 1, 0. With integer
# labels, plain [] is a label lookup, so the next line is worth comparing
# with the explicit .loc / .iloc forms below.
s = Series([1, 2, 3], index=[2, 1, 0])
print(s[0])
print(s.loc[0]) # lookup by label
print(s.iloc[0]) # lookup by position
# Sketch (not runnable: the `...` parameters and body are placeholders) of a
# class whose single constructor receives a filename and opens it itself.
class Dataset:
def __init__(self, filename, ..., ..., ...):
with open(filename) as f:
...
# The one constructor is handed files of several different formats.
Dataset('input.csv')
Dataset('input.xml')
Dataset('input.json')
Dataset('input.xml')
Dataset('input.dat')
class Dataset:
    """Holds already-parsed records; each file format gets a named constructor."""

    def __init__(self, records):
        self.records = records

    @classmethod
    def from_csv(cls, filename):
        """Alternate constructor for CSV files (parsing left as a placeholder)."""
        with open(filename) as f:
            ...

    @classmethod
    def from_json(cls, filename):
        """Alternate constructor for JSON files (parsing left as a placeholder)."""
        with open(filename) as f:
            ...
from pandas import DataFrame, IndexSlice
from numpy.random import choice
from numpy import arange
from time import perf_counter_ns
from contextlib import contextmanager
from xarray import DataArray
@contextmanager
def timed(heading):
    """Context manager that prints how long its body took.

    heading: label printed in front of the measurement (left-aligned in a
    20-character column).

    The report is printed from a ``finally`` clause, so it appears even when
    the body raises.
    """
    start = perf_counter_ns()
    try:
        yield
    finally:
        stop = perf_counter_ns()
        # round to the nearest microsecond, then convert nanoseconds to seconds
        print(f'{heading:<20} \N{greek capital letter delta}t: {round(stop - start, -3)/1e9}s')
from collections import namedtuple
class Step(namedtuple('Step', 'pop mates xover mutation children')):
    """Record of one breeding step: its inputs, intermediates and offspring."""

    @classmethod
    def from_children(cls, children):
        """Build a Step that carries only ``children``; other fields are None.

        Used for the starting generation, which has no parents.
        """
        return cls(None, None, None, None, children)
def random_chromosome(length):
    """Return a 1-D boolean array of ``length`` random genes."""
    return choice([True, False], size=length)
def random_population(size, length):
    """Return a random boolean population as a DataArray.

    The array has dimensions ``('size', 'length')``: one chromosome per
    entry along ``size``, one gene per entry along ``length``.
    """
    return DataArray(
        choice([True, False], size=(size, length)),
        dims='size length'.split(),
    )
def breed(pop, size):
    """Produce the next generation from ``pop`` and return it as a Step.

    pop:  DataArray with a ``length`` dimension (see random_population).
    size: number of children to produce; the masks are drawn with
          ``pop.shape``.

    Reads the module-level MUTATION_RATE.
    """
    # pick the mating pairs: fitness along `length` becomes the selection
    # probability
    p = pop.mean(dim='length')
    p = p / p.sum()
    mates = DataArray(
        choice(arange(size), size=(size, 2), p=p),
        dims='size which'.split(),
        coords={
            'which': 'left right'.split(),
        },
    )
    left, right = pop[mates.sel(which='left')], pop[mates.sel(which='right')]
    # pick indices for left, right pair & cross over
    #   mask = [0 1 1 0 1]
    #          [0 0 1 1 1]
    # sorting each row leaves a single False->True switch per child
    xover_mask = choice([True, False], size=pop.shape)
    xover_mask.sort(axis=1)
    # True with probability MUTATION_RATE; those genes get flipped by the XOR
    mutation_mask = choice([True, False], p=(MUTATION_RATE, 1 - MUTATION_RATE), size=pop.shape)
    children = ((left * xover_mask) + (right * ~xover_mask)) ^ mutation_mask
    return Step(pop, mates, xover_mask, mutation_mask, children)
if __name__ == '__main__':
    MUTATION_RATE = 1e-4
    NUM_STEPS, POP_SIZE = 100, 100

    # { length: [ ... ] } -- per chromosome length, the list of Steps so far
    groups = {length: [] for length in {5, 10, 25, 50}}
    for length, hist in groups.items():
        pop = random_population(POP_SIZE, length)
        step = Step.from_children(pop)
        hist.append(step)

    # evolve every group, always breeding from the newest children
    with timed('numpy'):
        for hist in groups.values():
            for _ in range(NUM_STEPS):
                new_step = breed(hist[-1].children, POP_SIZE)
                hist.append(new_step)

    # plotting is switched off; flip the flag to see fitness per generation
    if (show_graph := False):
        from matplotlib.pyplot import plot, show, legend
        for length, hist in groups.items():
            xs = range(len(hist))
            ys = [step.children.mean() for step in hist]
            plot(xs, ys, label=f'{length = }')
        legend(loc='lower right')
        show()

    # one row per (length, step); float() turns the mean into a plain number
    df = DataFrame([(length, idx, float(step.children.mean())) for length, hist in groups.items()
                                                               for idx, step in enumerate(hist)])
    df.columns = 'length', 'step', 'fitness'
    df = df.set_index(['length', 'step']).sort_index()

    class Analysis(namedtuple('Analysis', 'raw size sliced increasing largest')):
        """Derived views of the fitness table for one chromosome length."""

        @classmethod
        def from_df(cls, raw, size):
            """Slice ``raw`` down to ``size`` and keep the rising stretches.

            ``increasing`` holds the rows at which fitness has gone up for
            three steps in a row. ``largest`` is left as None.
            """
            sliced = raw.loc[IndexSlice[size, :]]
            increasing = sliced[(sliced.diff() > 0).rolling(3, min_periods=1).sum() == 3].dropna()
            return cls(raw, size, sliced, increasing, None)

    results = {length: Analysis.from_df(df, length) for length in groups}
    print(results[5].increasing)
from xarray import DataArray
from numpy.random import choice
# A labelled 4-D array of random 0/1 values: 90 moves x 6 piece types x an
# 8x8 grid whose x and y coordinates run from 1 to 8.
board = DataArray(
choice([True, False], size=(90, 6, 8, 8)).astype(int),
dims=('move', 'piece', 'x', 'y'),
coords={
'piece': 'Pawn Rook Knight Bishop Queen King'.split(),
'move': range(90),
'x': range(1, 8+1),
'y': range(1, 8+1),
},
)
# Select by label, then sum over the two board dimensions: one total per move.
print(board.sel(piece='Pawn').sum(dim=('x', 'y')))
# Coordinates such as 1.5 and 2.5 are not on the grid; method='nearest'
# asks for the closest existing coordinate instead.
print(board.sel(move=0, x=[1,1.5,2], y=[2,2.5,3], method='nearest'))
# Same coordinates, but linearly interpolated between grid points.
# NOTE(review): interp presumably needs scipy installed -- confirm.
print(board.interp(move=0, x=[1,1.5,2], y=[2,2.5,3], method='linear'))
from pandas import MultiIndex, DataFrame, date_range
df = DataFrame({
    'x': [1, 2, 3],
    'y': [4, 5, 6],
})
# Label each row with a (device id, date) pair; the trailing comma turns the
# unpacked zip into a tuple of tuples.
df.index = *zip(['fsw-123', 'fsw-123', 'ssw-789'], date_range('2020-07-04', periods=3)),
# The named constructors for building a MultiIndex explicitly:
MultiIndex.from_tuples
MultiIndex.from_product
MultiIndex.from_arrays
# update anomaly
class Dataset:
    """Keeps ``xs`` plus a list of its positive values, computed once.

    ``pos`` is built in __init__ and never refreshed, so it goes stale when
    ``xs`` is changed afterwards -- that staleness is what this example shows.
    """

    def __init__(self, xs):
        self.xs = xs
        self.pos = [x for x in xs if x > 0]


from random import randrange
ds = Dataset([randrange(-1000, 1000) for _ in range(10)])
print(ds.xs)
print(ds.pos)
# append a positive value: xs changes, pos does not
ds.xs.append(10_0000)
print(ds.xs)
print(ds.pos)
class Data:
    """Pair of values whose ``y`` is validated only at construction time."""

    def __init__(self, x, y):
        if y == 0:
            raise ValueError("cannot initialize y to zero")
        self.x, self.y = x, y

    def foo(self):
        """Return x divided by y."""
        return self.x / self.y


d = Data(10, 200)
# plain attribute assignment is not checked, so this zero slips through
d.y = 0
# d = Data(10, 0) # bug!!
class Data:
    """Pair of values reached through explicit getter and setter methods.

    The values are stored in ``_x`` and ``_y``; ``set_y`` rejects zero, and
    the constructor goes through the setters so the same check applies there.
    """

    def __init__(self, x, y):
        self.set_x(x)
        self.set_y(y)

    def get_x(self):
        return self._x

    def get_y(self):
        return self._y

    def set_x(self, x):
        self._x = x

    def set_y(self, y):
        if y == 0:
            raise ValueError("cannot set y to zero")
        self._y = y

    def foo(self):
        """Return x divided by y."""
        # there are no `x` / `y` attributes on this class, only the
        # underscore-prefixed storage behind the getters
        return self.get_x() / self.get_y()
d = Data(10, 200)
# This goes through the validating setter, so the zero is rejected with
# ValueError.
d.set_y(0)
Descriptor protocol.
value = x.a # attribute read: __getattribute__ (falls back to __getattr__)
x.a = value # attribute write: __setattr__
class A:
    """Root of the diamond."""

    def foo(self):
        return 'A.foo()',


class B(A):
    def foo(self):
        # prepend our own tag to whatever the next class in the MRO returns
        return 'B.foo()', *super().foo()


class C(A):
    def foo(self):
        return 'C.foo()', *super().foo()


class D(B, C):
    """Bottom of the diamond: inherits from both B and C."""

    def foo(self):
        return 'D.foo()', *super().foo()


# Each foo() returns a tuple tracing the chain of super() calls; printing
# the MROs next to the results shows that the chain follows the MRO.
print(f'{A().foo() = }')
print(f'{B().foo() = }')
print(f'{C().foo() = }')
print(f'{D().foo() = }')
print(f'{A.__mro__ = }')
print(f'{B.__mro__ = }')
print(f'{C.__mro__ = }')
print(f'{D.__mro__ = }')
class B:
    """Base class contributing the class attribute ``z``."""
    z = 300


class C(B):
    """Subclass with its own class attribute ``y`` and instance attribute ``x``."""
    y = 20

    def __init__(self):
        self.x = 1
def _getattr(obj, attr):
if attr in obj.__dict__:
return obj.__dict__[attr]
for cls in type(obj).__mro__:
if attr in cls.__dict__:
rv = cls.__dict__[attr]
if hasattr(type(rv), '__get__'):
return rv.__get__(obj, cls)
return rv
raise AttributeError(f'no such attribute {attr}')
# Compare the built-in getattr with the hand-written _getattr for an
# instance attribute (x), a class attribute (y) and an attribute inherited
# from the base class (z).
obj = C()
print(f'{obj.x = }')
print(f'{ getattr(obj, "x") = }')
print(f'{_getattr(obj, "x") = }')
print(f'{ getattr(obj, "y") = }')
print(f'{_getattr(obj, "y") = }')
print(f'{ getattr(obj, "z") = }')
print(f'{_getattr(obj, "z") = }')
class A:
    pass


class B(A):
    pass


class C(B):
    pass


# A straight inheritance chain: the MRO lists the type itself first, then
# each ancestor in turn, ending with object.
obj = C()
print(type(obj).__mro__)
class Datum:
    """Example of a property that traces every read and write of ``x``."""

    def __init__(self, x, y):
        # assigning self.x already goes through the property setter below
        self.x, self.y = x, y

    @property
    def x(self):
        print('getting x')
        return self._x

    @x.setter
    def x(self, value):
        print(f'setting x = {value}')
        self._x = value


d = Datum(10, 20)
# print(f'{d.x = }')
d.x = ...
# update anomaly
class Dataset:
    """Keeps ``xs``; the positive values are derived on every access."""

    def __init__(self, xs):
        self.xs = xs

    @property
    def pos(self):
        """Positive entries of ``xs``, recomputed each time it is read."""
        return [x for x in self.xs if x > 0]


from random import randrange
ds = Dataset([randrange(-1000, 1000) for _ in range(10)])
print(ds.xs)
print(ds.pos)
# append a positive value: this time pos reflects the change
ds.xs.append(10_0000)
print(ds.xs)
print(ds.pos)
class Data:
    """Pair of values whose ``y`` is validated on every assignment."""

    def __init__(self, x, y):
        self.x = x
        self.y = y  # goes through the property setter, so zero is rejected here too

    @property
    def y(self):
        return self._y

    @y.setter
    def y(self, y):
        if y == 0:
            raise ValueError("cannot set y to zero")
        self._y = y

    def foo(self):
        """Return x divided by y."""
        return self.x / self.y


d = Data(10, 200)
d.y = 10
class T:
    """Smallest possible property with a setter; both accessors do nothing."""

    @property
    def x(self):
        pass

    @x.setter
    def x(self, value):
        pass


# assignment is routed to the setter, which discards the value
T().x = 10
class Dataset:
    """Keeps ``xs``; the positive values are derived on every access."""

    def __init__(self, xs):
        self.xs = xs

    @property
    def pos_xs(self):
        """Positive entries of ``xs``, recomputed each time it is read."""
        return [x for x in self.xs if x > 0]
class Dataset:
    """Keeps ``xs`` and refreshes ``pos_xs`` whenever ``xs`` is assigned.

    The positive values are computed once per assignment rather than on
    every read. Only assignment triggers the refresh; changing the list in
    place (e.g. ``ds.xs.append(...)``) does not pass through the setter.
    """

    def __init__(self, xs):
        self.xs = xs

    @property
    def xs(self):
        return self._xs

    @xs.setter
    def xs(self, value):
        self._xs = value
        self.pos_xs = [x for x in value if x > 0]


from random import randrange
ds = Dataset([randrange(-100, 100) for _ in range(10)])
print(f'{ds.pos_xs = }')
# `def` is an ordinary statement that runs each time it is reached: every
# pass through the loop creates a new function and rebinds the name f.
for _ in range(10):
    def f():
        pass
    f()
def do_twice(f, *args, **kwargs):
    """Call ``f`` two times with the given arguments; the results are discarded."""
    f(*args, **kwargs)
    f(*args, **kwargs)
def hello(name):
    """Print a greeting for ``name``."""
    print(f'Hello, {name}!')


# functions are ordinary objects: hand one to do_twice, which calls it twice
do_twice(hello, name='Mahmut')
do_twice(hello, name='Ji Yuan')
from time import perf_counter
from time import sleep
from random import random
from functools import wraps
def timed(f):
    """Decorator that prints how long each call to ``f`` takes.

    The wrapper forwards all arguments and hands back whatever ``f``
    returns, so decorating a function does not change its result.
    functools.wraps keeps the name and docstring of ``f`` on the wrapper.
    """
    @wraps(f)
    def new_func(*args, **kwargs):
        start = perf_counter()
        result = f(*args, **kwargs)
        stop = perf_counter()
        print(f'Elapsed \N{greek capital letter delta}t: {stop - start:.2f}s')
        return result
    return new_func
@timed
def slow(a):
    ''' do something slowly '''
    sleep(random())
    print(f'slow({a!r})')
# slow = timed(slow)
# help(slow)


@timed
def fast(a, b):
    # sleeps for a tenth of what slow() does
    sleep(random()/10)
    print(f'fast({a!r}, {b!r})')
# fast = timed(fast)


# every call prints its own line followed by the timing report from @timed
slow(1)
slow(2)
fast(3, 4)
slow(5)