seminars.fb

Programming Fundamentals → “OO Design”

Seminar (Fri, Dec 18, 2020; 12 PM PST)

Theme: Programming Fundamentals

Topic: OO Design

Keywords: objects, classes, design

Presenter	James Powell james@dutc.io
Date	Friday, December 18, 2020
Time	12:00 PM PST

print("Let's go!")

Questions

Attendees asked us:

1. What (the heck) is a collections.namedtuple?
1. What is an xarray.DataArray?
1. What is async/await?

What is a `collections.namedtuple`?

What is a collections.namedtuple?
Why is it better than using a tuple?
Why is it better than using a dict?
What if my data is mutable?
What if I want to extend this structure?

What is an `xarray.DataArray`?

How does it differ from a numpy.ndarray?
How does it differ from a pandas.Series or pandas.DataFrame?

What is `async/await`?

How does it differ from other concurrency approaches?
What does it do well? What does it not do well?

Why `numpy`? Why `pandas`? Why `xarray`?

When should I use numpy? What does it offer?
When should I use pandas? What does it offer?
When should I use xarray? What does it offer?

Genetic algorithm/evolutionary programming:

population of chromosomes
- binary chromosomes [0, 1, 0, 1, 0]
fitness function: mean number of 1s
most fit mates, breed them:
- random bit flip
- random 1-point crossover

left  = [0, 1, 0, 1, 1]
right = [1, 1, 1, 0, 0]
#           .
#       [0, ]
#           [1, 1, 0, 0]
#       [0, 1, 1, 0, 0]

from random import choice
def random_chromosome(size):
    """Return a list of ``size`` booleans, each drawn independently at random."""
    genes = []
    for _ in range(size):
        genes.append(choice([True, False]))
    return genes

c = random_chromosome(5)
print(f'{c = }')

from random import choice
def random_chromosome(length):
    """Create one chromosome: a list of ``length`` random True/False genes."""
    alleles = [True, False]
    return [choice(alleles) for _gene in range(length)]
def random_population(size, length):
    """Create a list of ``size`` random chromosomes, each ``length`` genes long."""
    population = []
    for _ in range(size):
        population.append(random_chromosome(length))
    return population

p = random_population(10, 5)
print(f'{p = }')

from random import choice
from statistics import mean
def random_chromosome(length):
    """Return ``length`` random booleans collected in a list."""
    chromosome = []
    for _ in range(length):
        gene = choice([True, False])
        chromosome.append(gene)
    return chromosome
def random_population(size, length):
    """Return ``size`` freshly generated chromosomes of ``length`` genes each."""
    members = []
    for _ in range(size):
        members.append(random_chromosome(length))
    return members
def fitness(chromosome):
    """Score a chromosome as the mean of its genes (the fraction that are True)."""
    score = mean(chromosome)
    return score

c = random_chromosome(5)
print(f'{c          = }')
print(f'{fitness(c) = }')

from random import choice, choices
from statistics import mean
from itertools import count, islice

def random_chromosome(length):
    """Build a list of ``length`` genes, each a random boolean."""
    options = [True, False]
    return [choice(options) for _position in range(length)]
def random_population(length):
    """Return an endless generator of random chromosomes of ``length`` genes.

    The caller decides how many to take (e.g. with ``itertools.islice``).
    """
    def endless():
        while True:
            yield random_chromosome(length)

    return endless()
def fitness(chromosome):
    """Return the mean of the chromosome's genes, i.e. its share of True values."""
    average = mean(chromosome)
    return average
def select(population):
    """Return an endless generator of fitness-weighted mating pairs.

    Fitness weights are computed once, up front; every item produced is a
    two-element list drawn (with replacement) from ``population``.
    """
    weights = [fitness(member) for member in population]

    def pairs():
        while True:
            yield choices(population, weights=weights, k=2)

    return pairs()

p = [*islice(random_population(5), 10)]
for c in p:
    print(f'{c = }')

for left, right in islice(select(p), 3):
    print(f'{left  = }')
    print(f'{right = }')
    print()

from random import choice, choices
from statistics import mean
from itertools import count, islice
from time import perf_counter_ns
from contextlib import contextmanager

@contextmanager
def timed(heading):
    """Context manager that prints how long the enclosed block took.

    ``heading`` labels the output line (left-aligned in a 20-character
    column), matching the other ``timed`` helpers in these notes. The
    elapsed time is rounded to whole microseconds and shown in seconds.
    The line is printed even if the block raises.
    """
    start = perf_counter_ns()
    try:
        yield
    finally:
        stop = perf_counter_ns()
        print(f'{heading:<20} \N{greek capital letter delta}t: {round(stop - start, -3)/1e9}s')

def random_chromosome(length):
    """Return a list of ``length`` random boolean genes."""
    genes = []
    for _ in range(length):
        genes.append(choice([True, False]))
    return genes
def random_population(length):
    """Return a never-ending generator of random ``length``-gene chromosomes."""
    def stream():
        while True:
            yield random_chromosome(length)

    return stream()
def fitness(chromosome):
    """Fitness of a chromosome: the mean of its genes (fraction set to True)."""
    value = mean(chromosome)
    return value
def select(population):
    """Return an endless generator of parent pairs, weighted by fitness.

    The weights are evaluated eagerly, once per call; each item produced is a
    list of two chromosomes sampled with replacement from ``population``.
    """
    weights = [fitness(candidate) for candidate in population]

    def mating_pairs():
        while True:
            yield choices(population, weights=weights, k=2)

    return mating_pairs()
def crossover(left, right):
    """One-point crossover of two parent chromosomes.

    A cut position is drawn within the shorter parent, then a coin flip
    decides which parent contributes the head and which the tail.
    """
    shortest = min(len(left), len(right))
    cut = choice(range(shortest))
    head, tail = (left, right) if choice([True, False]) else (right, left)
    return [*head[:cut], *tail[cut:]]
def mutate(child):
    """Return a copy of ``child`` with one randomly chosen gene negated.

    The input sequence is left untouched; the result is always a new list.
    """
    idx = choice(range(len(child)))
    mutant = [*child]
    mutant[idx] = not mutant[idx]
    return mutant
def breed(pairs):
    """Yield one mutated child for every ``(left, right)`` pair in ``pairs``.

    ``mutate`` returns a new list rather than modifying its argument, so its
    result must be the value yielded; otherwise the mutation is silently lost.
    """
    for left, right in pairs:
        yield mutate(crossover(left, right))

if __name__ == '__main__':
    # Demo driver: evolve one population per chromosome length, then plot the
    # mean fitness of every generation.
    NUM_STEPS, POP_SIZE = 500, 100

    # { length: [ ... ] }
    # Maps chromosome length -> history of populations, one entry per generation.
    groups = {length: [] for length in {5, 10, 25, 50}}

    # Seed each history with a random initial population of POP_SIZE chromosomes.
    for length, hist in groups.items():
        pop = [*islice(random_population(length), POP_SIZE)]
        hist.append(pop)

    with timed('Pure Python'):
        for length, hist in groups.items():
            for step in range(NUM_STEPS):
                # Breed the next generation from the most recent one.
                new_pop = [*islice(breed(select(hist[-1])), POP_SIZE)]
                hist.append(new_pop)

    # matplotlib is only imported when the script is run directly.
    from matplotlib.pyplot import plot, show, legend
    for length, hist in groups.items():
        xs = range(len(hist))
        # Mean fitness across the population, one value per generation.
        ys = [mean([fitness(c) for c in pop]) for pop in hist]
        plot(xs, ys, label=f'{length = }')
    legend(loc='lower right')
    show()

from time import perf_counter_ns
from contextlib import contextmanager

@contextmanager
def timed(heading):
    """Time the enclosed ``with`` body and print ``heading`` plus elapsed seconds.

    The report is printed even when the body raises.
    """
    began = perf_counter_ns()
    try:
        yield
    finally:
        elapsed_ns = perf_counter_ns() - began
        # Round to whole microseconds before converting to seconds.
        seconds = round(elapsed_ns, -3) / 1e9
        print(f'{heading:<20} \N{greek capital letter delta}t: {seconds}s')

from random import gauss
def dot(x, y):
    """Return the dot product of two equal-length iterables of numbers.

    Written as a named function (rather than a lambda bound to a name) so it
    has a real ``__name__``, and with loop variables that do not shadow the
    parameters. Pairs are formed with ``zip``, so any extra trailing elements
    of the longer input are ignored.
    """
    return sum(a * b for a, b in zip(x, y))

with timed('Pure Python create'):
    xs = [gauss(0, 1) for _ in range(1_000_000)]
    ys = [gauss(0, 1) for _ in range(1_000_000)]

with timed('Pure Python compute'):
    dot(xs, ys)

from numpy.random import normal

with timed('numpy create'):
    xs = normal(size=1_000_000)
    ys = normal(size=1_000_000)

with timed('numpy compute'):
    xs.dot(ys)

from pandas import DataFrame, IndexSlice
from numpy.random import choice
from numpy import arange
from time import perf_counter_ns
from contextlib import contextmanager

@contextmanager
def timed(heading):
    """Print ``heading`` and the wall-clock duration of the managed block.

    The duration is rounded to microseconds, reported in seconds, and printed
    whether or not the block raised.
    """
    t0 = perf_counter_ns()
    try:
        yield
    finally:
        t1 = perf_counter_ns()
        duration = round(t1 - t0, -3) / 1e9
        print(f'{heading:<20} \N{greek capital letter delta}t: {duration}s')


from collections import namedtuple
class Step(namedtuple('Step', 'pop mates xover mutation children')):
    """Record of one breeding step: inputs, intermediate masks and the children."""

    @classmethod
    def from_children(cls, children):
        """Build a Step that only knows its children (all other fields are None)."""
        return cls(pop=None, mates=None, xover=None, mutation=None, children=children)

def random_chromosome(length):
    """Return a 1-D boolean array of ``length`` randomly chosen genes."""
    alleles = [True, False]
    return choice(alleles, size=length)
def random_population(size, length):
    """Return a ``(size, length)`` boolean array: one random chromosome per row."""
    shape = (size, length)
    return choice([True, False], size=shape)
def breed(pop, size):
    # Produce the next generation from `pop` in one vectorised pass and return
    # a Step recording the inputs, the masks used and the resulting children.
    # assumes pop is a 2-D boolean array (rows = chromosomes) and that
    # size == pop.shape[0] -- TODO confirm against callers.
    # Reads the module-level MUTATION_RATE, which this file only assigns under
    # the `if __name__ == '__main__'` guard.

    # pick the mating pairs
    # Row-wise mean is the per-chromosome fitness; normalise it into
    # selection probabilities.
    p = pop.mean(axis=1)
    p = p / p.sum()

    # `size` pairs of row indices, drawn with probability proportional to fitness.
    mates = choice(arange(size), size=(size, 2), p=p)
    left, right = pop[mates[:, 0]], pop[mates[:, 1]]

    # pick indices for left, right pair & cross over
    # mask = [0 1 1 0 1]
    #        [0 0 1 1 1]
    # Sorting each row puts every False before every True, so each row of the
    # mask has a single switch-over point.
    xover_mask = choice([True, False], size=pop.shape)
    xover_mask.sort(axis=1)

    # True (flip this gene) with probability MUTATION_RATE.
    mutation_mask = choice([True, False], p=(MUTATION_RATE, 1 - MUTATION_RATE), size=pop.shape)

    # Take `left` where the mask is True and `right` elsewhere, then XOR in
    # the mutation mask to flip the selected genes.
    children = ((left * xover_mask) + (right * ~xover_mask)) ^ mutation_mask
    return Step(pop, mates, xover_mask, mutation_mask, children)

if __name__ == '__main__':
    # Demo driver: run the vectorised genetic algorithm for several chromosome
    # lengths, then analyse the per-generation mean fitness with pandas.
    MUTATION_RATE = 1e-4
    NUM_STEPS, POP_SIZE = 100, 100

    # { length: [ ... ] }
    # Maps chromosome length -> history of Step records.
    groups = {length: [] for length in {5, 10, 25, 50}}

    # Seed every history with a Step that only carries the initial population.
    for length, hist in groups.items():
        pop = random_population(POP_SIZE, length)
        step = Step.from_children(pop)
        hist.append(step)

    with timed('numpy'):
        for length, hist in groups.items():
            for _ in range(NUM_STEPS):
                # The children of the latest step are the parents of the next.
                new_step = breed(hist[-1].children, POP_SIZE)
                hist.append(new_step)

    # Plotting is switched off here; set show_graph to True to enable it.
    if (show_graph := False):
        from matplotlib.pyplot import plot, show, legend
        for length, hist in groups.items():
            xs = range(len(hist))
            ys = [step.children.mean() for step in hist]
            plot(xs, ys, label=f'{length = }')
        legend(loc='lower right')
        show()

    # One row per (length, step) holding the mean fitness of that generation.
    df = DataFrame([(length, idx, step.children.mean()) for length, hist in groups.items()
                                                        for idx, step in enumerate(hist)]) 
    df.columns = 'length', 'step', 'fitness'
    df = df.set_index(['length', 'step']).sort_index()

    class Analysis(namedtuple('Analysis', 'raw size sliced increasing largest')):
        # Bundles the raw frame with derived views for one chromosome length.
        @classmethod
        def from_df(cls, raw, size):
            # Rows of `raw` whose first index level equals `size`.
            sliced = raw.loc[IndexSlice[size, :]]
            # Keep rows that end a run of three consecutive increases in fitness.
            increasing  = sliced[(sliced.diff() > 0).rolling(3, min_periods=1).sum() == 3].dropna()
            # `largest` is not computed here and is left as None.
            return cls(raw, size, sliced, increasing, None)

    results = {length: Analysis.from_df(df, length) for length in groups}
    print(results[5].increasing)

from numpy.random import normal
xs = normal(size=(3,3))
print(xs)
print(f'{xs.__array_interface__["data"][0] = :#_x}')
print(f'{xs.dtype   = }')
print(f'{xs.shape   = }')
print(f'{xs.strides = }')

ys = xs[:, 2:]
print(f'{ys.__array_interface__["data"][0] = :#_x}')
print(f'{ys.shape   = }')
print(f'{ys.strides = }')

from numpy.random import randint
from time import perf_counter_ns
from contextlib import contextmanager

@contextmanager
def timed(heading):
    """Context manager that prints how long the enclosed block took.

    ``heading`` labels the output line (left-aligned in a 20-character
    column) so that several timed sections can be told apart. The elapsed
    time is rounded to whole microseconds and shown in seconds, and is
    printed even if the block raises.
    """
    start = perf_counter_ns()
    try:
        yield
    finally:
        stop = perf_counter_ns()
        print(f'{heading:<20} \N{greek capital letter delta}t: {round(stop - start, -3)/1e9}s')

# Two ways to square the negative entries and cube the others.
xs = randint(-1000, 1000, size=50_000_000)

# Attempt i works in place on a copy so that xs stays untouched.
ys = xs.copy()
with timed('attempt i'):
    ys[ys<0]  **= 2
    # NOTE: this mask is evaluated after the line above has already run, so the
    # entries that were negative (now squared, hence >= 0) are selected again
    # and cubed as well.
    ys[ys>=0] **= 3

with timed('attempt ii'):
    # Build the mask once from the unmodified input, then blend both powers.
    mask = xs < 0
    zs = (xs ** 2) * mask + (xs ** 3) * ~mask

class A:
    """Minimal class with an explicit, do-nothing initializer."""
    def __init__(self):
        pass

from pandas import Series
# Series indexed by the string labels 'a', 'b', 'c'.
s = Series([1, 2, 3], index=[*'abc'])
print(s)
# NOTE(review): the labels are strings, so an integer key here relies on pandas
# falling back to positional lookup -- confirm this still works on the
# installed pandas version (s.iloc[0] is the explicit spelling).
print(s[0])
print(s['a'])

# Series whose integer labels run opposite to the positions.
s = Series([1, 2, 3], index=[2, 1, 0])
# presumably a label lookup here (same as .loc) -- verify against pandas docs
print(s[0])
print(s.loc[0])  # lookup by label
print(s.iloc[0]) # lookup by position

class Dataset:
    def __init__(self, filename, ..., ..., ...):
        with open(filename) as f:
            ...

Dataset('input.csv')
Dataset('input.xml')
Dataset('input.json')
Dataset('input.xml')
Dataset('input.dat')

class Dataset:
    """Container for already-loaded records.

    File formats are handled by the ``from_*`` alternate constructors rather
    than by ``__init__``.
    """
    def __init__(self, records):
        self.records = records

    @classmethod
    def from_csv(cls, filename):
        """Alternate-constructor sketch for CSV input; the parsing body is elided."""
        with open(filename) as f:
            ...

    @classmethod
    def from_json(cls, filename):
        """Alternate-constructor sketch for JSON input; the parsing body is elided."""
        with open(filename) as f:
            ...

from pandas import DataFrame, IndexSlice
from numpy.random import choice
from numpy import arange
from time import perf_counter_ns
from contextlib import contextmanager
from xarray import DataArray

@contextmanager
def timed(heading):
    """Report, under ``heading``, how many seconds the ``with`` body ran.

    Output is produced in a ``finally`` clause, so it appears even on error.
    """
    started_at = perf_counter_ns()
    try:
        yield
    finally:
        nanos = perf_counter_ns() - started_at
        # Drop sub-microsecond noise, then express the result in seconds.
        secs = round(nanos, -3) / 1e9
        print(f'{heading:<20} \N{greek capital letter delta}t: {secs}s')


from collections import namedtuple
class Step(namedtuple('Step', 'pop mates xover mutation children')):
    """Snapshot of one breeding step (parents, pairing, masks and children)."""

    @classmethod
    def from_children(cls, children):
        """Create a Step holding only ``children``; every other field is None."""
        blanks = (None,) * 4
        return cls(*blanks, children)

def random_chromosome(length):
    """Return ``length`` random boolean genes as a 1-D array."""
    values = [True, False]
    return choice(values, size=length)
def random_population(size, length):
    """Return a random boolean population as a DataArray.

    The array has dimensions ``('size', 'length')``: one chromosome per entry
    along ``size``, one gene per entry along ``length``.
    """
    genes = choice([True, False], size=(size, length))
    dim_names = 'size length'.split()
    return DataArray(genes, dims=dim_names)
def breed(pop, size):
    # xarray variant of the breeding step: same outline as the plain-numpy
    # version, but the population and the mate table carry named dimensions.
    # assumes pop is a boolean DataArray with dims ('size', 'length') and that
    # size equals the number of chromosomes -- TODO confirm against callers.
    # Reads the module-level MUTATION_RATE, which this file only assigns under
    # the `if __name__ == '__main__'` guard.

    # pick the mating pairs
    # Mean over the 'length' dimension is the per-chromosome fitness,
    # normalised into selection probabilities.
    p = pop.mean(dim='length')
    p = p / p.sum()

    # Table of chosen parent indices; the 'which' coordinate names the two
    # columns 'left' and 'right'.
    mates = DataArray(
        choice(arange(size), size=(size, 2), p=p),
        dims='size which'.split(),
        coords={
            'which': 'left right'.split(),
        },
    )
    # NOTE(review): indexing a DataArray with another DataArray -- verify this
    # selects whole chromosomes along 'size' as intended.
    left, right = pop[mates.sel(which='left')], pop[mates.sel(which='right')]

    # pick indices for left, right pair & cross over
    # mask = [0 1 1 0 1]
    #        [0 0 1 1 1]
    # Sorting each row puts every False before every True, giving one
    # switch-over point per row.
    xover_mask = choice([True, False], size=pop.shape)
    xover_mask.sort(axis=1)

    # True (flip this gene) with probability MUTATION_RATE.
    mutation_mask = choice([True, False], p=(MUTATION_RATE, 1 - MUTATION_RATE), size=pop.shape)

    # Take `left` where the mask is True and `right` elsewhere, then XOR in
    # the mutation mask.
    children = ((left * xover_mask) + (right * ~xover_mask)) ^ mutation_mask
    return Step(pop, mates, xover_mask, mutation_mask, children)

if __name__ == '__main__':
    # Demo driver for the xarray variant: evolve several chromosome lengths,
    # then analyse the per-generation mean fitness with pandas.
    MUTATION_RATE = 1e-4
    NUM_STEPS, POP_SIZE = 100, 100

    # { length: [ ... ] }
    # Maps chromosome length -> history of Step records.
    groups = {length: [] for length in {5, 10, 25, 50}}

    # Seed every history with a Step that only carries the initial population.
    for length, hist in groups.items():
        pop = random_population(POP_SIZE, length)
        step = Step.from_children(pop)
        hist.append(step)

    # NOTE(review): the timing label still says 'numpy' although this variant
    # uses xarray -- confirm whether the label should change.
    with timed('numpy'):
        for length, hist in groups.items():
            for _ in range(NUM_STEPS):
                # The children of the latest step are the parents of the next.
                new_step = breed(hist[-1].children, POP_SIZE)
                hist.append(new_step)

    # Plotting is switched off here; set show_graph to True to enable it.
    if (show_graph := False):
        from matplotlib.pyplot import plot, show, legend
        for length, hist in groups.items():
            xs = range(len(hist))
            ys = [step.children.mean() for step in hist]
            plot(xs, ys, label=f'{length = }')
        legend(loc='lower right')
        show()

    # One row per (length, step); float() converts the mean to a plain number.
    df = DataFrame([(length, idx, float(step.children.mean())) for length, hist in groups.items()
                                                               for idx, step in enumerate(hist)]) 
    df.columns = 'length', 'step', 'fitness'
    df = df.set_index(['length', 'step']).sort_index()

    class Analysis(namedtuple('Analysis', 'raw size sliced increasing largest')):
        # Bundles the raw frame with derived views for one chromosome length.
        @classmethod
        def from_df(cls, raw, size):
            # Rows of `raw` whose first index level equals `size`.
            sliced = raw.loc[IndexSlice[size, :]]
            # Keep rows that end a run of three consecutive increases in fitness.
            increasing  = sliced[(sliced.diff() > 0).rolling(3, min_periods=1).sum() == 3].dropna()
            # `largest` is not computed here and is left as None.
            return cls(raw, size, sliced, increasing, None)

    results = {length: Analysis.from_df(df, length) for length in groups}
    print(results[5].increasing)

from xarray import DataArray
from numpy.random import choice
board = DataArray(
    choice([True, False], size=(90, 6, 8, 8)).astype(int),
    dims=('move', 'piece', 'x', 'y'),
    coords={
        'piece': 'Pawn Rook Knight Bishop Queen King'.split(),
        'move': range(90),
        'x': range(1, 8+1),
        'y': range(1, 8+1),
    },
)

print(board.sel(piece='Pawn').sum(dim=('x', 'y')))
print(board.sel(move=0, x=[1,1.5,2], y=[2,2.5,3], method='nearest'))
print(board.interp(move=0, x=[1,1.5,2], y=[2,2.5,3], method='linear'))

What are decorators all about?

from pandas import MultiIndex, DataFrame, date_range

# Small frame whose rows are labelled by (identifier, date) pairs.
df = DataFrame({
    'x': [1, 2, 3],
    'y': [4, 5, 6],
})
# The trailing comma turns the unpacked zip into a tuple of (id, date) tuples.
df.index = *zip(['fsw-123', 'fsw-123', 'ssw-789'], date_range('2020-07-04', periods=3)),
# Alternate constructors to look at (referenced here, not called). The class is
# imported as MultiIndex; the earlier spelling `MultIndex` raised NameError.
MultiIndex.from_tuples
MultiIndex.from_product
MultiIndex.from_arrays

# update anomaly
class Dataset:
    """Stores ``xs`` plus a snapshot of its positive values.

    ``pos`` is computed once at construction time, so it is not refreshed when
    ``xs`` is mutated afterwards (the "update anomaly" this example shows).
    """
    def __init__(self, xs):
        self.xs = xs
        positives = []
        for value in xs:
            if value > 0:
                positives.append(value)
        self.pos = positives

from random import randrange
ds = Dataset([randrange(-1000, 1000) for _ in range(10)])
print(ds.xs)
print(ds.pos)

ds.xs.append(10_0000)
print(ds.xs)
print(ds.pos)

class Data:
    """Pair of values whose ratio is exposed by ``foo``.

    A zero ``y`` is rejected only at construction time; later assignments to
    ``y`` are not checked.
    """
    def __init__(self, x, y):
        if y == 0:
            raise ValueError("cannot initialize y to zero")
        self.x = x
        self.y = y

    def foo(self):
        """Return ``x / y``."""
        numerator, denominator = self.x, self.y
        return numerator / denominator

d = Data(10, 200)
d.y = 0
#  d = Data(10, 0) # bug!!

class Data:
    """Pair of values guarded by explicit getter/setter methods.

    The values live in ``_x`` and ``_y``; ``set_y`` rejects zero so that
    ``foo`` can always divide.
    """
    def __init__(self, x, y):
        # Go through the setters so the validation also applies at construction.
        self.set_x(x)
        self.set_y(y)
    def get_x(self):
        """Return the stored x value."""
        return self._x
    def get_y(self):
        """Return the stored y value."""
        return self._y
    def set_x(self, x):
        """Store ``x`` without validation."""
        self._x = x
    def set_y(self, y):
        """Store ``y``; raises ValueError if ``y`` equals zero."""
        if y == 0:
            raise ValueError("cannot set y to zero")
        self._y = y
    def foo(self):
        """Return ``x / y``, read through the getters.

        There are no ``x``/``y`` attributes on this class, only ``_x``/``_y``.
        """
        return self.get_x() / self.get_y()

d = Data(10, 200)
d.set_y(0)

Descriptor protocol.

value = x.a # __getattr__
x.a = value # __setattr__

class A:
    """Root of the diamond; ``foo`` returns a one-element tuple."""
    def foo(self):
        return ('A.foo()',)


class B(A):
    """Left branch: prepends its own tag to whatever is next in the MRO."""
    def foo(self):
        return ('B.foo()',) + super().foo()


class C(A):
    """Right branch: prepends its own tag to whatever is next in the MRO."""
    def foo(self):
        return ('C.foo()',) + super().foo()


class D(B, C):
    """Bottom of the diamond; its ``foo`` walks the MRO D -> B -> C -> A."""
    def foo(self):
        return ('D.foo()',) + super().foo()
print(f'{A().foo() = }')
print(f'{B().foo() = }')
print(f'{C().foo() = }')
print(f'{D().foo() = }')
print(f'{A.__mro__ = }')
print(f'{B.__mro__ = }')
print(f'{C.__mro__ = }')
print(f'{D.__mro__ = }')

class B:
    """Base class carrying the class attribute ``z``."""
    z = 300

class C(B):
    """Subclass with its own class attribute ``y`` and instance attribute ``x``."""
    y = 20
    def __init__(self):
        # Stored in the instance __dict__, unlike y and z which live on the classes.
        self.x = 1

def _getattr(obj, attr):
    if attr in obj.__dict__:
        return obj.__dict__[attr]
    for cls in type(obj).__mro__:
        if attr in cls.__dict__:
            rv = cls.__dict__[attr]
            if hasattr(type(rv), '__get__'):
                return rv.__get__(obj, cls)
            return rv
    raise AttributeError(f'no such attribute {attr}')

obj = C()
print(f'{obj.x              = }')
print(f'{ getattr(obj, "x") = }')
print(f'{_getattr(obj, "x") = }')
print(f'{ getattr(obj, "y") = }')
print(f'{_getattr(obj, "y") = }')
print(f'{ getattr(obj, "z") = }')
print(f'{_getattr(obj, "z") = }')

# Three-level single-inheritance chain, used to show a linear MRO.
class A:
    pass
class B(A):
    pass
class C(B):
    pass
obj = C()
print(type(obj).__mro__)

class Datum:
    """Two values, where every read or write of ``x`` is announced on stdout.

    ``x`` is a property backed by ``_x``; ``y`` is a plain attribute.
    """
    def __init__(self, x, y):
        self.x = x  # goes through the property setter below
        self.y = y

    def _read_x(self):
        print(f'getting x')
        return self._x

    def _write_x(self, value):
        print(f'setting x = {value}')
        self._x = value

    x = property(_read_x, _write_x)
    # Keep the class namespace limited to the public property.
    del _read_x, _write_x

d = Datum(10, 20)
#  print(f'{d.x = }')
d.x = ...

# update anomaly
class Dataset:
    """Stores ``xs``; ``pos`` is recomputed from it on every access."""
    def __init__(self, xs):
        self.xs = xs

    @property
    def pos(self):
        """Return a new list of the strictly positive values currently in ``xs``."""
        positives = []
        for value in self.xs:
            if value > 0:
                positives.append(value)
        return positives

from random import randrange
ds = Dataset([randrange(-1000, 1000) for _ in range(10)])
print(ds.xs)
print(ds.pos)

ds.xs.append(10_0000)
print(ds.xs)
print(ds.pos)

class Data:
    """Pair of values where ``y`` is a validated property that rejects zero."""
    def __init__(self, x, y):
        # Assigning y here already goes through the validating setter.
        self.x, self.y = x, y

    @property
    def y(self):
        """The stored denominator."""
        return self._y

    @y.setter
    def y(self, value):
        if value == 0:
            raise ValueError("cannot set y to zero")
        self._y = value

    def foo(self):
        """Return ``x / y``."""
        numerator, denominator = self.x, self.y
        return numerator / denominator

d = Data(10, 200)
d.y = 10

class T:
    """Smallest possible read/write property: both accessors do nothing."""
    @property
    def x(self):
        # Getter with an empty body, so reading x yields None.
        pass
    @x.setter
    def x(self, value):
        # Setter that accepts and discards the value.
        pass

T().x = 10

class Dataset:
    """Stores ``xs``; ``pos_xs`` is derived from it on every access."""
    def __init__(self, xs):
        self.xs = xs

    @property
    def pos_xs(self):
        """Return a fresh list of the strictly positive entries of ``xs``."""
        kept = []
        for item in self.xs:
            if item > 0:
                kept.append(item)
        return kept

class Dataset:
    """Stores ``xs`` behind a property that refreshes ``pos_xs`` on assignment.

    ``pos_xs`` is recomputed whenever ``xs`` is assigned (including in
    ``__init__``), not when the assigned list is mutated in place.
    """
    def __init__(self, xs):
        self.xs = xs

    @property
    def xs(self):
        """The most recently assigned collection."""
        return self._xs

    @xs.setter
    def xs(self, value):
        self._xs = value
        positives = []
        for item in value:
            if item > 0:
                positives.append(item)
        self.pos_xs = positives

from random import randrange
ds = Dataset([randrange(-100, 100) for _ in range(10)])
print(f'{ds.pos_xs = }')

# `def` is an ordinary statement: each iteration executes it again and rebinds
# the name f to a newly created function object.
for _ in range(10):
    def f():
        pass
# After the loop, f refers to the function created by the last iteration.
f()

def do_twice(f, *args, **kwargs):
    """Call ``f`` two times with the same arguments; return values are discarded."""
    for _ in range(2):
        f(*args, **kwargs)

def hello(name):
    """Print a one-line greeting addressed to ``name``."""
    greeting = f'Hello, {name}!'
    print(greeting)

do_twice(hello, name='Mahmut')
do_twice(hello, name='Ji Yuan')

from time import perf_counter
from time import sleep
from random import random
from functools import wraps

def timed(f):
    """Decorator that prints the elapsed time of every call to ``f``.

    The wrapper passes all arguments through, returns whatever ``f`` returns,
    and prints the timing line even if ``f`` raises. ``functools.wraps`` keeps
    the wrapped function's name and docstring.
    """
    @wraps(f)
    def new_func(*args, **kwargs):
        start = perf_counter()
        try:
            # Hand the result back to the caller instead of dropping it.
            return f(*args, **kwargs)
        finally:
            stop = perf_counter()
            print(f'Elapsed \N{greek capital letter delta}t: {stop - start:.2f}s')
    return new_func

@timed
def slow(a):
    ''' do something slowly '''
    delay = random()
    sleep(delay)
    message = f'slow({a!r})'
    print(message)
#  slow = timed(slow)
#  help(slow)

@timed
def fast(a, b):
    """Pause for a tenth of a random fraction of a second, then echo the call."""
    delay = random()/10
    sleep(delay)
    message = f'fast({a!r}, {b!r})'
    print(message)
#  fast = timed(fast)

slow(1)
slow(2)
fast(3, 4)
slow(5)

seminars.fb

Programming Fundamentals → “OO Design”

Seminar (Fri, Dec 18, 2020; 12 PM PST)

Questions

What is a collections.namedtuple?

What is an xarray.DataArray?

What is async/await?

Why numpy? Why pandas? Why xarray?

What are decorators all about?

What is a `collections.namedtuple`?

What is an `xarray.DataArray`?

What is `async/await`?

Why `numpy`? Why `pandas`? Why `xarray`?