seminars.fb

Tools & Approaches → “designing functions”

Seminar (Fri, Dec 4, 2020; 9 AM PST)

Theme: Tools and Approaches

Topic: Function Design

Keywords: *args, **kwargs, functools.wraps, lambda functions; inspect.signature; decorators

Presenter	James Powell james@dutc.io
Date	Friday, December 4, 2020
Time	9:00 AM PST

print('Good morning!')

from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
print(df)

from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

df1['c'] = df1['a'] + df1['b']
print(df1)

df2['c'] = df2['a'] + df2['b']
print(df2)

from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

df1_subset = df1[df1['a'] < df1['b']]
print(df1_subset)

df2_subset = df2[df2['a'] < df2['b']]
print(df2_subset)

from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

def subset(df):
    """Return the rows of ``df`` whose 'a' value is strictly below its 'b' value."""
    a_below_b = df['a'] < df['b']
    return df[a_below_b]

df1_subset = subset(df1)
df2_subset = subset(df2)

from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

Subset = namedtuple('Subset', ['orig', 'subset'])

def subset(df):
    """Pair ``df`` with its rows where column 'a' is strictly below column 'b'.

    Returns a ``Subset`` whose ``orig`` is the frame passed in and whose
    ``subset`` is the filtered frame.
    """
    a_below_b = df['a'] < df['b']
    return Subset(orig=df, subset=df[a_below_b])

df1_subset = subset(df1)
df2_subset = subset(df2)

print(df1_subset.orig)
print(df1_subset.subset)

from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

Inputs = namedtuple('Inputs', ['x', 'y'])
Subset = namedtuple('Subset', ['orig', 'subset'])

def subset(x, y):
    """Filter two frames at once and keep each original beside its subset.

    Returns an ``Inputs`` whose ``x`` and ``y`` fields are ``Subset`` records
    holding the frame passed in (``orig``) and its rows where column 'a' is
    strictly below column 'b' (``subset``).
    """
    def paired(frame):
        # Same filter for both inputs, written once.
        return Subset(orig=frame, subset=frame[frame['a'] < frame['b']])

    return Inputs(x=paired(x), y=paired(y))

subsets = subset(df1, df2)
print(subsets.x.orig)
print(subsets.y.subset)

from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

class Inputs(namedtuple('InputsBase', 'x y')):
    """Pair of ``Subset`` records, one per input frame.

    Each ``Subset`` keeps the frame that was passed in (``orig``) next to its
    rows where column 'a' is strictly below column 'b' (``subset``).
    """
    __slots__ = ()  # no per-instance __dict__, same footprint as the plain namedtuple

    Subset = namedtuple('Subset', 'orig subset')

    @classmethod
    def from_df(cls, x, y):
        """Build an ``Inputs`` from two frames that each have 'a' and 'b' columns."""
        # Strict `<`, matching the filter used by the earlier subset() versions.
        return cls(
            cls.Subset(x, x[x['a'] < x['b']]),
            cls.Subset(y, y[y['a'] < y['b']]),
        )
    
inputs = Inputs.from_df(x=df1, y=df2)
print(inputs.y.subset)

Theory

Functions are our most basic unit of modularity and computational structuring.

Thus, our design of functions should have, as its goal, the addition of useful structuring and useful modularity.

Structuring is the addition of out-of-band metadata—namely, how data interrelates so that it can be programmatically manipulated.

Modularity is typically about avoiding repetition to avoid “update anomalies.”

Context: `lambda`, `def`-functions, classes with `__call__`, `def`-generators

lambda typically connotes either:

a function that is stateless (one single expression)
a function that is used for a single, ad hoc purpose (and not intended for reuse)

f = lambda df: df.mean()

rv = sorted({'a': 1, 'b': -2, 'c': 3}.items(), key=lambda kv: abs(kv[-1]))
print(rv)

def function connotes:

some function that performs some computation or some action (may be stateful)
some function that may be reused

f = lambda df: df.mean()

def f(df):
    return df.mean()

class T:
    def __call__(self, df):
        return df.mean()

from pandas import DataFrame
from numpy.random import normal

class T:
    def __init__(self, df):
        self.df = df
    def __call__(self):
        return self.df.mean()

df = DataFrame({'a': normal(size=3)})
x = T(df)
x()

from pandas import DataFrame
from numpy.random import normal

def create_mean(df):
    def mean():
        return df.mean()
    return mean

df = DataFrame({'a': normal(size=3)})
x = create_mean(df)
x()

from pandas import DataFrame
from numpy.random import normal

class T:
    """Three-step computation whose intermediate results are stored on the instance.

    The passes depend on one another through instance attributes, so they
    must be called in order: first_pass, then second_pass, then third_pass.
    """
    def __init__(self, df):
        self.df = df
    def first_pass(self):
        # Subtract the mean of self.df from self.df; result kept as self.df1.
        self.df1 = self.df - self.df.mean()
    def second_pass(self):
        # Reads self.df1, which only exists after first_pass() has run.
        self.df2 = self.df1[self.df1 > 0]
    def third_pass(self):
        # Reads self.df2, which only exists after second_pass() has run.
        self.df3 = self.df2 * 10

df = DataFrame({'a': normal(size=3)})
x = T(df)
x.first_pass()
x.second_pass()
x.third_pass()
x.df3

from pandas import DataFrame
from numpy.random import normal

def g(df):
    """Yield the three stages of the computation, one per ``next()`` call.

    Stage 1 is ``df`` minus its mean, stage 2 is stage 1 masked to its
    positive entries, and stage 3 is stage 2 multiplied by 10. Each stage is
    computed only when the generator is advanced to it.
    """
    centered = df - df.mean()
    yield centered
    positive = centered[centered > 0]
    yield positive
    yield positive * 10

df = DataFrame({'a': normal(size=3)})
gi = g(df)
next(gi)
next(gi)
next(gi)

from pandas import DataFrame
from numpy.random import normal

class T:
    """Wrap a frame so that indexing filters its rows by a threshold on column 'a'."""

    def __init__(self, df):
        self.df = df

    def __getitem__(self, key):
        """Return the rows whose 'a' value is greater than ``abs(key)``."""
        frame = self.df
        column = frame['a']
        return frame[column > abs(key)]
    
x = T(DataFrame({'a': normal(size=3)}))

x[1]

def f():
    pass

Mechanics of the `def`-function

def f(df):
    df.to_csv('output.csv')
    ...
    ...
    return None

def f(df):
    df.to_csv('output.csv')
    ...
    ...
    ...
    return

def f(df):
    df.to_csv('output.csv')
    ...
    ...

def f(*, b, a):
    pass

f(a=None, b=None)

from pandas import read_csv
# The keyword is `header` (singular); `headers=` is rejected with a TypeError.
read_csv('file.csv', delimiter=',', header=None)

def f(a, /):
    pass
f(None)

from matplotlib.pyplot import hist
help(hist)

def f(a, b, c):
    return

f(*'123')
f(*(1, 2, 3))
f(*[1, 2, 3])
f(*{1: 'one', 2: 'two', 3: 'three'})
f(*{1, 2, 3})

def f(a, b, c):
    return a + b + c
f(*{1, 2, 3})

def f(nums):
    return sum(nums)
f({1, 2, 3})

def f(*args):
    pass

f(1, 2, 3)

def f(args):
    pass

f([1, 2, 3])

def f(**kwargs):
    pass

f(a=1, b=2, c=3)

def f(**kwargs):
    """Accept any keyword arguments and do nothing with them."""
    pass

# A **kwargs-only function takes no positional arguments, so an existing dict
# has to be **-unpacked at the call site to supply its items as keywords.
f(**{'a': 1, 'b': 2, 'c': 3})

x = 123
isinstance(x, {int, float})

x = 123
isinstance(x, (int, float))

class TMeta(type):
    """Metaclass that makes its classes themselves iterable (yielding nothing)."""
    def __iter__(self):
        # __iter__ must hand back an iterator; returning the bare tuple makes
        # iter(T) fail with "iter() returned non-iterator of type 'tuple'".
        return iter(())
class T(metaclass=TMeta):
    pass

x = object()
isinstance(x, T)

x = 123
isinstance(x, *{int, float})

x = 123
isinstance(x, (*{int, float},))

# helpful for "wrapper" functions
def f(*args, **kwargs):
    pass

f(1, 2, 3, a=1, b=2, c=3)

def f(data, mode=True):
    pass

f(..., mode=False)
f(..., mode=False)
f(..., mode=False)

def helper(data):
    return f(data, mode=False)

helper(...)
helper(...)
helper(...)

def f(data, mode=True, readonly=True):
    pass

f(..., mode=False)
f(..., mode=False)
f(..., mode=False, readonly=False)

def helper(data, readonly):
    return f(data, mode=False, readonly=readonly)

def helper(data, readonly=True):
    return f(data, mode=False, readonly=readonly)

def helper(data, readonly=None):
    if readonly is not None:
        return f(data, mode=False, readonly=readonly)
    return f(data, mode=False)

helper(...)
helper(...)
helper(..., readonly=True)

def f(data, mode=True, readonly=True):
    pass

def helper(data, **kwargs):
    return f(data, mode=False, **kwargs)

helper(...)
helper(...)
helper(..., readonly=True)

def f(data, mode=True, readonly=True):
    pass

def helper(data, **kwargs):
    return f(data, mode=False, **kwargs)

helper(...)
helper(..., mode=True)

def f(data, mode=True, readonly=True):
    pass

def helper(data, **kwargs):
    # Merge the default into a single dict before unpacking. Entries from
    # kwargs come later in the literal, so a caller-supplied 'mode' replaces
    # the False default instead of reaching f() as a second 'mode' keyword.
    return f(data, **{'mode': False, **kwargs})

helper(...)
helper(..., mode=True)

An aside about *- and **-unpacking in literal syntax:

def f(xs, ys):
    return xs + ys

print(f'{f(123, 456)             = }')
print(f'{f("abc", "def")         = }')
print(f'{f([1, 2, 3], [4, 5, 6]) = }')
print(f'{f((1, 2, 3), (4, 5, 6)) = }')
#  print(f'{f((1, 2, 3), [4, 5, 6]) = }')

def f(xs, ys):
    return list(xs) + list(ys)

print(f'{f((1, 2, 3), [4, 5, 6]) = }')

def f(xs, ys):
    if not isinstance(xs, list):
        xs = list(xs) 
    if not isinstance(ys, list):
        ys = list(ys) 
    return xs + ys

print(f'{f((1, 2, 3), [4, 5, 6]) = }')

from itertools import chain
def f(xs, ys):
    return list(chain(xs, ys))

print(f'{f((1, 2, 3), [4, 5, 6]) = }')

def f(xs, ys):
    return [*xs, *ys]

print(f'{f((1, 2, 3), [4, 5, 6]) = }')

xs = [1, 2, 3]
ys = [5, 6, 7]
zs = [0, xs, 4, ys, 8]
print(f'{zs = }')
zs = [0, *xs, 4, *ys, 8]
print(f'{zs = }')

some_tuple =  1, 2, 3

some_tuple = (1, 2, 3)
some_list  = [1, 2, 3]
some_dict  = {1: 1, 2: 4, 3: 9}
some_set   = {1, 2, 3}

empty_tuple = ()
empty_list  = []
empty_dict  = {}
empty_set   = set()
#  empty_set   = {*''}

xs = [1, 2, 3, 4]
lc = [x**2 for x in xs]
sc = {x**2 for x in xs}
dc = {x: x**2 for x in xs}
ge = (x**2 for x in xs)
#  tc = *(x**2 for x in xs),

print(f'{lc = }')
print(f'{sc = }')
print(f'{dc = }')
print(f'{ge = }')
#  print(f'{tc = }')

d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'zwei', 3: 'drei'}

d3 = d1.copy()
d3.update(d2)
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')

from itertools import chain
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'zwei', 3: 'drei'}

d3 = dict(chain(d1.items(), d2.items()))
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')

from itertools import chain
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'zwei', 3: 'drei'}

d3 = {**d1, **d2}
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')

from itertools import chain
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'zwei', 3: 'drei'}

d3 = d1 | d2
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')

Back to functions:

def f(field_a, field_b):
    pass
f(1, 2)

Struct = namedtuple('Struct', 'a b')
def f(struct):
    pass

f(Struct(1, 2))

def f(field_a, field_b):
    """Validate the two loose fields; the body is otherwise a placeholder.

    Raises:
        ValueError: if ``field_a > 10`` while ``field_b < 20``.
    """
    if field_a > 10 and field_b < 20:
        # Say which values were rejected so the failure is diagnosable.
        raise ValueError(
            f'invalid combination: field_a={field_a!r} > 10 with field_b={field_b!r} < 20'
        )
f(1, 2)

class Struct(namedtuple('Struct', 'a b')):
    """Two-field record that validates its fields when constructed.

    Raises:
        ValueError: if ``a > 10`` while ``b < 20``.
    """
    __slots__ = ()  # no per-instance __dict__, same footprint as the plain namedtuple

    def __new__(cls, a, b):
        # Tuples are immutable, so validation has to happen in __new__,
        # before the instance exists.
        if a > 10 and b < 20:
            raise ValueError(f'invalid combination: a={a!r} > 10 with b={b!r} < 20')
        return super().__new__(cls, a, b)

def f(struct):
    pass

f(Struct(1, 2))

from pandas import DataFrame, to_datetime
df = DataFrame({'a': [1]}, index=to_datetime(['2020-07-04']))

print(df.loc['2020-07-04'])
print(f'{type(df.index[0]) = }')

https://github.com/python-variants/variants

# convenience layer
from variants import primary 

@primary
def f(field_a, field_b):
    pass

@f.variant('structured')
def f(struct):
    pass

f('a', 'b')
f.structured(('a', 'b'))

def f(x, y):
    # x and y are integers
    pass

def f(x, y):
    '''x and y are integers'''
print(f.__doc__)

def f(x, y):
    '''
    x: int
    y: int
    '''
# The `if` must come before the unpacking `for`: placed after it, the guard
# is evaluated only once line.split(':', 1) has already been unpacked into
# k, v, so a colon-less docstring line raises ValueError instead of being
# skipped.
# NOTE: eval() is tolerable here only because the docstring is written a few
# lines above; never eval text that comes from outside the program.
print({k.strip(): eval(v.strip())
         for line in f.__doc__.strip().splitlines()
         if ':' in line
         for k, v in [line.split(':', 1)]
        })

def f(x : int, y : int):
    pass
f.__annotations__

def f(x, y):
    # x and y are pandas.DataFrames with a column 'a'
    pass

def f(x, y):
    '''
    x and y are pandas.DataFrames with a column 'a'
    '''
print(f.__doc__)

from pandas import DataFrame
def f(x, y):
    '''
    x: DataFrame(columns=['a'])
    y: DataFrame(columns=['a'])
    '''
# The `if` must come before the unpacking `for`: placed after it, the guard
# is evaluated only once line.split(':', 1) has already been unpacked into
# k, v, so a colon-less docstring line raises ValueError instead of being
# skipped.
# NOTE: eval() is tolerable here only because the docstring is written a few
# lines above; never eval text that comes from outside the program.
print({k.strip(): eval(v.strip())
         for line in f.__doc__.strip().splitlines()
         if ':' in line
         for k, v in [line.split(':', 1)]
        })

from pandas import DataFrame
def f(x : DataFrame(columns=['a']), y : DataFrame(columns=['a'])):
    pass
f.__annotations__

def pure(f):
    """Decorator: flag ``f`` by setting ``f.pure = True`` and return ``f`` itself."""
    setattr(f, 'pure', True)
    return f

@pure
def f():
    pass

def g():
    pass

print(f'{f.pure = }')
print(f"{getattr(g, 'pure', False) = }")

from collections.abc import Callable
class PureMeta(type):
    """Metaclass whose instance check accepts callables carrying a truthy ``pure`` attribute."""

    def __instancecheck__(self, inst):
        # Anything that is not callable can never count as pure.
        if not isinstance(inst, Callable):
            return False
        return getattr(inst, 'pure', False)


class pure(metaclass=PureMeta):
    """Decorator object: calling an instance on ``f`` sets ``f.pure = True`` and returns ``f``."""

    def __call__(self, f):
        setattr(f, 'pure', True)
        return f
    
@pure()
def f(x, y, *args):
    """Placeholder function flagged through the ``pure()`` decorator; returns ``...``."""
    # The original's second `return None` could never execute and is removed.
    return ...

def g():
    pass

isinstance(f, pure)
isinstance(g, pure)