seminars.fb

Tools & Approaches → “designing functions”

Seminar (Fri, Dec 4, 2020; 9 AM PST)

Theme: Tools and Approaches

Topic: Function Design

Keywords: *args, **kwargs, functools.wraps, lambda functions; inspect.signature; decorators

Presenter James Powell james@dutc.io
Date Friday, December 4, 2020
Time 9:00 AM PST
print('Good morning!')
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
print(df)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

df1['c'] = df1['a'] + df1['b']
print(df1)

df2['c'] = df2['a'] + df2['b']
print(df2)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

df1_subset = df1[df1['a'] < df1['b']]
print(df1_subset)

df2_subset = df2[df2['a'] < df2['b']]
print(df2_subset)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

def subset(df):
    """Return the rows of ``df`` whose 'a' value is strictly less than 'b'."""
    a_below_b = df['a'] < df['b']
    return df[a_below_b]

df1_subset = subset(df1)
df2_subset = subset(df2)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

# Pairs an input frame with the rows selected from it.
Subset = namedtuple('Subset', ['orig', 'subset'])

def subset(df):
    """Bundle ``df`` with its rows where column 'a' is strictly below 'b'."""
    a_below_b = df['a'] < df['b']
    return Subset(orig=df, subset=df[a_below_b])

df1_subset = subset(df1)
df2_subset = subset(df2)

print(df1_subset.orig)
print(df1_subset.subset)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

# Result containers: one Subset per input, grouped under the argument names.
Inputs = namedtuple('Inputs', ['x', 'y'])
Subset = namedtuple('Subset', ['orig', 'subset'])

def subset(x, y):
    """Pair each of the two frames with its rows where 'a' < 'b'."""
    pairs = [Subset(orig=df, subset=df[df['a'] < df['b']]) for df in (x, y)]
    return Inputs(*pairs)

subsets = subset(df1, df2)
print(subsets.x.orig)
print(subsets.y.subset)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

class Inputs(namedtuple('InputsBase', 'x y')):
    """Pair of per-input results, one for ``x`` and one for ``y``.

    When built through ``from_df`` each field is an ``Inputs.Subset`` holding
    the original frame and the rows selected from it.
    """

    # Keeps an original frame together with the subset derived from it.
    Subset = namedtuple('Subset', 'orig subset')

    @classmethod
    def from_df(cls, x, y):
        """Build an ``Inputs`` from two frames that have 'a' and 'b' columns.

        Each frame is paired with its rows where 'a' is strictly less than
        'b' (rows where the two are equal are not selected).
        """
        return cls(
            cls.Subset(x, x[x['a'] < x['b']]),
            cls.Subset(y, y[y['a'] < y['b']]),
        )
    
inputs = Inputs.from_df(x=df1, y=df2)
print(inputs.y.subset)

Theory

Functions are our most basic unit of modularity and computational structuring.

Thus, our design of functions should have, as its goal, the addition of useful structuring and useful modularity.

Structuring is the addition of out-of-band metadata—namely, how data interrelates so that it can be programmatically manipulated.

Modularity is typically about avoiding repetition to avoid “update anomalies.”

Context: lambda, def-functions, classes with __call__, def-generators

lambda typically connotes either:

f = lambda df: df.mean()
rv = sorted({'a': 1, 'b': -2, 'c': 3}.items(), key=lambda kv: abs(kv[-1]))
print(rv)

def function connotes:

f = lambda df: df.mean()

def f(df):
    return df.mean()

class T:
    def __call__(self, df):
        return df.mean()
from pandas import DataFrame
from numpy.random import normal

class T:
    def __init__(self, df):
        self.df = df
    def __call__(self):
        return self.df.mean()

df = DataFrame({'a': normal(size=3)})
x = T(df)
x()
from pandas import DataFrame
from numpy.random import normal

def create_mean(df):
    def mean():
        return df.mean()
    return mean

df = DataFrame({'a': normal(size=3)})
x = create_mean(df)
x()
from pandas import DataFrame
from numpy.random import normal

class T:
    """Three-stage transformation whose stages are separate method calls.

    Each stage stores its result on the instance, and the next stage reads
    it from there, so the stages must be called in order.
    """

    def __init__(self, df):
        self.df = df

    def first_pass(self):
        """Subtract ``df.mean()`` from ``df`` and store the result as ``df1``."""
        centre = self.df.mean()
        self.df1 = self.df - centre

    def second_pass(self):
        """Index ``df1`` by the mask ``df1 > 0`` and store the result as ``df2``."""
        is_positive = self.df1 > 0
        self.df2 = self.df1[is_positive]

    def third_pass(self):
        """Multiply ``df2`` by ten and store the result as ``df3``."""
        factor = 10
        self.df3 = self.df2 * factor

df = DataFrame({'a': normal(size=3)})
x = T(df)
x.first_pass()
x.second_pass()
x.third_pass()
x.df3
from pandas import DataFrame
from numpy.random import normal

def g(df):
    """Yield the three stages of the transformation, one per ``next()`` call.

    Stages, in order: ``df`` minus ``df.mean()``; that result indexed by its
    own ``> 0`` mask; the masked result multiplied by ten. Each stage is
    computed only when the generator is advanced to it.
    """
    centred = df - df.mean()
    yield centred
    positive_only = centred[centred > 0]
    yield positive_only
    yield positive_only * 10

df = DataFrame({'a': normal(size=3)})
gi = g(df)
next(gi)
next(gi)
next(gi)
from pandas import DataFrame
from numpy.random import normal

class T:
    """Wrap a frame so that ``obj[key]`` filters its rows by column 'a'."""

    def __init__(self, df):
        self.df = df

    def __getitem__(self, key):
        """Return the rows whose 'a' value is greater than ``abs(key)``."""
        threshold = abs(key)
        keep = self.df['a'] > threshold
        return self.df[keep]
    
x = T(DataFrame({'a': normal(size=3)}))

x[1]
def f():
    pass

Mechanics of the def-function

def f(df):
    df.to_csv('output.csv')
    ...
    ...
    return None

def f(df):
    df.to_csv('output.csv')
    ...
    ...
    ...
    return

def f(df):
    df.to_csv('output.csv')
    ...
    ...
def f(*, b, a):
    pass

f(a=None, b=None)
from pandas import read_csv
# The keyword is `header` (singular); `header=None` tells pandas the file has
# no header row. `headers` is not a read_csv parameter and is rejected as an
# unexpected keyword argument.
read_csv('file.csv', delimiter=',', header=None)
def f(a, /):
    pass
f(None)
from matplotlib.pyplot import hist
help(hist)
def f(a, b, c):
    return

f(*'123')
f(*(1, 2, 3))
f(*[1, 2, 3])
f(*{1: 'one', 2: 'two', 3: 'three'})
f(*{1, 2, 3})
def f(a, b, c):
    return a + b + c
f(*{1, 2, 3})
def f(nums):
    return sum(nums)
f({1, 2, 3})
def f(*args):
    pass

f(1, 2, 3)
def f(args):
    pass

f([1, 2, 3])
def f(**kwargs):
    pass

f(a=1, b=2, c=3)
def f(kwargs):
    """Accept the keyword values bundled into one mapping argument."""


# The mapping is passed as a single positional argument, in contrast to a
# `**kwargs` signature where the same data arrives as keyword arguments.
f({'a': 1, 'b': 2, 'c': 3})
x = 123
isinstance(x, {int, float})
x = 123
isinstance(x, (int, float))
class TMeta(type):
    """Metaclass that makes its classes iterable, yielding no items."""

    def __iter__(self):
        # __iter__ must return an iterator; returning a bare tuple makes
        # iter(cls) raise TypeError, so wrap the empty tuple with iter().
        return iter(())
class T(metaclass=TMeta):
    pass

x = object()
isinstance(x, T)
x = 123
isinstance(x, *{int, float})
x = 123
isinstance(x, (*{int, float},))
# helpful for "wrapper" functions
def f(*args, **kwargs):
    pass

f(1, 2, 3, a=1, b=2, c=3)
def f(data, mode=True):
    pass

f(..., mode=False)
f(..., mode=False)
f(..., mode=False)

def helper(data):
    return f(data, mode=False)

helper(...)
helper(...)
helper(...)
def f(data, mode=True, readonly=True):
    pass

f(..., mode=False)
f(..., mode=False)
f(..., mode=False, readonly=False)

def helper(data, readonly):
    return f(data, mode=False, readonly=readonly)

def helper(data, readonly=True):
    return f(data, mode=False, readonly=readonly)

def helper(data, readonly=None):
    if readonly is not None:
        return f(data, mode=False, readonly=readonly)
    return f(data, mode=False)

helper(...)
helper(...)
helper(..., readonly=True)
def f(data, mode=True, readonly=True):
    pass

def helper(data, **kwargs):
    return f(data, mode=False, **kwargs)

helper(...)
helper(...)
helper(..., readonly=True)
def f(data, mode=True, readonly=True):
    pass

def helper(data, **kwargs):
    return f(data, mode=False, **kwargs)

helper(...)
helper(..., mode=True)
def f(data, mode=True, readonly=True):
    # Placeholder for the wrapped function; only its signature matters here.
    pass

def helper(data, **kwargs):
    # Forward to f with mode defaulting to False. In the dict literal the
    # caller's **kwargs are unpacked after the 'mode' entry, so a
    # caller-supplied mode replaces the False default instead of producing a
    # duplicate-keyword error.
    return f(data, **{'mode': False, **kwargs})

helper(...)
helper(..., mode=True)

An aside about *- and **-unpacking in literal syntax:

def f(xs, ys):
    return xs + ys

print(f'{f(123, 456)             = }')
print(f'{f("abc", "def")         = }')
print(f'{f([1, 2, 3], [4, 5, 6]) = }')
print(f'{f((1, 2, 3), (4, 5, 6)) = }')
#  print(f'{f((1, 2, 3), [4, 5, 6]) = }')
def f(xs, ys):
    return list(xs) + list(ys)

print(f'{f((1, 2, 3), [4, 5, 6]) = }')
def f(xs, ys):
    """Concatenate two iterables into a list, converting non-lists first."""
    left = xs if isinstance(xs, list) else list(xs)
    right = ys if isinstance(ys, list) else list(ys)
    return left + right

print(f'{f((1, 2, 3), [4, 5, 6]) = }')
from itertools import chain
def f(xs, ys):
    # chain() iterates xs and then ys, so any two iterables can be combined
    # regardless of their types; list() collects the combined items.
    return list(chain(xs, ys))

print(f'{f((1, 2, 3), [4, 5, 6]) = }')
def f(xs, ys):
    return [*xs, *ys]

print(f'{f((1, 2, 3), [4, 5, 6]) = }')
xs = [1, 2, 3]
ys = [5, 6, 7]
zs = [0, xs, 4, ys, 8]
print(f'{zs = }')
zs = [0, *xs, 4, *ys, 8]
print(f'{zs = }')
some_tuple =  1, 2, 3

some_tuple = (1, 2, 3)
some_list  = [1, 2, 3]
some_dict  = {1: 1, 2: 4, 3: 9}
some_set   = {1, 2, 3}

empty_tuple = ()
empty_list  = []
empty_dict  = {}
empty_set   = set()
#  empty_set   = {*''}
xs = [1, 2, 3, 4]
lc = [x**2 for x in xs]
sc = {x**2 for x in xs}
dc = {x: x**2 for x in xs}
ge = (x**2 for x in xs)
#  tc = *(x**2 for x in xs),

print(f'{lc = }')
print(f'{sc = }')
print(f'{dc = }')
print(f'{ge = }')
#  print(f'{tc = }')
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'zwei', 3: 'drei'}

d3 = d1.copy()
d3.update(d2)
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')
from itertools import chain
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'zwei', 3: 'drei'}

d3 = dict(chain(d1.items(), d2.items()))
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')
from itertools import chain
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'zwei', 3: 'drei'}

d3 = {**d1, **d2}
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')
from itertools import chain
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'zwei', 3: 'drei'}

d3 = d1 | d2
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')

Back to functions:

def f(field_a, field_b):
    pass
f(1, 2)
Struct = namedtuple('Struct', 'a b')
def f(struct):
    pass

f(Struct(1, 2))
def f(field_a, field_b):
    """Reject the combination ``field_a > 10`` together with ``field_b < 20``.

    Raises:
        ValueError: if both conditions hold.
    """
    a_too_large = field_a > 10
    if a_too_large and field_b < 20:
        raise ValueError()
f(1, 2)
class Struct(namedtuple('Struct', ['a', 'b'])):
    """Two-field record that validates its fields at construction time."""

    def __new__(cls, a, b):
        """Create the record, refusing ``a > 10`` combined with ``b < 20``.

        Raises:
            ValueError: if both conditions hold.
        """
        rejected = a > 10 and b < 20
        if rejected:
            raise ValueError()
        return super().__new__(cls, a, b)

def f(struct):
    pass

f(Struct(1, 2))
from pandas import DataFrame, to_datetime
df = DataFrame({'a': [1]}, index=to_datetime(['2020-07-04']))

print(df.loc['2020-07-04'])
print(f'{type(df.index[0]) = }')

https://github.com/python-variants/variants

# convenience layer
from variants import primary 

@primary
def f(field_a, field_b):
    pass

@f.variant('structured')
def f(struct):
    pass

f('a', 'b')
f.structured(('a', 'b'))
def f(x, y):
    # x and y are integers
    pass
def f(x, y):
    '''x and y are integers'''
print(f.__doc__)
def f(x, y):
    '''
    x: int
    y: int
    '''
# Parse the docstring's "name: expression" lines into {name: evaluated value}.
# The ':' filter sits before the unpacking clause so that a line without a
# colon is skipped instead of failing the two-way unpack of split(':', 1).
# NOTE: eval() executes arbitrary code; it is used here on a docstring written
# in this file, never on external input.
print({k.strip(): eval(v.strip())
         for line in f.__doc__.strip().splitlines()
         if ':' in line
         for k, v in [line.split(':', 1)]
        })
def f(x : int, y : int):
    pass
f.__annotations__
def f(x, y):
    # x and y are pandas.DataFrames with a column 'a'
    pass
def f(x, y):
    '''
    x and y are pandas.DataFrames with a column 'a'
    '''
print(f.__doc__)
from pandas import DataFrame
def f(x, y):
    '''
    x: DataFrame(columns=['a'])
    y: DataFrame(columns=['a'])
    '''
# Parse the docstring's "name: expression" lines into {name: evaluated value}.
# The ':' filter sits before the unpacking clause so that a line without a
# colon is skipped instead of failing the two-way unpack of split(':', 1).
# NOTE: eval() executes arbitrary code; it is used here on a docstring written
# in this file, never on external input.
print({k.strip(): eval(v.strip())
         for line in f.__doc__.strip().splitlines()
         if ':' in line
         for k, v in [line.split(':', 1)]
        })
from pandas import DataFrame
def f(x : DataFrame(columns=['a']), y : DataFrame(columns=['a'])):
    pass
f.__annotations__
def pure(f):
    """Decorator: tag ``f`` with a ``pure = True`` attribute and return it."""
    setattr(f, 'pure', True)
    return f

@pure
def f():
    pass

def g():
    pass

print(f'{f.pure = }')
print(f"{getattr(g, 'pure', False) = }")
from collections.abc import Callable
class PureMeta(type):
    """Metaclass that lets ``isinstance(obj, pure)`` test for tagged callables."""

    def __instancecheck__(self, inst):
        # An object counts as an instance when it is callable and carries a
        # truthy ``pure`` attribute.
        return isinstance(inst, Callable) and getattr(inst, 'pure', False)


class pure(metaclass=PureMeta):
    """Decorator object: ``@pure()`` tags the function with ``pure = True``."""

    def __call__(self, f):
        f.pure = True
        return f


@pure()
def f(x, y, *args):
    """Placeholder function; returns ``Ellipsis``."""
    return ...

def g():
    pass

isinstance(f, pure)
isinstance(g, pure)