Theme: Tools and Approaches
Topic: Function Design
Keywords: *args, **kwargs, functools.wraps, lambda functions; inspect.signature; decorators
Presenter: James Powell <james@dutc.io>
Date: Friday, December 4, 2020
Time: 9:00 AM PST
# Opening greeting for the session.
print("Good morning!")
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
# Three rows of standard-normal draws in each of the columns 'a' and 'b'.
size = 3
df = DataFrame({'a': normal(size=size), 'b': normal(size=size)})
print(df)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
# Two independent 3-row frames of standard-normal draws.
size = 3
df1 = DataFrame({'a': normal(size=size), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=size), 'b': normal(size=size)})

# The same derived column is computed by hand for each frame (deliberate repetition).
df1['c'] = df1['a'].add(df1['b'])
print(df1)

df2['c'] = df2['a'].add(df2['b'])
print(df2)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
# Two independent 3-row frames of standard-normal draws.
size = 3
df1 = DataFrame({'a': normal(size=size), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=size), 'b': normal(size=size)})

# Keep only the rows where 'a' is strictly below 'b', once per frame.
df1_subset = df1.loc[df1['a'].lt(df1['b'])]
print(df1_subset)

df2_subset = df2.loc[df2['a'].lt(df2['b'])]
print(df2_subset)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})


def subset(df):
    """Return the rows of ``df`` where column 'a' is strictly less than column 'b'."""
    return df[df['a'] < df['b']]


# The filtering rule now lives in one place and is applied to both frames.
df1_subset = subset(df1)
df2_subset = subset(df2)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

# Pairs a frame with its filtered view so the relationship is carried in the result.
Subset = namedtuple('Subset', 'orig subset')


def subset(df):
    """Return ``Subset(orig, subset)``: ``df`` itself and its rows where 'a' < 'b'."""
    return Subset(df, df[df['a'] < df['b']])


df1_subset = subset(df1)
df2_subset = subset(df2)
print(df1_subset.orig)
print(df1_subset.subset)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})

# Inputs groups the per-argument results; Subset pairs a frame with its filtered view.
Inputs = namedtuple('Inputs', 'x y')
Subset = namedtuple('Subset', 'orig subset')


def subset(x, y):
    """Return ``Inputs(x, y)`` where each field is ``Subset(orig, rows with 'a' < 'b')``."""
    return Inputs(
        Subset(x, x[x['a'] < x['b']]),
        Subset(y, y[y['a'] < y['b']]),
    )


subsets = subset(df1, df2)
print(subsets.x.orig)
print(subsets.y.subset)
from pandas import DataFrame
from numpy.random import normal
from collections import namedtuple
df1 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})
df2 = DataFrame({'a': normal(size=(size:=3)), 'b': normal(size=size)})


class Inputs(namedtuple('InputsBase', 'x y')):
    """Pair of ``Subset`` records, one per input frame."""

    # Pairs a frame with its filtered view.
    Subset = namedtuple('Subset', 'orig subset')

    @classmethod
    def from_df(cls, x, y):
        """Build an ``Inputs`` from two frames, keeping rows where 'a' <= 'b'."""
        return cls(
            cls.Subset(x, x[x['a'] <= x['b']]),
            cls.Subset(y, y[y['a'] <= y['b']]),
        )


inputs = Inputs.from_df(x=df1, y=df2)
print(inputs.y.subset)
Functions are our most basic unit of modularity and computational structuring.
Thus, our design of functions should have, as its goal, the addition of useful structuring and useful modularity.
Structuring is the addition of out-of-band metadata—namely, how data interrelates so that it can be programmatically manipulated.
Modularity is typically about avoiding repetition to avoid “update anomalies.”
lambda, def-functions, classes with __call__, def-generators
lambda typically connotes either:
# A lambda bound to a name: takes an object and returns its .mean().
f = lambda df: df.mean()

# A lambda used inline as a sort key: order the items by the absolute value
# of the second element of each (key, value) pair.
rv = sorted(
    {'a': 1, 'b': -2, 'c': 3}.items(),
    key=lambda item: abs(item[1]),
)
print(rv)
def-function connotes:
# Three spellings of the same callable: each returns df.mean().
f = lambda df: df.mean()


def f(df):
    """Return ``df.mean()``."""
    return df.mean()


class T:
    """Callable object whose instances return ``df.mean()`` when called."""

    def __call__(self, df):
        return df.mean()
from pandas import DataFrame
from numpy.random import normal
class T:
    """Callable that stores a frame at construction and returns its mean when called."""

    def __init__(self, df):
        self.df = df

    def __call__(self):
        return self.df.mean()


df = DataFrame({'a': normal(size=3)})
x = T(df)
x()
from pandas import DataFrame
from numpy.random import normal
def create_mean(df):
    """Return a zero-argument function that computes ``df.mean()`` (closure over ``df``)."""
    def mean():
        return df.mean()
    return mean


df = DataFrame({'a': normal(size=3)})
x = create_mean(df)
x()
from pandas import DataFrame
from numpy.random import normal
class T:
    """Three-step computation whose intermediate results are kept as attributes.

    The passes must be called in order: each one reads the attribute
    written by the previous one.
    """

    def __init__(self, df):
        self.df = df

    def first_pass(self):
        # Centre the data on its mean.
        self.df1 = self.df - self.df.mean()

    def second_pass(self):
        # Boolean-mask indexing on a frame keeps the shape; non-positive cells become NaN.
        self.df2 = self.df1[self.df1 > 0]

    def third_pass(self):
        self.df3 = self.df2 * 10


df = DataFrame({'a': normal(size=3)})
x = T(df)
x.first_pass()
x.second_pass()
x.third_pass()
x.df3
from pandas import DataFrame
from numpy.random import normal
def g(df):
    """Yield the three stages of the computation in order.

    Yields the mean-centred frame, then that frame masked to positive
    values, then the masked frame multiplied by 10.
    """
    yield (df1 := df - df.mean())
    yield (df2 := df1[df1 > 0])
    yield (df3 := df2 * 10)


df = DataFrame({'a': normal(size=3)})
gi = g(df)
next(gi)
next(gi)
next(gi)
from pandas import DataFrame
from numpy.random import normal
class T:
    """Wrap a frame so that ``obj[key]`` returns the rows where 'a' > abs(key)."""

    def __init__(self, df):
        self.df = df

    def __getitem__(self, key):
        return self.df[self.df['a'] > abs(key)]


x = T(DataFrame({'a': normal(size=3)}))
x[1]
def f():
    """Minimal function: no parameters, empty body, implicitly returns None."""
    pass
# def-function
def f(df):
    """Write ``df`` to 'output.csv'; returns None explicitly."""
    df.to_csv('output.csv')
    ...
    ...
    return None
def f(df):
    """Write ``df`` to 'output.csv'; a bare ``return`` also yields None."""
    df.to_csv('output.csv')
    ...
    ...
    ...
    return
def f(df):
    """Write ``df`` to 'output.csv'; falling off the end also yields None."""
    df.to_csv('output.csv')
    ...
    ...
def f(*, b, a):
    """Accept ``a`` and ``b`` as keyword-only arguments (everything after ``*``)."""
    pass


# Keyword arguments may be passed in any order.
f(a=None, b=None)
from pandas import read_csv
# read_csv has no `headers` parameter; the keyword is `header`
# (header=None means the file has no header row).
read_csv('file.csv', delimiter=',', header=None)
def f(a, /):
    """Accept ``a`` as a positional-only argument (everything before ``/``)."""
    pass


f(None)
from matplotlib.pyplot import hist
# Show the documentation for matplotlib's hist, including its signature.
help(hist)
def f(a, b, c):
    """Accept exactly three positional arguments; returns None."""
    return


# *-unpacking in a call works with any iterable that yields three items.
f(*'123')
f(*(1, 2, 3))
f(*[1, 2, 3])
f(*{1: 'one', 2: 'two', 3: 'three'})  # iterating a dict yields its keys
f(*{1, 2, 3})
def f(a, b, c):
    """Return ``a + b + c``."""
    return a + b + c


# Set iteration order is not guaranteed, but addition of numbers is
# order-independent, so the result is the same either way.
f(*{1, 2, 3})
def f(nums):
    """Return the sum of the iterable ``nums`` (any number of items)."""
    return sum(nums)


f({1, 2, 3})
def f(*args):
    """Accept any number of positional arguments, collected into the tuple ``args``."""
    pass


f(1, 2, 3)
def f(args):
    """Accept one argument; here the caller passes the values as a single list."""
    pass


f([1, 2, 3])
def f(**kwargs):
    """Accept any number of keyword arguments, collected into the dict ``kwargs``."""
    pass


f(a=1, b=2, c=3)
def f(kwargs):
    """Accept one argument; here the caller passes the options as a single dict.

    Counterpart of the ``**kwargs`` form, mirroring ``def f(args)`` versus
    ``def f(*args)``.
    """
    pass


f({'a': 1, 'b': 2, 'c': 3})
x = 123
# Raises TypeError: the second argument must be a type, a tuple of types,
# or a union; a set is not accepted.
isinstance(x, {int, float})
x = 123
# A tuple of types is accepted: True when x is an instance of any of them.
isinstance(x, (int, float))
class TMeta(type):
    """Metaclass that makes its classes iterable (yielding no items)."""

    def __iter__(self):
        # __iter__ must return an iterator; a bare tuple is only an iterable
        # and would make iter(T) raise TypeError.
        return iter(())


class T(metaclass=TMeta):
    """A class that is itself iterable, through its metaclass."""
    pass


x = object()
# T is both a type and an iterable; isinstance treats it as a single type.
isinstance(x, T)
x = 123
# Raises TypeError: unpacking the set passes int and float as two separate
# arguments, and isinstance takes exactly two arguments in total.
isinstance(x, *{int, float})
x = 123
# (*s,) unpacks the set into a tuple literal, which isinstance accepts.
isinstance(x, (*{int, float},))
# helpful for "wrapper" functions
def f(*args, **kwargs):
    """Accept any mix of positional and keyword arguments."""
    pass


f(1, 2, 3, a=1, b=2, c=3)
def f(data, mode=True):
    """Placeholder worker; ``mode`` defaults to True."""
    pass


# Every call site repeats the same non-default option...
f(..., mode=False)
f(..., mode=False)
f(..., mode=False)


def helper(data):
    """Call ``f`` with ``mode`` pinned to False so call sites need not repeat it."""
    return f(data, mode=False)


# ...so the repetition moves into a helper.
helper(...)
helper(...)
helper(...)
def f(data, mode=True, readonly=True):
    """Placeholder worker with two defaulted options."""
    pass


f(..., mode=False)
f(..., mode=False)
f(..., mode=False, readonly=False)


# Attempt 1: `readonly` is required, so every caller must now pass it.
def helper(data, readonly):
    return f(data, mode=False, readonly=readonly)


# Attempt 2: the default of `f` is duplicated here and can drift out of sync.
def helper(data, readonly=True):
    return f(data, mode=False, readonly=readonly)


# Attempt 3: None means "not given", so the default of `f` applies.
def helper(data, readonly=None):
    """Call ``f`` with ``mode=False``, forwarding ``readonly`` only when it was given."""
    if readonly is not None:
        return f(data, mode=False, readonly=readonly)
    return f(data, mode=False)


helper(...)
helper(...)
helper(..., readonly=True)
def f(data, mode=True, readonly=True):
    """Placeholder worker with two defaulted options."""
    pass


def helper(data, **kwargs):
    """Call ``f`` with ``mode=False``, forwarding any other keyword arguments unchanged."""
    return f(data, mode=False, **kwargs)


helper(...)
helper(...)
helper(..., readonly=True)
def f(data, mode=True, readonly=True):
    """Placeholder worker with two defaulted options."""
    pass


def helper(data, **kwargs):
    """Call ``f`` with ``mode=False`` plus any forwarded keyword arguments.

    Passing ``mode`` through ``kwargs`` collides with the explicit
    ``mode=False`` and raises TypeError.
    """
    return f(data, mode=False, **kwargs)
helper(...)
# helper already passes mode=False and then forwards mode=True via **kwargs,
# so this call raises TypeError (multiple values for keyword argument 'mode').
helper(..., mode=True)
def f(data, mode=True, readonly=True):
    """Placeholder worker with two defaulted options."""
    pass


def helper(data, **kwargs):
    """Call ``f`` with ``mode`` defaulting to False but overridable by the caller."""
    # Later entries win in a dict literal, so a caller-supplied `mode`
    # replaces the False default instead of colliding with it.
    return f(data, **{'mode': False, **kwargs})


helper(...)
helper(..., mode=True)
An aside about *- and **-unpacking in literal syntax:
def f(xs, ys):
    """Return ``xs + ys``; both operands must support ``+`` with each other."""
    return xs + ys


print(f'{f(123, 456) = }')
print(f'{f("abc", "def") = }')
print(f'{f([1, 2, 3], [4, 5, 6]) = }')
print(f'{f((1, 2, 3), (4, 5, 6)) = }')
# Left commented out: tuple + list raises TypeError.
# print(f'{f((1, 2, 3), [4, 5, 6]) = }')
def f(xs, ys):
    """Return a list with the items of ``xs`` followed by the items of ``ys``.

    Both arguments are always copied into new lists first.
    """
    return list(xs) + list(ys)


print(f'{f((1, 2, 3), [4, 5, 6]) = }')
def f(xs, ys):
    """Return a list with the items of ``xs`` followed by the items of ``ys``.

    Arguments that are already lists are used as-is; anything else is
    converted to a list first.
    """
    if not isinstance(xs, list):
        xs = list(xs)
    if not isinstance(ys, list):
        ys = list(ys)
    return xs + ys


print(f'{f((1, 2, 3), [4, 5, 6]) = }')
from itertools import chain
def f(xs, ys):
    """Return a list with the items of ``xs`` followed by the items of ``ys``.

    ``chain`` iterates both inputs in turn; only the result list is built.
    """
    return list(chain(xs, ys))


print(f'{f((1, 2, 3), [4, 5, 6]) = }')
def f(xs, ys):
    """Return a list with the items of ``xs`` followed by the items of ``ys``.

    Uses *-unpacking inside a list literal, which accepts any iterables.
    """
    return [*xs, *ys]


print(f'{f((1, 2, 3), [4, 5, 6]) = }')
xs = [1, 2, 3]
ys = [5, 6, 7]
# Without *, xs and ys are nested as single elements of the outer list.
zs = [0, xs, 4, ys, 8]
print(f'{zs = }')
# With *, their items are spliced in place, giving one flat list.
zs = [0, *xs, 4, *ys, 8]
print(f'{zs = }')
# Literal syntax for the built-in containers; a tuple needs only the commas,
# the parentheses are optional.
some_tuple = 1, 2, 3
some_tuple = (1, 2, 3)
some_list = [1, 2, 3]
some_dict = {1: 1, 2: 4, 3: 9}
some_set = {1, 2, 3}
# Empty forms: {} is an empty dict, so the empty set has to be spelled set().
empty_tuple = ()
empty_list = []
empty_dict = {}
empty_set = set()
# Alternative spelling: unpacking an empty string into a set literal.
# empty_set = {*''}
xs = [1, 2, 3, 4]
# List, set and dict comprehensions, and a generator expression, over the same data.
lc = [x**2 for x in xs]
sc = {x**2 for x in xs}
dc = {x: x**2 for x in xs}
ge = (x**2 for x in xs)
# There is no tuple comprehension; unpacking a generator expression builds a tuple.
# tc = *(x**2 for x in xs),
print(f'{lc = }')
print(f'{sc = }')
print(f'{dc = }')
print(f'{ge = }')
# print(f'{tc = }')
# Merge two dicts by copying the first and updating the copy with the second;
# for the shared key 2 the value from d2 wins. d1 and d2 are left unchanged.
d1 = {1: 'one', 2: 'two'}
d2 = { 2: 'zwei', 3: 'drei'}
d3 = d1.copy()
d3.update(d2)
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')
from itertools import chain
# Merge by chaining the (key, value) pairs of both dicts into dict();
# pairs from d2 come later, so its value wins for the shared key 2.
d1 = {1: 'one', 2: 'two'}
d2 = { 2: 'zwei', 3: 'drei'}
d3 = dict(chain(d1.items(), d2.items()))
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')
from itertools import chain
# Merge with **-unpacking inside a dict literal; later entries win for the shared key 2.
d1 = {1: 'one', 2: 'two'}
d2 = { 2: 'zwei', 3: 'drei'}
d3 = {**d1, **d2}
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')
from itertools import chain
# Merge with the dict union operator (Python 3.9+); the right operand wins for the shared key 2.
d1 = {1: 'one', 2: 'two'}
d2 = { 2: 'zwei', 3: 'drei'}
d3 = d1 | d2
print(f'{d1 = }')
print(f'{d2 = }')
print(f'{d3 = }')
Back to functions:
def f(field_a, field_b):
    """Accept the two fields as separate arguments."""
    pass


f(1, 2)
# Groups the two fields into one named record.
Struct = namedtuple('Struct', 'a b')


def f(struct):
    """Accept both fields packed into a single ``Struct``."""
    pass


f(Struct(1, 2))
def f(field_a, field_b):
    """Validate the two fields inside the function itself.

    Raises:
        ValueError: if ``field_a > 10`` and ``field_b < 20``.
    """
    if field_a > 10 and field_b < 20:
        raise ValueError()
    pass


f(1, 2)
class Struct(namedtuple('Struct', 'a b')):
    """Record of two fields that is validated when it is constructed."""

    def __new__(cls, a, b):
        """Create the record.

        Raises:
            ValueError: if ``a > 10`` and ``b < 20``.
        """
        if a > 10 and b < 20:
            raise ValueError()
        return super().__new__(cls, a, b)


def f(struct):
    """Accept a ``Struct``; validation already happened in its constructor."""
    pass


f(Struct(1, 2))
from pandas import DataFrame, to_datetime
# The index is built with to_datetime, yet .loc below is given a plain string;
# the last line prints the actual type of an index element for comparison.
df = DataFrame({'a': [1]}, index=to_datetime(['2020-07-04']))
print(df.loc['2020-07-04'])
print(f'{type(df.index[0]) = }')
https://github.com/python-variants/variants
# convenience layer
from variants import primary
@primary
def f(field_a, field_b):
    """Primary form: takes the two fields as separate arguments."""
    pass


@f.variant('structured')
def f(struct):
    """'structured' variant: takes both fields packed into one object."""
    pass


f('a', 'b')
f.structured(('a', 'b'))
def f(x, y):
    # x and y are integers
    pass
def f(x, y):
    '''x and y are integers'''


# Unlike a comment, a docstring is available at runtime.
print(f.__doc__)
def f(x, y):
    '''
    x: int
    y: int
    '''


# Parse each "name: expression" docstring line into {name: evaluated expression}.
# The ':' guard has to come before the unpacking split, otherwise a line without
# a colon raises ValueError before the filter is evaluated.
# NOTE: eval runs arbitrary code; acceptable only because the text is this
# file's own docstring.
print({k.strip(): eval(v.strip())
       for line in f.__doc__.strip().splitlines()
       if ':' in line
       for k, v in [line.split(':', 1)]
       })
def f(x: int, y: int):
    """Declare the expected argument types as annotations."""
    pass


# Annotations are stored on the function as a dict of name -> annotation.
f.__annotations__
def f(x, y):
    # x and y are pandas.DataFrames with a column 'a'
    pass
def f(x, y):
    '''
    x and y are pandas.DataFrames with a column 'a'
    '''


print(f.__doc__)
from pandas import DataFrame
def f(x, y):
    '''
    x: DataFrame(columns=['a'])
    y: DataFrame(columns=['a'])
    '''


# Parse each "name: expression" docstring line into {name: evaluated expression};
# here each expression builds an empty frame with the expected columns.
# The ':' guard has to come before the unpacking split, otherwise a line without
# a colon raises ValueError before the filter is evaluated.
# NOTE: eval runs arbitrary code; acceptable only because the text is this
# file's own docstring.
print({k.strip(): eval(v.strip())
       for line in f.__doc__.strip().splitlines()
       if ':' in line
       for k, v in [line.split(':', 1)]
       })
from pandas import DataFrame
def f(x: DataFrame(columns=['a']), y: DataFrame(columns=['a'])):
    """Describe the expected inputs by annotating with example (empty) frames."""
    pass


# Annotations can be arbitrary objects; here each one is an empty DataFrame.
f.__annotations__
def pure(f):
    """Decorator that tags ``f`` with ``f.pure = True`` and returns it unchanged."""
    f.pure = True
    return f


@pure
def f():
    pass


def g():
    pass


print(f'{f.pure = }')
# g was never tagged, so the attribute lookup falls back to the default.
print(f"{getattr(g, 'pure', False) = }")
from collections.abc import Callable
class PureMeta(type):
    """Metaclass that redefines ``isinstance(obj, pure)``."""

    def __instancecheck__(self, inst):
        # True for callables carrying a truthy `pure` attribute.
        return isinstance(inst, Callable) and getattr(inst, 'pure', False)


class pure(metaclass=PureMeta):
    """Decorator factory: ``@pure()`` tags a function with ``pure = True``.

    Because of ``PureMeta``, ``isinstance(func, pure)`` then reports
    whether ``func`` has been tagged.
    """

    def __call__(self, f):
        f.pure = True
        return f


@pure()
def f(x, y, *args):
    # A second, unreachable `return None` after this line was dropped.
    return ...


def g():
    pass


isinstance(f, pure)
isinstance(g, pure)