hypothesis
Theme: Tools and Approaches
Topic: Testing, Property Testing & hypothesis
Keywords: Python, data analysis, data engineering, numpy, pandas
| presenter | James Powell <james@dutc.io> |
| date | Wednesday, November 11, 2020 |
| time | 12:00 PM PST |
Our goal is to write better tests at the “unit”-level (in this case, per function.)
def add(x, y):
    '''return the result of `x + y`'''
    total = x + y
    return total
# two sample calls; the `=` specifier echoes each expression next to its value
print(f'{add(1, 3) = }', f'{add(2, 1) = }', sep='\n')
Here is a sample test. It’s not very good.
def add(x, y):
    '''
    add `x` and `y` -- with a deliberately planted defect

    NOTE: the condition below models an example of a "bug" in `add`:
          whenever `x < y * 2` the difference is returned instead of the sum;
          comment the condition out to get a correct `add`
    '''
    hits_bug = x < (y * 2)
    return x - y if hits_bug else x + y
# QUESTION: why is this test not very good?
# HINT: what if there is a bug in `add`?
def test_add():
    '''check `add` against a single hand-picked example'''
    expected = 6
    assert add(4, 2) == expected
from pytest import main
if __name__ == '__main__':
    # `main` returns the exit code of the test run; pass it on through
    # SystemExit so that failing tests also make the script exit non-zero
    raise SystemExit(main(['-q', __file__]))
We can slightly improve the test using the hypothesis
library and test fixtures.
A test fixture is a way to automatically generate tests by choosing test inputs programmatically.
from hypothesis import given
from hypothesis.strategies import integers
def add(x, y):
    '''
    add `x` and `y` -- with a deliberately planted defect

    NOTE: the condition below models an example of a "bug" in `add`:
          for `x == 1` the difference is returned instead of the sum;
          comment the condition out to get a correct `add`
    '''
    hits_bug = x == 1
    return (x - y) if hits_bug else (x + y)
# QUESTION: why is this test not very good?
# HINT: how do we determine if the test passed or failed?
#       what is our “oracle”?
@given(x=integers(), y=integers())
def test_add(x, y):
    '''compare `add` with the expression `x + y` on generated integers'''
    expected = x + y
    assert add(x, y) == expected
from pytest import main
if __name__ == '__main__':
    # `main` returns the exit code of the test run; pass it on through
    # SystemExit so that failing tests also make the script exit non-zero
    raise SystemExit(main(['-q', __file__]))
from hypothesis import given
from hypothesis.strategies import integers
def add(x, y):
    '''return the result of `x + y`'''
    # NOTE: uncomment these lines to model
    #       an example of "bug" in `add`
    # if x < y * 2:
    #     return x - y
    total = x + y
    return total
# TASK: using `hypothesis`, write a better test
@given(x=integers(), y=integers(), z=integers())
def test_add(x, y, z):
    '''check algebraic properties of `add` on generated integers'''
    # commutativity: argument order does not matter
    assert add(x, y) == add(y, x)

    # associativity: grouping does not matter
    right_grouped = add(x, add(y, z))
    left_grouped = add(add(x, y), z)
    assert right_grouped == left_grouped

    # identity element and additive inverse
    assert add(x, 0) == x
    assert add(x, -x) == 0

    # monotonicity: summing the two smallest values never exceeds
    # summing the two largest
    smallest, middle, largest = sorted([x, y, z])
    assert add(smallest, middle) <= add(middle, largest)
from pytest import main
if __name__ == '__main__':
    # `main` returns the exit code of the test run; pass it on through
    # SystemExit so that failing tests also make the script exit non-zero
    raise SystemExit(main(['-q', __file__]))
from hypothesis import given
from hypothesis.strategies import integers, floats
from math import isclose
def add(x, y):
    '''
    return the result of `x + y`

    note: addition on floating point values is NOT associative!
    '''
    total = x + y
    return total
# TASK: using `hypothesis`, write a better test
@given(
    x=floats(allow_nan=False, allow_infinity=False),
    y=floats(allow_nan=False, allow_infinity=False),
    z=floats(allow_nan=False, allow_infinity=False),
)
def test_add(x, y, z):
    '''check properties of `add` on generated floats (no NaN, no infinities)'''
    # commutativity, compared with a 1% relative tolerance
    forward = add(x, y)
    swapped = add(y, x)
    assert isclose(forward, swapped, rel_tol=0.01)

    # associativity stays disabled: per the note on `add`, floating point
    # addition is not associative
    # assert isclose(add(x, add(y, z)), add(add(x, y), z), rel_tol=0.01)

    # identity element and additive inverse
    assert add(x, 0) == x
    assert add(x, -x) == 0

    # monotonicity: summing the two smallest values never exceeds
    # summing the two largest
    smallest, middle, largest = sorted([x, y, z])
    assert add(smallest, middle) <= add(middle, largest)
from pytest import main
if __name__ == '__main__':
    # `main` returns the exit code of the test run; pass it on through
    # SystemExit so that failing tests also make the script exit non-zero
    raise SystemExit(main(['-q', __file__]))
from datetime import date, timedelta
from collections import deque
from hypothesis import given
from hypothesis.strategies import integers, lists
def pairwise(xs):
    '''
    lazily yield each element of `xs` together with its successor

    e.g.,
    >>> [*pairwise('abcdef')]
    [('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e'), ('e', 'f')]

    nothing is yielded when `xs` has fewer than two elements
    '''
    elements = iter(xs)
    # take the first element; an empty input ends the generator right away
    for previous in elements:
        break
    else:
        return
    for current in elements:
        yield (previous, current)
        previous = current
# print(f'{[*pairwise("abcdef")] = }')
def next_business_date(dt, n=1):
    '''
    compute the nth next business date

    a business date is any date that is neither a Saturday/Sunday nor one
    of the holidays listed in the body of this function

    e.g.,
    knowing Fri Jul 3, 2020 was a holiday (US Independence Day)
    the third business date would be Wed Jul 8, 2020.

    >>> from datetime import date
    >>> next_business_date(date(2020, 7, 2), 3)
    datetime.date(2020, 7, 8)

    params:
        dt: the reference date
        n:  how many business dates to move forward; for `n <= 0` the
            loop never runs and `dt` is returned unchanged
    returns:
        the date reached after `n` business dates
    '''
    holidays = {
        'independence day': date(2020, 7, 3),
        'thanksgiving': date(2020, 11, 26),
        'christmas': date(2020, 12, 25),
        "new year's eve": date(2020, 12, 31),
    }
    # build the lookup set once rather than on every step of the loop
    holiday_dates = frozenset(holidays.values())
    weekend = {5, 6}  # `date.weekday()`: Saturday is 5, Sunday is 6
    one_day = timedelta(days=1)
    while n > 0:
        # walk forward one calendar day at a time until a business date is hit
        while True:
            dt = dt + one_day
            if dt.weekday() not in weekend and dt not in holiday_dates:
                break
        n -= 1
    return dt
# print(f'{next_business_date(date(2020, 7, 2), 3) = }')
# TASK: using `hypothesis`, write better tests
#       for the below
# NOTE: you can rewrite the above functions
#       if that might make them easier to test
@given(xs=lists(integers()))
def test_pairwise(xs):
    '''check structural properties of `pairwise` on generated integer lists'''
    pairs = [*pairwise(xs)]
    # n elements give n - 1 overlapping pairs (and none below two elements)
    assert len(pairs) == max(len(xs) - 1, 0)
    # consecutive pairs overlap: each pair starts where the previous one ended
    for prev_pair, curr_pair in pairwise(pairs):
        assert prev_pair[-1] == curr_pair[0]
    # the pairs span the whole input; this already holds for two elements,
    # which yield exactly one pair
    if len(xs) >= 2:
        assert pairs[0][0] == xs[0]
        assert pairs[-1][-1] == xs[-1]
# a b c d e f
# a b
# b c
# c d
# d e
# e f
def test_next_business_date():
    '''placeholder: passes unconditionally until a real test is written'''
    assert True
from pytest import main
if __name__ == '__main__':
    # `main` returns the exit code of the test run; pass it on through
    # SystemExit so that failing tests also make the script exit non-zero
    raise SystemExit(main(['-q', __file__]))
from datetime import datetime, timedelta
from collections import namedtuple
from collections import deque
def pairwise(xs):
    '''
    lazily yield overlapping 2-tuples of neighbouring elements of `xs`

    e.g.,
    >>> [*pairwise('abcdef')]
    [('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e'), ('e', 'f')]

    nothing is yielded when `xs` has fewer than two elements
    '''
    unset = object()  # marks "no element seen yet"
    previous = unset
    for current in xs:
        if previous is not unset:
            yield (previous, current)
        previous = current
# a task is a name together with the time at which it was completed
Task = namedtuple('Task', 'name completed')

tasks = [
    Task(name='start for the day', completed=datetime.now() - timedelta(seconds=3)),
    Task(name='reboot the device', completed=datetime.now()),
    Task(name='contact the vendor', completed=datetime.now() + timedelta(seconds=5)),
    Task(name='rma the device', completed=datetime.now() + timedelta(seconds=15)),
]
# order chronologically so that neighbouring tasks can be compared
tasks = sorted(tasks, key=lambda task: task.completed)

# for every task but the first, print its name and the number of seconds
# that passed since the previous task was completed
for prev_task, curr_task in pairwise(tasks):
    print(
        f'{curr_task.name} {(curr_task.completed - prev_task.completed).total_seconds():.2f}'
    )
from datetime import date, timedelta
from hypothesis import given
from hypothesis.strategies import dictionaries, dates, text, integers
# default holiday calendar: holiday name -> date
US_HOLIDAYS = {
    'independence day': date(2020, 7, 3),
    'thanksgiving': date(2020, 11, 26),
    'christmas': date(2020, 12, 25),
    "new year's eve": date(2020, 12, 31),
}


def next_business_date(dt, n=1, *, holidays=US_HOLIDAYS):
    '''
    find a business date strictly after `dt`

    a business date is any date that is neither a Saturday/Sunday nor one
    of the values of `holidays`

    params:
        dt:       the reference date
        n:        how many business dates to skip: `n == 0` gives the first
                  business date after `dt`, `n == 1` the second, and so on
        holidays: mapping whose values are the dates to treat as holidays
    returns:
        the business date that was found
    raises:
        ValueError: if `n` is negative
    '''
    if n < 0:
        # without this guard `n == 0` below is never reached and the loop
        # only stops once the date arithmetic overflows
        raise ValueError('n must be non-negative')
    # build the lookup set once rather than on every step of the loop
    holiday_dates = frozenset(holidays.values())
    weekend = {5, 6}  # `date.weekday()`: Saturday is 5, Sunday is 6
    one_day = timedelta(days=1)
    while True:
        # walk forward one calendar day at a time until a business date is hit
        while True:
            dt = dt + one_day
            if dt.weekday() not in weekend and dt not in holiday_dates:
                break
        if n == 0:
            break
        n -= 1
    return dt
# the dates are capped a little below `date.max` so that stepping forward
# from a generated date cannot overflow
@given(
    holidays=dictionaries(
        keys=text(),
        values=dates(max_value=date(9999, 12, 1)),
        min_size=1,
    ),
    day=dates(max_value=date(9999, 12, 1)),
    n=integers(min_value=0, max_value=10),
)
def test_next_business_date(holidays, day, n):
    '''check `next_business_date` against a generated holiday calendar'''
    holiday_dates = set(holidays.values())
    hol = [*holidays.values()][0]
    # the generated calendar must be handed to the function, otherwise the
    # default calendar is exercised instead
    assert next_business_date(hol, 0, holidays=holidays) > hol
    result = next_business_date(day, n, holidays=holidays)
    # at least one step is always taken
    assert result > day
    # the result is itself a business date
    assert result.weekday() not in {5, 6}
    assert result not in holiday_dates
from pytest import main
if __name__ == '__main__':
    # `main` returns the exit code of the test run; pass it on through
    # SystemExit so that failing tests also make the script exit non-zero
    raise SystemExit(main(['-q', __file__]))
from datetime import date, timedelta
from hypothesis import given
from hypothesis.strategies import dictionaries, dates, text, integers
from enum import Enum
from itertools import islice, tee, takewhile
def nwise(g, n=2):
    '''
    iterate over `g` in overlapping windows of `n` consecutive elements

    e.g.,
    >>> [*nwise('abcd')]
    [('a', 'b'), ('b', 'c'), ('c', 'd')]
    >>> [*nwise('abcd', 3)]
    [('a', 'b', 'c'), ('b', 'c', 'd')]
    '''
    # `tee` gives n independent iterators over `g`; the i-th one skips its
    # first i elements so that `zip` lines them up into windows
    return zip(*(islice(it, i, None) for i, it in enumerate(tee(g, n))))
# default holiday calendar: holiday name -> date
US_HOLIDAYS = {
    'independence day': date(2020, 7, 3),
    'thanksgiving': date(2020, 11, 26),
    'christmas': date(2020, 12, 25),
    "new year's eve": date(2020, 12, 31),
}


class Dir:
    '''one-calendar-day steps that select the direction of travel'''
    nbd = timedelta(days=1)   # forward in time (presumably "next business date")
    pbd = timedelta(days=-1)  # backward in time (presumably "previous business date")


def modify_business_date(dt, n=1, *, holidays=US_HOLIDAYS, direction=Dir.nbd):
    '''
    find a business date strictly beyond `dt` in the given direction

    a business date is any date that is neither a Saturday/Sunday nor one
    of the values of `holidays`

    params:
        dt:        the reference date
        n:         how many business dates to skip: `n == 0` gives the first
                   business date beyond `dt`, `n == 1` the second, and so on
        holidays:  mapping whose values are the dates to treat as holidays
        direction: the step added to `dt` on every iteration, e.g. `Dir.nbd`
                   or `Dir.pbd`
    returns:
        the business date that was found
    raises:
        ValueError: if `n` is negative
    '''
    if n < 0:
        # zero is accepted, so the requirement is "non-negative"
        raise ValueError('n must be non-negative')
    # build the lookup set once rather than on every step of the loop
    holiday_dates = frozenset(holidays.values())
    weekend = {5, 6}  # `date.weekday()`: Saturday is 5, Sunday is 6
    while True:
        # step one `direction` at a time until a business date is hit
        while True:
            dt = dt + direction
            if dt.weekday() not in weekend and dt not in holiday_dates:
                break
        if n == 0:
            break
        n -= 1
    return dt
def datecount(refdate, *, direction=Dir.nbd):
    '''
    endlessly yield `refdate`, `refdate + direction`, `refdate + 2 * direction`, ...

    the first value yielded is `refdate` itself
    '''
    current = refdate
    while True:
        yield current
        current += direction
def businessdays(refdate, *, direction=Dir.nbd, holidays=US_HOLIDAYS):
    '''
    lazily produce the business dates reached by counting from `refdate`

    counting starts at `refdate` itself (so it is included when it is a
    business date) and proceeds in steps of `direction`; Saturdays, Sundays
    and the values of `holidays` are left out

    the holiday dates are read once, at call time
    '''
    # build the lookup set once rather than for every candidate date
    holiday_dates = frozenset(holidays.values())
    weekend = {5, 6}  # `date.weekday()`: Saturday is 5, Sunday is 6
    return (d for d in datecount(refdate, direction=direction)
            if d.weekday() not in weekend and d not in holiday_dates)
# `date.max` is excluded: stepping one day past it raises OverflowError
@given(
    refdate=dates(max_value=date.max - timedelta(days=1)),
)
def test_datecount(refdate):
    '''the first two values of `datecount` are `refdate` and the day after'''
    dc = datecount(refdate)
    # named `first`/`second` so that the imported `dates` strategy is not shadowed
    first, second = next(dc), next(dc)
    assert first < second
    assert first == refdate
    assert (second - first).days == 1
# `refdate` is capped well below `date.max`: the test walks 1,000 business
# dates forward, which must not run past the largest representable date
@given(
    refdate=dates(max_value=date(9990, 1, 1)),
    holidays=dictionaries(keys=text(), values=dates(), min_size=1),
)
def test_businessdays(refdate, holidays):
    '''check the first 1,000 dates from `businessdays` against a generated calendar'''
    holiday_dates = set(holidays.values())
    for prev_day, curr_day in nwise(islice(businessdays(refdate, holidays=holidays), 1_000)):
        # both ends of the pair are checked so that the very first date is covered too
        for d in (prev_day, curr_day):
            # no weekends
            assert d.weekday() not in {5, 6}
            # no holidays
            assert d not in holiday_dates
        # should move monotonically forward
        assert prev_day < curr_day
        # dates in-between must be either holidays or weekends
        for d in takewhile(lambda d: d < curr_day, islice(datecount(prev_day), 1, None)):
            assert d.weekday() in {5, 6} or d in holiday_dates
# the dates are capped a little below `date.max` so that stepping forward
# from a generated date cannot overflow
@given(
    holidays=dictionaries(
        keys=text(),
        values=dates(max_value=date(9999, 12, 1)),
        min_size=1,
    ),
    day=dates(max_value=date(9999, 12, 1)),
    n=integers(min_value=0, max_value=10),
)
def test_modify_business_date(holidays, day, n):
    '''check `modify_business_date` against a generated holiday calendar'''
    holiday_dates = set(holidays.values())
    hol = [*holidays.values()][0]
    # the generated calendar must be handed to the function, otherwise the
    # default calendar is exercised instead
    assert modify_business_date(hol, 0, holidays=holidays) > hol
    # at least one step forward is always taken
    assert modify_business_date(day, n, holidays=holidays) > day
    if day not in holiday_dates and day.weekday() not in {5, 6}:
        # round trip: from a business date, going forward and then backward
        # by the same amount under the same calendar ends where it started
        there = modify_business_date(day, 1, holidays=holidays)
        back = modify_business_date(there, 1, holidays=holidays, direction=Dir.pbd)
        assert back == day
from pytest import main
if __name__ == '__main__':
    # `main` returns the exit code of the test run; pass it on through
    # SystemExit so that failing tests also make the script exit non-zero
    raise SystemExit(main(['-q', __file__]))
# seven ways of merging `d1` and `d2` into a new dict `d3`;
# on a key collision the value from `d2` wins

# i. start from an empty dict and update it with each source in turn
d1 = {1: 'one', 2: 'two'}
d2 = {2: 'two', 3: 'three'}
d3 = {}
d3.update(d1)
d3.update(d2)

# ii. copy the first dict, then update the copy with the second
d1 = {1: 'one', 2: 'two'}
d2 = {2: 'two', 3: 'three'}
d3 = d1.copy()
d3.update(d2)

# iii. chain the items of both dicts into the `dict` constructor
from itertools import chain
d1 = {1: 'one', 2: 'two'}
d2 = {2: 'two', 3: 'three'}
d3 = dict(chain(d1.items(), d2.items()))

# iv. dict comprehension over both dicts
from itertools import chain
d1 = {1: 'one', 2: 'two'}
d2 = {2: 'two', 3: 'three'}
d3 = {k: v for d in [d1, d2] for k, v in d.items()}

# v. `ChainMap` looks keys up in its FIRST mapping first, so `d2` has to be
#    listed first for it to win collisions like in the other approaches
from collections import ChainMap
d1 = {1: 'one', 2: 'two'}
d2 = {2: 'two', 3: 'three'}
d3 = dict(ChainMap(d2, d1))

# vi. unpacking both dicts into a dict display
d1 = {1: 'one', 2: 'two'}
d2 = {2: 'two', 3: 'three'}
d3 = {**d1, **d2}

# vii. Python ≥3.9: the dict union operator
d1 = {1: 'one', 2: 'two'}
d2 = {2: 'two', 3: 'three'}
d3 = d1 | d2
print(f'{d3 = }')
|            | easy | hard |
|------------|------|------|
| care       |      |      |
| don’t care |      |      |