seminars.fb

Tools and Approaches → “Testing, Property Testing & hypothesis”

Discussion (Wed, Nov 11, 2020; 12 PM PST)

Theme: Tools and Approaches

Topic: Testing, Property Testing & hypothesis

Keywords: Python, data analysis, data engineering, numpy, pandas

presenter James Powell james@dutc.io
date Wednesday, November 11, 2020
time 12:00 PM PST

Our goal is to write better tests at the “unit” level (in this case, per function).

def add(x, y):
    '''return the sum of `x` and `y`'''
    total = x + y
    return total

# show `add` on a couple of sample inputs
print(f'{add(1, 3) = }')
print(f'{add(2, 1) = }')

Here is a sample test. It’s not very good.

def add(x, y):
    '''
    add `x` and `y` -- with a deliberately planted "bug"

    NOTE: whenever `x < y * 2` the result is `x - y`, not the sum;
          comment that branch out to get a correct `add` again
    '''
    buggy = x < (y * 2)
    return x - y if buggy else x + y

# QUESTION: why is this test not very good?
#     HINT: what if there is a bug in `add`?
def test_add():
    '''check `add` against one hand-picked example'''
    expected = 6
    assert add(4, 2) == expected

# when this file is run as a script, hand it to pytest in quiet mode
from pytest import main
if __name__ == '__main__':
    main(['-q', __file__])

We can slightly improve the test using the hypothesis library and test fixtures.

A test fixture is a way to automatically generate tests by choosing test inputs programmatically.

from hypothesis import given
from hypothesis.strategies import integers

def add(x, y):
    '''
    add `x` and `y` -- with a deliberately planted "bug"

    NOTE: when `x == 1` the result is `x - y`, not the sum;
          comment that case out to get a correct `add` again
    '''
    return x - y if x == 1 else x + y

# QUESTION: why is this test not very good?
#     HINT: how do we determine if the test passed or failed?
#           what is our “oracle”?
@given(x=integers(), y=integers())
def test_add(x, y):
    '''compare `add` with `x + y` for generated integers'''
    actual = add(x, y)
    expected = x + y
    assert actual == expected

# when this file is run as a script, hand it to pytest in quiet mode
from pytest import main
if __name__ == '__main__':
    main(['-q', __file__])
from hypothesis import given
from hypothesis.strategies import integers

def add(x, y):
    '''return the sum of `x` and `y`'''
    # NOTE: to model an example of "bug" in `add`,
    #       uncomment the two lines below
    #  if x < y * 2:
    #      return x - y
    total = x + y
    return total

# TASK: using `hypothesis`, write a better test
@given(x=integers(), y=integers(), z=integers())
def test_add(x, y, z):
    '''check algebraic properties of `add` instead of specific sums'''
    # commutativity
    assert add(x, y) == add(y, x)
    # associativity
    right_grouped = add(x, add(y, z))
    left_grouped = add(add(x, y), z)
    assert right_grouped == left_grouped
    # identity element
    assert add(x, 0) == x
    # additive inverse
    assert add(x, -x) == 0
    # ordering: with lo <= mid <= hi, lo + mid cannot exceed mid + hi
    lo, mid, hi = sorted((x, y, z))
    assert add(lo, mid) <= add(mid, hi)

# when this file is run as a script, hand it to pytest in quiet mode
from pytest import main
if __name__ == '__main__':
    main(['-q', __file__])
from hypothesis import given
from hypothesis.strategies import integers, floats
from math import isclose

def add(x, y):
    '''
    return the sum of `x` and `y`

    note: addition on floating point values is NOT associative!
    '''
    total = x + y
    return total

# TASK: using `hypothesis`, write a better test
# one strategy shared by all three arguments: finite floats only
_finite = floats(allow_nan=False, allow_infinity=False)

@given(x=_finite, y=_finite, z=_finite)
def test_add(x, y, z):
    '''check the properties of `add` that are asserted for finite floats'''
    # commutativity (compared with a relative tolerance)
    assert isclose(add(x, y), add(y, x), rel_tol=0.01)
    # associativity is left disabled: it does not hold for floats
    #  assert isclose(add(x, add(y, z)), add(add(x, y), z), rel_tol=0.01)
    # identity element
    assert add(x, 0) == x
    # additive inverse
    assert add(x, -x) == 0
    # ordering over the sorted inputs
    lo, mid, hi = sorted((x, y, z))
    assert add(lo, mid) <= add(mid, hi)

# when this file is run as a script, hand it to pytest in quiet mode
from pytest import main
if __name__ == '__main__':
    main(['-q', __file__])
from datetime import date, timedelta
from collections import deque
from hypothesis import given
from hypothesis.strategies import integers, lists

def pairwise(xs):
    '''
    yield every element of xs together with its successor
    e.g.,
    >>> [*pairwise('abcdef')]
    [('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e'), ('e', 'f')]
    '''
    window = deque(maxlen=2)
    for item in xs:
        window.append(item)
        if len(window) < 2:
            # only one element seen so far: no pair to emit yet
            continue
        yield tuple(window)
#  print(f'{[*pairwise("abcdef")] = }')

def next_business_date(dt, n=1):
    '''
    compute the nth next business date
    i.e., step forward one day at a time, counting only days that are
    neither Saturday/Sunday nor one of the holidays listed below

    params:
        dt -- starting date (never counted itself)
        n  -- how many business dates to advance;
              `dt` is returned unchanged when n <= 0
    returns:
        the date reached

    e.g.,
    knowing Fri Jul 3, 2020 was a holiday (US Independence Day)
    the third business date would be Wed Jul 8, 2020.
    >>> from datetime import date
    >>> next_business_date(date(2020, 7, 2), 3)
    datetime.date(2020, 7, 8)
    '''
    holidays = {
        'independence day': date(2020,  7,  3),
        'thanksgiving':     date(2020, 11, 26),
        'christmas':        date(2020, 12, 25),
        "new year's eve":   date(2020, 12, 31),
    }
    # build the lookup set once instead of once per candidate day
    closed = frozenset(holidays.values())
    while n > 0:
        # advance to the next day that is not a weekend (5, 6) or a holiday
        while True:
            dt = dt + timedelta(days=1)
            if dt.weekday() not in {5, 6} and dt not in closed:
                break
        n -= 1
    return dt
#  print(f'{next_business_date(date(2020, 7, 2), 3) = }')

# TASK: using `hypothesis`, write better tests
#       for the below
# NOTE: you can rewrite the above functions
#       if that might make them easier to test
@given(xs=lists(integers()))
def test_pairwise(xs):
    '''check the structure of the pairs produced by `pairwise`'''
    pairs = [*pairwise(xs)]
    # n elements give n - 1 overlapping pairs (none for fewer than two)
    assert len(pairs) == max(len(xs) - 1, 0)
    # consecutive pairs overlap by exactly one element
    for prev_pair, curr_pair in pairwise(pairs):
        assert prev_pair[-1] == curr_pair[0]
    # a two-element list already yields one pair, so check from 2 upward
    if len(xs) >= 2:
        assert pairs[0][0]   == xs[0]
        assert pairs[-1][-1] == xs[-1]

# a b c d e f
# a b
#   b c
#     c d
#       d e 
#         e f

def test_next_business_date():
    # placeholder: always passes and never calls `next_business_date`
    assert True

from pytest import main
# when this file is run as a script, hand it to pytest in quiet mode
if __name__ == '__main__':
    main(['-q', __file__])
from datetime import datetime, timedelta
from collections import namedtuple
from collections import deque

def pairwise(xs):
    '''
    yield every element of xs together with its successor
    e.g.,
    >>> [*pairwise('abcdef')]
    [('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e'), ('e', 'f')]
    '''
    # a two-slot buffer: appending a third element drops the oldest one
    pair = deque(maxlen=2)
    for element in xs:
        pair.append(element)
        is_full = len(pair) == 2
        if is_full:
            yield tuple(pair)

# a task is a label plus the moment at which it was completed
Task = namedtuple('Task', 'name completed')

# take ONE reference instant so the offsets below are exact;
# separate `datetime.now()` calls would each read a slightly later clock
now = datetime.now()
tasks = [
    Task('start for the day', now - timedelta(seconds=3)),
    Task('reboot the device', now),
    Task('contact the vendor', now + timedelta(seconds=5)),
    Task('rma the device', now + timedelta(seconds=15)),
]

# in order of completion, print each task with the seconds since the previous one
tasks = sorted(tasks, key=lambda t: t.completed)
for prev_task, curr_task in pairwise(tasks):
    elapsed = (curr_task.completed - prev_task.completed).total_seconds()
    print(f'{curr_task.name} {elapsed:.2f}')
from datetime import date, timedelta
from hypothesis import given
from hypothesis.strategies import dictionaries, dates, text, integers

# holiday name -> date (all entries are in 2020);
# used as the default `holidays` argument of `next_business_date`
US_HOLIDAYS = {
    'independence day': date(2020,  7,  3),
    'thanksgiving':     date(2020, 11, 26),
    'christmas':        date(2020, 12, 25),
    "new year's eve":   date(2020, 12, 31),
}
def next_business_date(dt, n=1, *, holidays=US_HOLIDAYS):
    '''
    step forward from `dt` to a later business date

    a business date is any day that is neither Saturday/Sunday
    nor one of the values of `holidays`

    params:
        dt       -- starting date (never returned itself)
        n        -- number of business dates to skip AFTER the first one:
                    n=0 gives the first business date after `dt`,
                    n=1 the second, and so on
        holidays -- mapping of holiday name -> date
    returns:
        the business date reached
    raises:
        ValueError -- if n is negative
    '''
    if n < 0:
        # a negative countdown would never reach the `n == 0` exit below
        raise ValueError('n must be non-negative')
    # build the lookup set once instead of once per candidate day
    closed = frozenset(holidays.values())
    while True:
        # advance to the next day that is not a weekend (5, 6) or a holiday
        while True:
            dt = dt + timedelta(days=1)
            if dt.weekday() not in {5, 6} and dt not in closed:
                break
        if n == 0:
            break
        n -= 1
    return dt

# keep generated dates a year below `date.max`
# so that stepping forward from them cannot run off the calendar
_LATEST = date.max - timedelta(days=366)

@given(
    holidays=dictionaries(keys=text(), values=dates(max_value=_LATEST), min_size=1),
    day=dates(max_value=_LATEST),
    n=integers(min_value=0, max_value=10),
)
def test_next_business_date(holidays, day, n):
    '''check `next_business_date` against generated holidays and dates'''
    closed = set(holidays.values())
    hol = [*holidays.values()][0]
    # NOTE: the generated holidays must be passed in explicitly;
    #       otherwise the function falls back to its default mapping
    after_hol = next_business_date(hol, 0, holidays=holidays)
    assert after_hol > hol
    result = next_business_date(day, n, holidays=holidays)
    assert result >= day
    # whatever is returned must itself be a business date
    for found in (after_hol, result):
        assert found.weekday() not in {5, 6}
        assert found not in closed

# when this file is run as a script, hand it to pytest in quiet mode
from pytest import main
if __name__ == '__main__':
    main(['-q', __file__])
from datetime import date, timedelta
from hypothesis import given
from hypothesis.strategies import dictionaries, dates, text, integers
from enum import Enum
from itertools import islice, tee, takewhile

def nwise(g, n=2):
    '''
    get the elements of g in overlapping groups of n
    e.g.,
    >>> [*nwise('abcd')]
    [('a', 'b'), ('b', 'c'), ('c', 'd')]
    '''
    # n independent copies of the input, the i-th one advanced by i elements;
    # zipping them lines up each element with its n - 1 successors
    branches = tee(g, n)
    return zip(*(islice(branch, offset, None)
                 for offset, branch in enumerate(branches)))

# holiday name -> date (all entries are in 2020);
# used as the default `holidays` argument of the functions below
US_HOLIDAYS = {
    'independence day': date(2020,  7,  3),
    'thanksgiving':     date(2020, 11, 26),
    'christmas':        date(2020, 12, 25),
    "new year's eve":   date(2020, 12, 31),
}

class Dir:
    '''per-step offsets used when walking the calendar'''
    # one day forward (presumably "next business day" -- confirm)
    nbd = timedelta(days=1)
    # one day backward (presumably "previous business day" -- confirm)
    pbd = -nbd

def modify_business_date(dt, n=1, *, holidays=US_HOLIDAYS, direction=Dir.nbd):
    '''
    step from `dt` to another business date, forward or backward

    a business date is any day that is neither Saturday/Sunday
    nor one of the values of `holidays`

    params:
        dt        -- starting date (never returned itself)
        n         -- number of business dates to skip AFTER the first one:
                     n=0 gives the first business date in `direction`,
                     n=1 the second, and so on
        holidays  -- mapping of holiday name -> date
        direction -- timedelta added on every step
                     (`Dir.nbd` forward, `Dir.pbd` backward)
    returns:
        the business date reached
    raises:
        ValueError -- if n is negative
    '''
    if n < 0:
        # zero is accepted, hence "non-negative" rather than "positive"
        raise ValueError('n must be non-negative')
    # build the lookup set once instead of once per candidate day
    closed = frozenset(holidays.values())
    while True:
        # move to the next day that is not a weekend (5, 6) or a holiday
        while True:
            dt = dt + direction
            if dt.weekday() not in {5, 6} and dt not in closed:
                break
        if n == 0:
            break
        n -= 1
    return dt

def datecount(refdate, *, direction=Dir.nbd):
    '''
    count dates without end, starting at (and including) refdate
    and adding `direction` on every step
    '''
    current = refdate
    while True:
        yield current
        current += direction

def businessdays(refdate, *, direction=Dir.nbd, holidays=US_HOLIDAYS):
    '''
    lazily generate the business dates found by `datecount`
    i.e., those dates (starting with refdate itself) that are neither
    Saturday/Sunday nor one of the values of `holidays`

    params:
        refdate   -- first candidate date
        direction -- timedelta passed on to `datecount`
        holidays  -- mapping of holiday name -> date
    returns:
        a generator of dates
    '''
    # build the lookup set once, not once per generated date
    closed = frozenset(holidays.values())
    return (d for d in datecount(refdate, direction=direction)
            if d.weekday() not in {5, 6} and d not in closed)

@given(
    # stay one day below `date.max`: the generator must be able to take a step
    refdate=dates(max_value=date.max - timedelta(days=1)),
)
def test_datecount(refdate):
    '''check the first two dates produced by `datecount`'''
    dc = datecount(refdate)
    # named `first`/`second` so the imported `dates` strategy is not shadowed
    first, second = next(dc), next(dc)
    assert first < second
    assert first == refdate
    assert (second - first).days == 1

@given(
    # leave several years of headroom below `date.max`:
    # the test walks 1,000 business dates forward from refdate
    refdate=dates(max_value=date.max - timedelta(days=5 * 366)),
    holidays=dictionaries(keys=text(), values=dates(), min_size=1),
)
def test_businessdays(refdate, holidays):
    '''check the first 1,000 dates produced by `businessdays`'''
    closed = set(holidays.values())
    for pd, cd in nwise(islice(businessdays(refdate, holidays=holidays), 1_000)):
        for d in (pd, cd):
            # no weekends
            assert d.weekday() not in {5, 6}
            # no holidays
            assert d not in closed
        # should move monotonically forward
        assert pd < cd
        # dates in-between must be either holidays or weekends
        for d in takewhile(lambda d: d < cd, islice(datecount(pd), 1, None)):
            assert d.weekday() in {5, 6} or d in closed

# keep generated dates a year below `date.max`
# so that stepping forward from them cannot run off the calendar
_SAFE_MAX = date.max - timedelta(days=366)

@given(
    holidays=dictionaries(keys=text(), values=dates(max_value=_SAFE_MAX), min_size=1),
    day=dates(max_value=_SAFE_MAX),
    n=integers(min_value=0, max_value=10),
)
def test_modify_business_date(holidays, day, n):
    '''check `modify_business_date` against generated holidays and dates'''
    closed = set(holidays.values())
    hol = [*holidays.values()][0]
    # NOTE: the generated holidays must be passed in explicitly;
    #       otherwise the function falls back to its default mapping
    assert modify_business_date(hol, 0, holidays=holidays) > hol
    assert modify_business_date(day, n, holidays=holidays) >= day
    if day not in closed and day.weekday() not in {5, 6}:
        # from a business date, stepping forward and then back
        # by the same amount must return to where we started
        there = modify_business_date(day, 1, holidays=holidays)
        back = modify_business_date(there, 1, holidays=holidays, direction=Dir.pbd)
        assert back == day

# when this file is run as a script, hand it to pytest in quiet mode
from pytest import main
if __name__ == '__main__':
    main(['-q', __file__])
# i. fill an empty dict with successive `update` calls;
#    for a shared key, the value from the later `update` is kept
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'two', 3: 'three'}
d3 = {}
d3.update(d1)
d3.update(d2)

# ii. copy the first dict, then `update` the copy with the second
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'two', 3: 'three'}
d3 = d1.copy()
d3.update(d2)

# iii. chain the (key, value) pairs of both dicts and pass them to `dict`;
#      for a shared key, the later pair replaces the earlier one
from itertools import chain
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'two', 3: 'three'}
d3 = dict(chain(d1.items(), d2.items()))

# iv. dict comprehension over the items of each dict in turn
# NOTE: `chain` is imported here but not used by this variant
from itertools import chain
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'two', 3: 'three'}
d3 = {k: v for d in [d1, d2] for k, v in d.items()}

# v. layer the dicts in a ChainMap, then flatten it into a plain dict
# NOTE: a ChainMap looks a key up in its FIRST mapping first, so `d2` is
#       listed first; that lets `d2` win for shared keys and keeps the
#       key order 1, 2, 3, as in every other variant
from collections import ChainMap
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'two', 3: 'three'}
d3 = dict(ChainMap(d2, d1))

# vi. unpack both dicts into a new dict display;
#     for a shared key, the value unpacked last is kept
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'two', 3: 'three'}
d3 = {**d1, **d2}

# vii. Python ≥3.9: the dict union operator `|`
d1 = {1: 'one', 2: 'two'}
d2 = {          2: 'two', 3: 'three'}
d3 = d1 | d2
# show the merged result
print(f'{d3 = }')

|            | easy | hard |
|------------|------|------|
| care       |      |      |
| don’t care |      |      |