Title | “The Standard Library” |
Topic | a quick tour of the Python standard library |
Date | Fri Oct 14 |
Time | 10am~11am PST |
Keywords | Python, the standard library |
These sessions are designed for a broad audience of non-software engineers and software programmers of all backgrounds and skill-levels.
Our expected audience should comprise attendees with a…
During this session, we will endeavor to guide our audience to developing…
In previous seminars, we’ve made thorough use of the Python standard library, and we’ve talked about what it takes to develop greater fluency and greater capabilities with Python.
In this seminar, we’ll take a closer look at the Python standard library. We’ll present a motivation for coming to terms with the standard library’s scope and size. We’ll present a perspective on how to understand where and when to use the standard library, rather than writing your own approach or introducing a third-party dependency. We’ll also tour the most interesting and useful parts of the standard library—parts which you could be using right now to improve your work!
Did you enjoy this seminar? Did you learn something new that will help you as you write your own pandas analyses?
In a future seminar, we can do a deeper dive into data structures and algorithms that can be applied to solving problems in your work. We can also tour major third party libraries that can bring immediate benefit to your code, and highlight tools that you might not be aware of.
If you’re interested in any of these topics, please let us know! Send us an e-mail at learning@dutc.io or contact us over Workplace with your feedback!
print("Let's get started!")
What’s the deal with the “standard library”?
The Python Standard Library serves two purposes…
Guaranteed dependencies…
from graphlib import TopologicalSorter
# Dependency graph for graphlib: each key maps to the set of nodes that must
# come before it (its predecessors).
graph = {
'b': {'c'},
'c': {'d'},
'd': {'e'},
}
ts = TopologicalSorter(graph)
# add(node, *predecessors): 'a' must come after both 'b' and 'c'.
ts.add('a', 'b', 'c')
# static_order() emits predecessors first, so 'e' comes out first and 'a' last.
print(f'{[*ts.static_order()] = }')
from networkx import DiGraph
from networkx.algorithms.dag import topological_sort
# The same five-node chain built with the third-party networkx package.
# add_edge(u, v) adds a directed edge u -> v.
g = DiGraph()
g.add_edge('a', 'b')
g.add_edge('b', 'c')
g.add_edge('c', 'd')
g.add_edge('d', 'e')
# topological_sort places the source of every edge before its target, so this
# prints 'a' first — the edges here point the opposite way to graphlib's
# node -> predecessors mapping.
print(f"{[*topological_sort(g)] = }")
First-order approximations…
from statistics import mean, median, pvariance, variance
# Summary statistics using only the standard library.
xs = [1, 2, 3, 4]
# `= :>5.2f` prints the expression text, then the value right-aligned in a
# field of width 5 with two decimals.
# pvariance is the population variance; variance is the sample variance.
print(
f'{mean(xs) = :>5.2f}',
f'{median(xs) = :>5.2f}',
f'{pvariance(xs) = :>5.2f}',
f'{variance(xs) = :>5.2f}',
sep='\n',
)
from numpy import array, median
from scipy.stats import skew, kurtosis
# The same summary statistics computed with numpy on an ndarray.
xs = array([1, 2, 3, 4])
# var() defaults to ddof=0 (population variance); ddof=1 gives the sample
# variance, mirroring statistics.pvariance / statistics.variance.
print(
f'{xs.mean() = :>5.2f}',
f'{median(xs) = :>5.2f}',
f'{xs.var() = :>5.2f}',
f'{xs.var(ddof=1) = :>5.2f}',
sep='\n',
)
collections
import collections
print("Let's take a look!")
# Vendor -> set of model identifiers, held in a plain dict.
models = {
    'Samsung': {'abc-123', 'def-456'},
    'Cisco': {'ghi-789'},
    'Infinera': {'jkl-135'},
    'Ciena': {'mno-246', 'pqr-000'},
}
# for x in models['Samsung'] | models['Cisco']:
#     print(f'{x = }')

# 'Huawei' is not a key of the plain dict, so the lookup raises KeyError.
# Report the failure instead of letting it abort everything that follows.
# NOTE(review): presumably this failure is the motivation for defaultdict.
try:
    for x in models['Samsung'] | models['Cisco'] | models['Huawei']:
        print(f'{x = }')
except KeyError as exc:
    print(f'KeyError: {exc}')
from collections import defaultdict
# Vendor -> set of model identifiers; defaultdict(set) supplies an empty set
# for any vendor that has not been recorded.
models = defaultdict(set, {
    'Samsung': {'abc-123', 'def-456'},
    'Cisco': {'ghi-789'},
    'Infinera': {'jkl-135'},
    'Ciena': {'mno-246', 'pqr-000'},
})
# Reading a missing key creates it with the default value (an empty set).
print(f'{models["Huawei"] = }')
for x in models['Samsung'] | models['Cisco'] | models['Huawei']:
    print(f'{x = }')
from collections import defaultdict
# Device counts per vendor; defaultdict(int) makes an unknown vendor read as 0.
devices = defaultdict(int, {
'Samsung': 5_832,
'Cisco': 4_830,
'Infinera': 6_937,
'Ciena': 5_938,
})
# Side effect: reading devices["Huawei"] inserts 'Huawei': 0 into the mapping.
print(
f'{devices["Samsung"] + devices["Huawei"] = :,}'
)
# Separate counts for lab devices, with the same zero default.
lab_devices = defaultdict(int, {
'Samsung': 632,
'Ciena': 1_234,
})
# Combining the two mappings has to be spelled out key by key.
# `end=` appends a rule of 40 box-drawing characters after the output.
print(
f'{lab_devices["Samsung"] + lab_devices["Huawei"] + devices["Samsung"] + devices["Huawei"] = :,}',
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
# Largest entry by count (kv[-1] is the count of a (vendor, count) pair):
# first for `devices` alone, then for per-vendor totals built over the union
# of both key sets.
print(
f'{max(devices.items(), key=lambda kv: kv[-1]) = }',
f'{max({k: lab_devices[k] + devices[k] for k in devices.keys() | lab_devices.keys()}.items(), key=lambda kv: kv[-1]) = }',
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
from collections import Counter
# The same device counts held in Counter objects.
devices = Counter({
'Samsung': 5_832,
'Cisco': 4_830,
'Infinera': 6_937,
'Ciena': 5_938,
})
lab_devices = Counter({
'Samsung': 632,
'Ciena': 1234,
})
# Counter + Counter adds the counts key by key (keeping only positive totals).
all_devices = devices + lab_devices
# A missing key reads as 0 and, unlike defaultdict, is not inserted by the read.
print(
f'{lab_devices["Huawei"] = :,}',
f'{all_devices["Samsung"] + all_devices["Huawei"] = :,}',
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
# most_common(1) returns a one-element list holding the (key, count) pair with
# the highest count.
print(
f'{devices.most_common(1) = }',
f'{all_devices.most_common(1) = }',
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
# A list used as a stack: append() and pop() both work on the right-hand end,
# so hosts come back out in last-in, first-out order.
hosts = [
    'abc-123.corp.net',
    'def-456.corp.net',
]
hosts.append(
    'xyz-789.corp.net',
)
while hosts:
    print(f'{hosts.pop() = }')
from collections import deque
# A deque can be pushed to and popped from either end.
hosts = deque([
    'abc-123.corp.net',
    'def-456.corp.net',
])
hosts.appendleft(
    'xyz-789.corp.net',
)
while hosts:
    # print(f'{hosts.pop() = }')
    # popleft() drains from the left, so the host added with appendleft()
    # comes out first.
    print(f'{hosts.popleft() = }')
from collections import deque
# A bounded deque: maxlen=2 caps the number of stored items.
hosts = deque([
'abc-123.corp.net',
'def-456.corp.net',
], maxlen=2)
# Appending to a full bounded deque discards an item from the opposite (left)
# end, so 'abc-123.corp.net' is dropped here.
hosts.append(
'xyz-789.corp.net',
)
print(f'{hosts = }')
# First variant: the new model spells out every parameter in full.
base_params = {
'timing': 20,
'fan': 150,
'optics': {'amplitude': ...},
}
new_model_params = {
'timing': 25,
'fan': 100,
'optics': {'amplitude': ...},
}
# Second variant (rebinds both names): the new model lists only what differs
# from the base parameters.
base_params = {
'timing': 2000,
'fan': 150,
'optics': {'amplitude': ...},
}
new_model_params = {
'timing': 25,
}
# Hand-written fallback: take the override when present, else the base value.
timing = new_model_params['timing'] if 'timing' in new_model_params else base_params['timing']
print(f'{timing = }')
from collections import ChainMap
# Layered parameter lookup with ChainMap instead of hand-written fallbacks.
base_params = {
'timing': 20,
'fan': 100,
'optics': {'amplitude': ...},
}
new_model_params = {
'timing': 25,
'optics': {'wavelength': ...},
}
# Lookups search the mappings left to right, so new_model_params wins.
params = ChainMap(new_model_params, base_params)
print(f'{params["timing"] = }')
# Values are not merged: 'optics' comes back whole from new_model_params, so
# the base 'amplitude' entry is not visible through this lookup.
print(f'{params["optics"] = }')
# Calibration parameters applied one at a time; dict iteration follows
# insertion order, so they are reported as abc, def, xyz.
calibration_params = {
    'abc': 123,
    'def': 456,
    'xyz': 789,
}
for param, value in calibration_params.items():
    print(f'set {param = } to {value = }')
# Two vendors with the same parameters listed in a different order.
calibration_params = {
'Cisco': {
'abc': 123,
'def': 456,
'xyz': 789,
},
'Ciena': {
'abc': 123,
'xyz': 789,
'def': 456,
},
}
# Plain dict equality ignores insertion order, so this comparison is True.
print(f'{calibration_params["Cisco"] == calibration_params["Ciena"] = }')
from collections import OrderedDict
# The same comparison with OrderedDict values listed in opposite orders.
calibration_params = {
'Cisco': OrderedDict({
'abc': 123,
'def': 456,
'xyz': 789,
}),
'Ciena': OrderedDict({
'xyz': 789,
'def': 456,
'abc': 123,
}),
}
# Equality between two OrderedDicts is order-sensitive, so this is False.
print(f'{calibration_params["Cisco"] == calibration_params["Ciena"] = }')
from collections import namedtuple
...
class cidict(dict):
    """dict that retries a failed lookup with the case-folded key.

    Only ``d[key]`` reads are affected: ``__missing__`` is invoked by
    ``dict.__getitem__`` when the exact key is absent. Assignment, ``in`` and
    ``get`` still use the key exactly as given.
    """

    def __missing__(self, key):
        """Return the value stored under ``key.casefold()``.

        Raises:
            KeyError: if neither ``key`` nor its case-folded form is present.
        """
        folded = key.casefold()
        # If folding leaves the key unchanged, retrying the lookup would
        # re-enter __missing__ with the same key and recurse without end.
        if folded == key:
            raise KeyError(key)
        try:
            return self[folded]
        except KeyError:
            # Report the key the caller actually asked for.
            raise KeyError(key) from None
# Exercise the dict-subclass cidict defined just above.
d = cidict({'abc': 123, 'xyz': 456})
# Assignment is not intercepted by __missing__, so this stores a separate
# 'XYZ' key and leaves 'xyz' at 456.
d['XYZ'] = 789
# d["ABC"] is absent, so it is resolved through __missing__ to d["abc"].
print(
f'{d["abc"] = }',
f'{d["ABC"] = }',
f'{d["xyz"] = }',
f'{d = }',
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
from collections.abc import MutableMapping
from dataclasses import dataclass
@dataclass
class cidict(MutableMapping):
    """Case-insensitive mapping: every key is case-folded before use.

    Only the five abstract methods are written here; MutableMapping derives
    the rest of the mapping API (``in``, ``get``, ``update``, ...) from them.
    """

    data : dict  # backing store; its keys are always case-folded

    def __post_init__(self):
        # Fold the keys of the initial mapping as well, otherwise an entry
        # such as 'ABC' could never be reached through __getitem__. Building
        # a new dict also stops later writes leaking into the caller's dict.
        # If two keys fold to the same string, the later one wins.
        self.data = {key.casefold(): value for key, value in self.data.items()}

    def __iter__(self):
        return iter(self.data)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, key):
        return self.data[key.casefold()]

    def __setitem__(self, key, value):
        self.data[key.casefold()] = value

    def __delitem__(self, key):
        del self.data[key.casefold()]
# Exercise the MutableMapping-based cidict defined just above.
d = cidict({'abc': 123, 'xyz': 456})
# __setitem__ case-folds the key, so this overwrites the existing 'xyz' entry.
d['XYZ'] = 789
# `{d = }` shows the dataclass-generated repr, including the backing dict.
print(
f'{d["abc"] = }',
f'{d["ABC"] = }',
f'{d["xyz"] = }',
f'{d = }',
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
pathlib
& tempfile
import pathlib
import tempfile
print("Let's take a look!")
# File locations kept as plain strings: read the vendor file, write the
# processed one. The bodies are placeholders.
vendor_params = 'data/params.dat'
with open(vendor_params) as f:
    pass
processed_params = 'data/params.json'
with open(processed_params, 'w') as f:
    pass
# Both file names are written out by hand and must be kept in sync.
vendor_params = 'data/params-38273.dat'
processed_params = 'data/params-38273.json'
vendor_params = 'data/params-38273.dat'
# Deriving one name from the other with str.replace is fragile: 'dat' also
# occurs inside the directory name 'data', so the first attempt below would
# produce 'jsona/params-38273.json'.
# processed_params = vendor_params.replace('dat', 'json')
# processed_params = vendor_params.replace('.dat', '.json')
print(
f'{vendor_params = }',
f'{processed_params = }',
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
from pathlib import Path
# The same derivation with pathlib: the path is split into named components
# rather than treated as an opaque string.
vendor_params = Path('data/params-38273.dat')
# with_suffix() swaps only the final extension ('.dat' -> '.json').
processed_params = vendor_params.with_suffix('.json')
# parent = directory part, name = final component, suffix = extension,
# stem = name without the extension.
print(
f'{vendor_params.parent = }',
f'{vendor_params.name = }',
f'{vendor_params.suffix = }',
f'{vendor_params.stem = }',
f'{processed_params = }',
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
from pathlib import Path
# Create a nested directory (parents included, no error if it already exists)
# and list the regular files that sit beside it.
data_dir = Path('/tmp/data/vendor/params')
data_dir.mkdir(exist_ok=True, parents=True)
# Sibling paths built with the `/` operator; nothing is created on disk here.
paths = [
    data_dir.parent / 'a',
    data_dir.parent / 'b',
    data_dir.parent / 'c',
]
# for p in paths:
#     p.touch(exist_ok=True)
# for p in paths:
#     print(f'{p.stat() = }')
for p in data_dir.parent.iterdir():
    # iterdir() also yields the 'params' sub-directory; report files only.
    if p.is_file():
        print(f'{p = }')
from tempfile import TemporaryDirectory
from pathlib import Path
# TemporaryDirectory yields the directory name as a str and removes the
# directory when the block exits.
with TemporaryDirectory() as d:
    d = Path(d)
    print(f'{d = }')
from tempfile import TemporaryDirectory, TemporaryFile
from pathlib import Path
# A temporary text file created inside a temporary directory; both are
# cleaned up when their `with` blocks exit.
with TemporaryDirectory() as d:
    d = Path(d)
    print(f'{d = }')
    with TemporaryFile(mode='wt', dir=d) as f:
        print(f'{f = }')
from tempfile import TemporaryDirectory, NamedTemporaryFile
from pathlib import Path
# NamedTemporaryFile exposes the file's path as f.name. delete=False leaves
# the file in place when it is closed; it disappears later together with the
# enclosing temporary directory.
with TemporaryDirectory() as d:
    d = Path(d)
    print(f'{d = }')
    with NamedTemporaryFile(mode='wt', dir=d, delete=False) as f:
        print(f'{f = }')
        print(f'{f.name = }')
from pathlib import Path
class MyPath(type(Path())):
    """Project-specific path type; adds nothing yet.

    ``Path()`` instantiates the concrete class for the current platform, so
    ``type(Path())`` is the class that is subclassed here.
    """


# The subclass still offers every regular pathlib attribute.
path = MyPath('data/Samsung.2020-01-03.output.csv')
print(
    f'{path.parent = }',
    f'{path.name = }',
    f'{path.suffix = }',
    f'{path.stem = }',
    sep='\n',
    end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
from pathlib import Path
from datetime import datetime
from re import compile as re_compile
class MyPath(type(Path())):
    """Path whose stem has the form ``<vendor>.<YYYY-MM-DD>.<input|output>``.

    The stem is the file name without its final extension, e.g.
    ``Samsung.2020-01-03.output`` for ``Samsung.2020-01-03.output.csv``.
    """

    # Three dot-separated fields; the vendor may not contain a dot.
    STEM_RE = re_compile(
        r'\.'.join([
            r'(?P<vendor>[^.]+)',
            r'(?P<date>\d{4}-\d{2}-\d{2})',
            r'(?P<type>(?:input|output))',
        ])
    )

    def _stem_field(self, field):
        """Return the named group of STEM_RE matched against the stem.

        Raises:
            ValueError: if the stem does not follow the naming scheme.
        """
        match = self.STEM_RE.fullmatch(self.stem)
        if match is None:
            # fullmatch() returns None on failure; calling .group() on it
            # would surface as an unrelated-looking AttributeError.
            raise ValueError(
                f'stem {self.stem!r} does not match <vendor>.<YYYY-MM-DD>.<input|output>'
            )
        return match.group(field)

    @property
    def date(self):
        """The date field of the stem, parsed into a datetime (midnight)."""
        return datetime.strptime(self._stem_field('date'), '%Y-%m-%d')

    @property
    def vendor(self):
        """The vendor field of the stem."""
        return self._stem_field('vendor')
# The regular pathlib attributes plus the two properties parsed from the stem
# by the MyPath class defined just above.
path = MyPath('data/Samsung.2020-01-03.output.csv')
print(
f'{path.parent = }',
f'{path.name = }',
f'{path.suffix = }',
f'{path.stem = }',
f'{path.date = }',
f'{path.vendor = }',
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
Context Managers
from tempfile import TemporaryDirectory, NamedTemporaryFile
from pathlib import Path
from contextlib import contextmanager
from sqlite3 import connect
from collections import namedtuple
@contextmanager
def temporary_database():
    """Yield a connection to a throwaway SQLite database file.

    The database file lives in a temporary directory. The connection is
    closed, and the file and directory removed, when the block exits. Rows
    come back as namedtuples whose fields are the result column names.
    """
    with TemporaryDirectory() as d:
        d = Path(d)
        with NamedTemporaryFile(dir=d, suffix='.db') as f:
            conn = connect(f.name)
            try:
                conn.row_factory = lambda cur, row: namedtuple('Row', [name for name, *_ in cur.description])(*row)
                # `with conn` only commits (or rolls back on error); it does
                # not close the connection, hence the explicit close() below.
                with conn:
                    yield conn
            finally:
                conn.close()


with temporary_database() as conn:
    print(f'{[*conn.execute("select 1 as test")] = }')
from tempfile import TemporaryDirectory, NamedTemporaryFile
from pathlib import Path
from contextlib import contextmanager
from sqlite3 import connect
from collections import namedtuple
from random import Random
from string import ascii_lowercase
@contextmanager
def temporary_database():
    """Yield a connection to a throwaway SQLite database file.

    The database file lives in a temporary directory. The connection is
    closed, and the file and directory removed, when the block exits. Rows
    come back as namedtuples whose fields are the result column names.
    """
    with TemporaryDirectory() as d:
        d = Path(d)
        with NamedTemporaryFile(dir=d, suffix='.db') as f:
            conn = connect(f.name)
            try:
                conn.row_factory = lambda cur, row: namedtuple('Row', [name for name, *_ in cur.description])(*row)
                # `with conn` only commits (or rolls back on error); it does
                # not close the connection, hence the explicit close() below.
                with conn:
                    yield conn
            finally:
                conn.close()
@contextmanager
def sample_data(conn, name, *, rnd=None):
    """Create table ``name`` with ten random rows; drop it again on exit.

    Args:
        conn: an open sqlite3 connection.
        name: table name; must be purely alphabetic because it is
            interpolated into the SQL text.
        rnd: optional ``random.Random`` used to generate the rows.

    Raises:
        ValueError: if ``name`` is not purely alphabetic.
    """
    class Row(namedtuple('Row', 'name value')):
        @classmethod
        def from_random(cls, rnd=None):
            # Four random lowercase letters and an integer in [-100, 100].
            rnd = rnd if rnd is not None else Random()
            name = ''.join(rnd.choice(ascii_lowercase) for _ in range(4))
            value = rnd.randint(-100, +100)
            return cls(name=name, value=value)

    data = [Row.from_random(rnd=rnd) for _ in range(10)]
    if not name.isalpha():
        raise ValueError('bad table name')
    # Create the table before entering the try block: if creation fails
    # (for instance because the table already exists), the cleanup below
    # must not drop a table this function did not create.
    conn.execute(f'create table {name} (name text, value number)')
    try:
        conn.executemany(f'insert into {name} (name, value) values (?, ?)', data)
        yield
    finally:
        conn.execute(f'drop table {name}')
# Fill a scratch table in a temporary database, print every row, then print
# an aggregate. Both queries must run inside sample_data(), because the table
# is dropped when that block exits.
with temporary_database() as conn:
    with sample_data(conn, 'test'):
        for row in conn.execute('select * from test'):
            print(f'{row = }')
        print(f'{[*conn.execute("select sum(value) as total from test")] = }')
itertools
import itertools
print("Let's take a look!")
# zip() walks two parallel lists in lockstep.
hosts = ['abc-123', 'def-456', 'xyz-789']
online = [True, False, True]
for hst, onl in zip(hosts, online):
    if not onl:
        print(f'{hst} is down.')

# bool is an int subtype, so sum() counts the True entries.
measurements = [12, 42, 56, 23, 43, 59]
heartbeats = [True, False, False, True, True, True]
print(f'{sum(heartbeats) = }')

# Pair each measurement with its heartbeat and keep only those taken while
# the heartbeat was present.
measurements = [12, 42, 56, 23, 43, 59]
heartbeats = [True, False, False, True, True, True]
for msm, hb in zip(measurements, heartbeats):
    if not hb:
        print(f'{msm = } should be ignored')
good_measurements = [msm for msm, hb in zip(measurements, heartbeats) if hb]
print(f'{good_measurements = }')
# Down-weight the measurement taken at a missed heartbeat (x0.75) and its two
# neighbours (x0.5 each), modifying the list in place.
measurements = [12, 42, 56, 23, 43, 59]
heartbeats = [True, False, False, True, True, True]
for idx, hb in enumerate(heartbeats):
    # NOTE(review): a missed heartbeat at idx 0 is skipped entirely so that
    # idx-1 cannot wrap around to the last element — confirm this is intended.
    if not hb and idx > 0:
        measurements[idx-1] *= .5
        measurements[idx] *= .75
        # The final sample has no right-hand neighbour; indexing idx+1 there
        # would raise IndexError.
        if idx + 1 < len(measurements):
            measurements[idx+1] *= .5
print(f'{measurements = }')
# Every (primary, secondary) pairing, written as nested loops.
hosts = ['abc-123', 'def-456', 'xyz-789']
for pri in hosts:
    for sec in hosts:
        print(f'{pri = } → {sec = }')

# The same, skipping the pairings of a host with itself.
hosts = ['abc-123', 'def-456', 'xyz-789']
for pri in hosts:
    for sec in hosts:
        if pri != sec:
            print(f'{pri = } → {sec = }')
from itertools import permutations
# permutations(r=2): ordered pairs of hosts taken from different positions.
hosts = ['abc-123', 'def-456', 'xyz-789']
for pri, sec in permutations(hosts, r=2):
    print(f'{pri = } → {sec = }')
from itertools import combinations
# combinations(r=2): unordered pairs, each pair of positions appearing once.
hosts = ['abc-123', 'def-456', 'xyz-789']
for lft, rgt in combinations(hosts, r=2):
    print(f'{lft = } → {rgt = }')
from itertools import product
# product(repeat=2): the full cross product, self-pairs included — the
# equivalent of two nested loops over the same list.
hosts = ['abc-123', 'def-456', 'xyz-789']
for lft, rgt in product(hosts, repeat=2):
    print(f'{lft = } → {rgt = }')
import itertools
print(dir(itertools))
from itertools import islice, tee
def nwise(g, *, n=2):
    """Yield overlapping windows of ``n`` consecutive items from ``g``.

    tee() makes ``n`` independent iterators over ``g``; the i-th one is
    advanced by ``i`` items, and zipping them lines the items up as windows.
    """
    return zip(*(islice(it, i, None) for i, it in enumerate(tee(g, n))))


# Walk the hosts as consecutive (current, next) pairs.
hosts = ['abc-123', 'def-456', 'ghi-135', 'xyz-789']
for srv0, srv1 in nwise(hosts):
    print(f'{srv0 = } → {srv1 = }')
from itertools import islice, tee, repeat, chain
def nwise(g, *, n=2):
    """Yield overlapping windows of ``n`` consecutive items from ``g``."""
    return zip(*(islice(it, i, None) for i, it in enumerate(tee(g, n))))


def first(g, *, n=1):
    """Pair each item of ``g`` with a flag that is True for the first ``n``."""
    return zip(chain(repeat(True, n), repeat(False)), g)


# Announce the top of the chain once, then print every hop.
hosts = ['abc-123', 'def-456', 'ghi-135', 'xyz-789']
for is_top, (srv0, srv1) in first(nwise(hosts)):
    if is_top:
        print(f'Top: {srv0 = }')
    print(f'{srv0 = } → {srv1 = }')
from itertools import islice, tee, repeat, chain, zip_longest
def nwise(g, *, n=2):
    """Yield overlapping windows of ``n`` consecutive items from ``g``."""
    return zip(*(islice(it, i, None) for i, it in enumerate(tee(g, n))))


def nwise_longest(g, *, n=2, fv=object()):
    """Like nwise, but keep the trailing windows, padding them with ``fv``."""
    return zip_longest(*(islice(it, i, None) for i, it in enumerate(tee(g, n))), fillvalue=fv)


def first(g, *, n=1):
    """Pair each item of ``g`` with a flag that is True for the first ``n``."""
    return zip(chain(repeat(True, n), repeat(False)), g)


def last(g, *, m=1, s=object()):
    """Pair each item of ``g`` with a flag that is True for the last ``m``.

    Each item is looked at together with the ``m`` items after it; when the
    look-ahead runs off the end it is padded with the sentinel ``s``, which
    marks the item as one of the final ``m``.
    """
    return ((ahead[-1] is s, item) for item, *ahead in nwise_longest(g, n=m+1, fv=s))


# Announce both ends of the chain while printing every hop.
hosts = ['abc-123', 'def-456', 'ghi-135', 'xyz-789']
for is_top, (is_bottom, (srv0, srv1)) in first(last(nwise(hosts))):
    if is_top:
        print(f'Top: {srv0 = }')
    print(f'{srv0 = } → {srv1 = }')
    if is_bottom:
        print(f'Bottom: {srv1 = }')
Generators, generator expressions, “iteration helpers.”
# Event names grouped into tuples, one tuple per category.
# NOTE(review): presumably the names within a tuple are mutually exclusive
# alternatives — confirm.
events = [
('overvolt', 'undervolt'),
('high-fan', 'low-fan'),
]
from itertools import product, chain, combinations
# Event names grouped into tuples, one tuple per category.
events = [
    ('overvolt', 'undervolt'),
    ('high-fan', 'low-fan'),
]
# Build every scenario that picks exactly one event from each category in
# some subset of the categories (the empty subset included):
#   combinations(events, r) -> every way to choose r categories
#   product(*cat)           -> one event from each chosen category
scenarios = {
    frozenset(evs) for evs in
    chain.from_iterable(
        product(*cat) for cat in
        chain.from_iterable(
            combinations(events, r=r)
            for r in range(0, len(events)+1)
        )
    )
}
# Remove the scenario that consists of 'low-fan' alone.
scenarios = scenarios - {frozenset({'low-fan'})}
for sc in sorted(scenarios, key=len):
    print(f'{sc = }')
dataclasses
& enum
import dataclasses
import enum
print("Let's take a look!")
# Events described with nested tuples and lists. The entries do not share one
# shape: the voltage entries carry a chipset, the fan entries do not.
events = [
    ('cpu', 'Intel', [
        ('overvolt', +50),
        ('undervolt', -50),
    ]),
    ('bmc', 'ARM', [
        ('overvolt', +75),
        ('undervolt', -75),
    ]),
    # A comma is required between the two tuples; without it Python tries to
    # call the first tuple and raises TypeError.
    ('cpu fan', [('high', 3000), ('low', 1500)]),
    ('gpu fan', [('high', 3000), ('low', 1500)]),
]
from collections import namedtuple
# The same events as namedtuples: fields are read by name, not by position.
CpuEvent = namedtuple('CpuEvent', 'chipset voltage')
BmcEvent = namedtuple('BmcEvent', 'chipset voltage extra')
events = [
    CpuEvent('Intel', +50),
    CpuEvent('Intel', -50),
    BmcEvent('ARM', +75, 1),
    BmcEvent('ARM', -75, 1),
]
# Both event types expose .chipset and .voltage, so one loop serves both.
for ev in events:
    print(f'{ev.chipset = :>5}\t{ev.voltage = :>3}')
from enum import Enum
from collections import namedtuple
# Chipsets as an Enum, so only the declared members can be used.
Chipset = Enum('Chipset', 'Intel ARM')
CpuEvent = namedtuple('CpuEvent', 'chipset voltage')
BmcEvent = namedtuple('BmcEvent', 'chipset voltage')
events = [
    CpuEvent(Chipset.Intel, +50),
    CpuEvent(Chipset.Intel, -50),
    BmcEvent(Chipset.ARM, +75),
    BmcEvent(Chipset.ARM, -75),
]
for ev in events:
    print(f'{ev.chipset = :>5}\t{ev.voltage = :>3}')
# Iterating the Enum class yields its members in definition order.
print(f'{[*Chipset] = }')
from dataclasses import dataclass
from enum import Enum
from functools import total_ordering
from itertools import product, chain, combinations
@total_ordering
class OrderedEnum(Enum):
    """Enum base whose members compare, order and hash by their ``value``.

    ``total_ordering`` derives ``<=``, ``>`` and ``>=`` from ``__eq__`` and
    ``__lt__``. Operands that are not Enum members are answered with
    ``NotImplemented`` so that Python raises TypeError for ordering
    comparisons; returning False from ``__lt__`` would make the derived
    ``member > 5`` evaluate to True.
    """

    def __eq__(self, other):
        if not isinstance(other, Enum):
            return NotImplemented
        return self.value == other.value

    def __lt__(self, other):
        if not isinstance(other, Enum):
            return NotImplemented
        return self.value < other.value

    def __hash__(self):
        # Kept consistent with __eq__: equal values hash alike.
        return hash(self.value)


# Create the members through the base class itself; the functional API's
# `type=` argument is meant for mix-in data types, not for the Enum base.
Chipset = OrderedEnum('Chipset', 'Intel ARM')
@dataclass(unsafe_hash=True)
class CpuEvent:
    """Voltage change on a CPU; calling the instance reports the setting.

    unsafe_hash=True generates __hash__ from the fields, so instances can be
    placed in sets and frozensets.
    """
    chipset : Chipset
    voltage : int

    def __call__(self):
        print(f'{type(self).__name__}: setting {self.voltage = }')


@dataclass(unsafe_hash=True)
class BmcEvent:
    """Voltage change on a BMC; calling the instance reports the setting.

    unsafe_hash=True generates __hash__ from the fields, so instances can be
    placed in sets and frozensets.
    """
    chipset : Chipset
    voltage : int

    def __call__(self):
        print(f'{type(self).__name__}: setting {self.voltage = }')
@dataclass(unsafe_hash=True)
class FanEvent:
    """Fan speed change; calling the instance reports the setting.

    unsafe_hash=True generates __hash__ from the fields, so instances can be
    placed in sets and frozensets.
    """
    speed : int

    def __call__(self):
        # type(self).__name__ gives the concrete subclass name in the output.
        print(f'{type(self).__name__}: setting {self.speed = }')


# The subclasses add no fields; the generated __eq__ only treats instances of
# the same class as equal, so a GPU fan event never equals a CPU fan event.
class GpuFanEvent(FanEvent):
    pass


class CpuFanEvent(FanEvent):
    pass
# One frozenset per category, each holding that category's alternative events.
events = [
frozenset({
CpuEvent(Chipset.Intel, +50),
CpuEvent(Chipset.Intel, -50),
}),
frozenset({
BmcEvent(Chipset.ARM, +75),
BmcEvent(Chipset.ARM, -75),
}),
frozenset({
GpuFanEvent(3000),
GpuFanEvent(1500),
}),
frozenset({
CpuFanEvent(3000),
CpuFanEvent(1500),
}),
]
# Every scenario that picks exactly one event from each category in some
# subset of the categories (the empty subset included):
#   combinations(events, r) -> every way to choose r categories
#   product(*cat)           -> one event from each chosen category
scenarios = {frozenset(evs) for evs in
chain.from_iterable(
product(*cat) for cat in
chain.from_iterable(
combinations(events, r=r)
for r in range(0, len(events)+1)
)
)
}
# for sc in sorted(scenarios, key=len):
# print(f'{sc = }')
from contextlib import contextmanager
@contextmanager
def scenario_settings():
    """Bracket a scenario: report a reboot on entry and a poweroff on exit.

    The poweroff message is printed from ``finally``, so it appears even when
    the body of the ``with`` block raises.
    """
    try:
        print('reboot device')
        yield
    finally:
        print('poweroff device')
# Run every scenario, smallest first, inside its own reboot/poweroff bracket;
# each event object is called to apply its setting.
# NOTE(review): the original indentation was lost; print() is placed after
# the `with` block so a blank line separates scenarios — confirm.
for sc in sorted(scenarios, key=len):
    with scenario_settings():
        for ev in sc:
            ev()
    print()
Object orientation!
Context managers.
# A file object is a context manager: it is closed when the block exits.
with open('test-file', 'w') as f:
    ...
from sqlite3 import connect
from contextlib import closing
# A sqlite3 connection used as a context manager wraps a transaction (commit
# on success, rollback on error); it does not close the connection.
# closing() turns the cursor's close() method into a context manager.
with connect(':memory:') as conn:
    with closing(conn.cursor()) as cur:
        ...
from contextlib import contextmanager
@contextmanager
def g():
    """Minimal context manager: print around the body of the ``with`` block.

    Code before ``yield`` runs on entry and code after it on exit. The
    teardown sits in ``finally`` so that it also runs when the body raises.
    """
    print('setup')
    try:
        yield
    finally:
        print('teardown')


with g():
    print('in-between')