seminars.fb

Workshop (Fri Oct 28): “Data Analysis”

   
Title “Data Analysis”
Topic hard problems in data analysis
Date Fri Oct 28
Time 1pm~3pm PST
Keywords pandas

Abstract

In this advanced workshop, we will share with you some of the most valuable and most complex problems problems we have had to solve in pandas as part of our work with the Hardware Insights team.

In this workshop, we’ll work through these problems together, to see how we can make very sophisticated analyses straightforward and obvious.

Notes

Question: delta → state; state → delta

print("Let's take a look!")

The below data set contains events related to devices that you manage. Devices follow a simple lifecycle:

provisioned → (deployed → in use) → (under maintenance → in use → …) → decommissioned

Note that a device isn’t guaranteed to be deployed or go into use, and that a device may or may not go under maintenance multiple times.

Answer:

from datetime import timedelta
from random import Random
from enum import Enum
from string import ascii_lowercase, digits

from pandas import DataFrame, to_datetime, concat
from numpy.random import default_rng

rng = default_rng(0)
rnd = Random(0)

State = Enum('State', 'Provisioned Deployed InUse Maintenance Decommissioned')

def simulate():
    td = lambda days: timedelta(seconds=rnd.randrange(24 * 60 * 60 * days))

    yield State.Provisioned, td(days=90)

    if rnd.choices([True, False], weights=[.90, .10])[0]:
        yield State.Deployed, td(days=30)

        if rnd.choices([True, False], weights=[.95, .05])[0]:
            for _ in range(max(0, int(rnd.gauss(mu=2, sigma=1)))):
                yield State.InUse, td(days=14)
                yield State.Maintenance, td(days=14)
            if rnd.choices([True, False], weights=[.95, .05])[0]:
                yield State.InUse, td(days=14)

        if rnd.choices([True, False], weights=[.50, .50])[0]:
            yield State.Decommissioned, td(days=30)

if __name__ == '__main__':
    devices = rng.choice([*ascii_lowercase, *digits], size=(10, 9))
    devices[:, 4] = '-'
    devices = devices.view('<U9').ravel()

    lifecycles = (
        DataFrame(simulate(), columns='state time'.split())
            .assign(device=d)
            .assign(state=lambda df: df['state'].apply(lambda x: x.name))
            .set_index(['device', 'state'])
            .cumsum()
            .pipe(lambda df: df + to_datetime('2020-01-01'))
            .squeeze(axis='columns')
        for d in devices
    )
    lifecycles = concat(lifecycles, axis='index')

    print(lifecycles)

    lifecycles.to_pickle('lifecycles.pkl')

Background & Theory

from pandas import read_pickle
from pandas import date_range

s = read_pickle('lifecycles.pkl')

states = {dev: [] for dev in s.index.get_level_values('device').unique()}

for day_num, dt in enumerate(date_range(s.min().floor('D'), s.max().floor('D'), freq='D')):
    for (dev, new_st), _ in s[lambda s: dt == s.dt.floor('D')].iteritems():
        states[dev].append((dt, new_st))
    if states[dev] and len(states[dev]) <= day_num:
        states[dev].append((dt, states[dev][-1][-1]))

for dev, sts in states.items():
    print(dev)
    for st in sts:
        print(st)
    break
from pandas import read_pickle
from pandas import date_range

s = read_pickle('lifecycles.pkl')
# print(
#     sep='\n',
# )

# for _, x in s.groupby([s.index.get_level_values('device'), s.dt.floor('D')]):
#     print(x.index.get_level_values('state')[-1])
#     # break
from pandas import Series, date_range

s = Series([1, 2, 3], index=date_range('2020-01-01', periods=3))

print(s)
from pandas import read_pickle, IndexSlice, Series, date_range, MultiIndex
from pandas import date_range, set_option

set_option('display.max_rows', None)

s = read_pickle('lifecycles.pkl')
# print(s)

end_of_day = (
    s
        .reset_index('state', drop=False)
        .assign(
            time=lambda df: df['time'].dt.floor('D')
        )
        .set_index('time', append=True)
        .sort_index()
        .groupby(['device', 'time'])
        .tail(1) # .head(1)
        .squeeze(axis='columns')
)

all_devices = end_of_day.index.get_level_values('device').unique()
all_days = date_range(
    end_of_day.index.get_level_values('time').min(),
    end_of_day.index.get_level_values('time').max(),
    freq='D',
)

states = Series(
    index=MultiIndex.from_product([all_devices, all_days], names=['device', 'time']),
    dtype=object,
    name='state',
)
states.loc[end_of_day.index] = end_of_day.values
states = states.ffill()


print(
    # end_of_day.loc[IndexSlice[:, '2020-04', :]]
    # end_of_day,
    # all_devices,
    # all_days,
    # every_device_every_day,
    # len(states.loc[end_of_day.index]),
    # end_of_day.values),
    # states.loc['29ny-x4yz'],
    # states.groupby('time').value_counts(),
    # states.index.names,
    # states.head(3),
    # states.groupby('time').value_counts().unstack('state').plot(),
    states.groupby('time').value_counts().plot(),
    sep='\n',
)
from matplotlib.pyplot import show
show()
from pandas import Series, date_range, DataFrame
from numpy.random import default_rng

rng = default_rng(0)

power = Series(
    data=rng.random(size=100),
    index=date_range('2020-01-01', periods=100),
)

temperature = Series(
    data=rng.random(size=95),
    index=date_range('2020-01-03', periods=95),
)
print(
    DataFrame({
        'power': power,
        'temperature': temperature,
    }).stack()
)
from pandas import Series, date_range

s1 = Series([1, 2, 3], index=date_range('2020-01-01', periods=3))
s2 = Series([40, 50, 60], index=date_range('2020-01-02', periods=3))

print(
    # s1,
    # s2,
    s1 + s2,
    sep='\n'
)

Answer

from pandas import read_pickle

s = read_pickle('lifecycles.pkl')
print(
    s.dt.floor('D').loc[lambda s: s == '2020-04-24']
)

Question: scenario analysis

print("Let's take a look!")

Given the data below, which represents trades done by a trading desk across multiple books, analyze what would happen (on a per book level) to the total market value and total profit under all the various combinations of the different scenarios:

from pandas import period_range, date_range, MultiIndex, Series, DataFrame, to_timedelta
from numpy import tile
from numpy.random import default_rng
from string import ascii_lowercase

rng = default_rng(0)

assets = ['abc', 'def', 'ghi', 'jkl', 'xyz']
dates = period_range('2020-01-01', periods=90, freq='D')

prices = Series(
    index=(idx := MultiIndex.from_product([dates, assets], names=['date', 'asset'])),
    data=(
        rng.normal(loc=100, scale=50, size=len(assets)).clip(10, 150)
      * rng.normal(loc=1, scale=.05, size=(len(dates), len(assets))).cumprod(axis=0)
    ).ravel()
).round(2)

books = ['Alice', 'Bob', 'Charlie', 'Dana', 'Evan']
dates = date_range('2020-01-01', periods=3)

trades = (
    DataFrame(
        index=(idx := MultiIndex.from_product(
            [tile(dates, 5), books, assets],
            names=['date', 'book', 'asset'],
        )),
        data={
            'volume': rng.integers(-100_000, +100_000, size=len(idx)).round(-2),
            'price': (
                prices.loc[
                    MultiIndex.from_arrays([
                        idx.get_level_values('date').to_period('D'),
                        idx.get_level_values('asset'),
                    ])
                ] * rng.normal(loc=1, scale=.1, size=len(idx)).clip(.75, 1.25)
            ).values,
        },
    )
    .loc[lambda df: abs(df['volume']) > 0]
    .sample(frac=.25, random_state=rng)
    .pipe(lambda df: df
        .set_axis(MultiIndex.from_arrays([
            df.index.get_level_values('book'),
            df.index.get_level_values('asset'),
            df.index.get_level_values('date') + to_timedelta(rng.integers(60 * 60, size=len(df)), unit='s'),
        ], names=['book', 'asset', 'date']))
    )
    .sort_index()
)

print(
    trades.head(3)
)

Background & Theory

Answer

Question: consecutive measurements

print("Let's take a look!")
from datetime import timedelta
from random import Random
from enum import Enum
from string import ascii_lowercase, digits

from pandas import DataFrame, to_datetime, concat
from numpy.random import default_rng

rng = default_rng(0)
rnd = Random(0)

State = Enum('State', 'Provisioned Deployed InUse Maintenance Decommissioned')

def simulate():
    td = lambda days: timedelta(seconds=rnd.randrange(24 * 60 * 60 * days))

    yield State.Provisioned, td(days=90)

    if rnd.choices([True, False], weights=[.90, .10])[0]:
        yield State.Deployed, td(days=30)

        if rnd.choices([True, False], weights=[.95, .05])[0]:
            for _ in range(max(0, int(rnd.gauss(mu=2, sigma=1)))):
                yield State.InUse, td(days=14)
                yield State.Maintenance, td(days=14)
            if rnd.choices([True, False], weights=[.95, .05])[0]:
                yield State.InUse, td(days=14)

        if rnd.choices([True, False], weights=[.50, .50])[0]:
            yield State.Decommissioned, td(days=30)

if __name__ == '__main__':
    devices = rng.choice([*ascii_lowercase, *digits], size=(10, 9))
    devices[:, 4] = '-'
    devices = devices.view('<U9').ravel()

    lifecycles = (
        DataFrame(simulate(), columns='state time'.split())
            .assign(device=d)
            .assign(state=lambda df: df['state'].apply(lambda x: x.name))
            .set_index(['device', 'state'])
            .cumsum()
            .pipe(lambda df: df + to_datetime('2020-01-01'))
            .squeeze(axis='columns')
        for d in devices
    )
    lifecycles = concat(lifecycles, axis='index')

    print(lifecycles)

    lifecycles.to_pickle('lifecycles.pkl')

Background & Theory

from pandas import read_pickle, date_range, IndexSlice, DataFrame, MultiIndex, Series

s = read_pickle('lifecycles.pkl')

end_of_day = (
    s
        .reset_index('state', drop=False)
        .assign(
            time=lambda df: df['time'].dt.floor('D')
        )
        .set_index('time', append=True)
        .sort_index()
        .groupby(['device', 'time'])
        .tail(1) # .head(1)
        .squeeze(axis='columns')
)

all_devices = end_of_day.index.get_level_values('device').unique()
all_days = date_range(
    end_of_day.index.get_level_values('time').min(),
    end_of_day.index.get_level_values('time').max(),
    freq='D',
)

states = Series(
    index=MultiIndex.from_product([all_devices, all_days], names=['device', 'time']),
    dtype=object,
    name='state',
)
states.loc[end_of_day.index] = end_of_day.values
states = states.ffill().dropna()

print(states)
from pandas import Series, date_range, MultiIndex
from numpy.random import default_rng
from string import ascii_lowercase

rng = default_rng(0)

entities = rng.choice([*ascii_lowercase], size=(5, 9))
entities[:, 4] = '-'
entities = entities.view('<U9').ravel()

s = Series(
    index=(idx := MultiIndex.from_product([
        entities,
        date_range('2020-01-01', periods=14)
    ], names=['entity', 'date'])),
    data=rng.integers(-10, +10, size=len(idx)),
    name='signal',
)

regions = (
    (s > 0)
        .pipe(
            lambda s: (s != s.shift()).groupby('entity').cumsum(),
        )
)
largest_region = regions.pipe(
    lambda s: s.groupby('entity').agg(
        lambda g: g.groupby(g).count().idxmax()
    )
)

print(
    regions
        .reset_index('date', drop=False)
        .set_index('signal', append=True)
        .loc[
            MultiIndex.from_arrays([
                largest_region.index.get_level_values('entity'),
                largest_region.values,
            ], names=['entity', 'signal'])
        ]
        .droplevel('signal')
        .set_index('date', append=True)
        .pipe(
            lambda idx: s.loc[idx.index]
        )
    ,
    # s,
    # True + False,
    # True == 1,
    # False == 0,



    # .loc['chmk-aada'],

    # s.loc['chmk-aada'],

    # s.groupby('entity').max(),
    # s.groupby('entity').agg(
    #     lambda g: g.groupby(g > 0).count().max()
    # ),
    # s.loc['wqnh-bbae'],
    # s.groupby('entity').transform(
    #     lambda g: (g > 0).rolling(3).agg(lambda x: x.all())
    # ),

    # s.loc['chmk-aada'],
    # s.loc['wqnh-bbae'],
    # s.groupby(),
    # s.groupby(s == 2).count(),

    # aggregations

    # s.groupby('entity').mean(),
    # s.groupby('date').mean(),
    # s.groupby(['entity', s.index.get_level_values('date').to_period('M')]).mean(),

    # s.groupby('entity').agg(
    #     lambda g: g.loc[g>0].sum()
    # ),

    # transformations
    # s.groupby('entity').cumsum(),

    # s.groupby('entity').transform(
    #     lambda g: g.rolling(3).mean()
    # ),

    # arbitrary (“applications”)

    sep='\n',
)

Answer

Question: rolling operations with small windows

print("Let's take a look!")

The below data represents website telemetry data. Who were the top-10 users, measured by number of days on the website? What was the average time these users spent on each page (assuming that any sequence of page views <1h in duration constitute a single “visit” or “session”)?

from pandas import date_range, MultiIndex, to_datetime, to_timedelta, DataFrame, CategoricalDtype, Categorical
from numpy.random import default_rng
from numpy import unique, repeat, arange

rng = default_rng(0)

ips = [f'{a}.{b}.{c}.{d}' for a, b, c, d in unique(rng.integers(256, size=(100_000, 4)), axis=0)]
pages = CategoricalDtype(['index.html', 'about.html', 'contact.html'])

df = DataFrame(
    index=(idx := rng.choice(ips, size=10_000_000)),
    data={
        'date':
            to_datetime('2020-01-01')
          + to_timedelta(rng.integers(90 * 24 * 60 * 60, size=len(idx)), unit='s'),
        'page': Categorical.from_codes(
            rng.choice(arange(len(pages.categories)), size=len(idx)),
            dtype=pages,
        ),
    },
).sample(frac=.25, random_state=rng).sort_index()

print(df)

Background & Theory

Answer

Predicates and pandas

The below data set represents temperature and preciptation measurements for some regions.

Use it to answer the following questions:

from pandas import DataFrame, period_range, MultiIndex
from numpy.random import default_rng
from scipy.stats import skewnorm

rng = default_rng(0)

dates = period_range('2020-01-01', periods=90, freq='D')
regions = ['NY', 'CA', 'FL', 'TX']

df = DataFrame(
    index=(idx := MultiIndex.from_product([dates, regions], names=['date', 'region'])),
    data={
        'humidity':      rng.uniform(0, 1, size=len(idx)),
        'precipitation': skewnorm(a=4).rvs(size=len(idx), random_state=rng).clip(0),
        'temperature':   rng.normal(loc=75, scale=15, size=len(idx)),
    },
).round(2).sort_index()

print(df)

Background & Theory

Answer

Question: multi-column grouped operations

print("Let's take a look!")

Using the below trading data, compute the weighted average buy and sell traded price of each asset using the trade volumes as weights.

from numpy.random import default_rng
from pandas import period_range, MultiIndex, Series, date_range, DataFrame
from numpy import tile
from string import ascii_lowercase

rng = default_rng(0)

portfolios = ['Alice', 'Bob', 'Charlie']
assets = rng.choice([*ascii_lowercase], size=(4, 4)).view('<U4').ravel()
dates = date_range('2020-01', periods=90)

idx = MultiIndex.from_product([
    dates,
    assets,
], names=['date', 'asset'])

prices = DataFrame(
    index=idx,
    data={
        'bid': (bid := (
            rng.normal(loc=100, scale=20, size=len(assets))
            * rng.normal(loc=1, scale=.05, size=(len(dates), len(assets))).cumprod(axis=0)
        ).ravel()),
        'ask': bid * rng.normal(.95, scale=.02, size=len(bid)).clip(0, .995),
    },
).round(2).rename_axis('direction', axis='columns')

idx = MultiIndex.from_product([
    date_range('2020-01-01', periods=90),
    portfolios,
    tile(assets, 5)
], names=['date', 'portfolio', 'asset'])

trades = DataFrame({
    'volume': (volumes := rng.integers(-1_000_000, +1_000_000, size=len(idx)).round(-2)),
    'price':
        (
            (prices['bid'] - prices['ask']).loc[
                px_idx := MultiIndex.from_arrays([
                    idx.get_level_values('date').floor('D'),
                    idx.get_level_values('asset'),
                ])
            ].values * rng.normal(loc=1, scale=.20, size=len(idx))
        ).round(2)
        + prices.loc[px_idx, 'ask'].values
        ,
}, index=idx).sample(random_state=rng, frac=.25).sort_index()

print(
    trades.head(),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)

Background & Theory

Answer

Questions: best practices with pandas

print("Let's take a look!")

The following data represents some measurements you have taken in the field. Given the below data, in one chained operation:

  1. set the index to entity and date
  2. filter down to entries for February only
  3. add a new column with the absolute value of the measurement
  4. group the absolute values on the intervals [0, 2), [2, 5), [5, 10] and find their mean
from pandas import DataFrame, date_range, to_datetime, to_timedelta
from numpy.random import default_rng

rng = default_rng(0)

df = DataFrame({
    'date': to_datetime('2020-01-01')
          + to_timedelta(rng.integers(90 * 24 * 60 * 60, size=(sz := 100)), unit='s'),
    'entity': rng.choice(['abc', 'def', 'xyz'], size=sz),
    'value': rng.integers(-10, +10, size=sz),
})

print(df)

Background & Theory

Answer

Question: pandas pitfalls

print("Let's take a look!")

Why does pandas.SettingWithCopyWarning come up? How do we avoid it?

The following data represents some measurements you have taken in the field. Take the below data, and for every week in February, take the total number of flags that occurred in that week and subtract it from the negative values or add it to the positive values.

from pandas import DataFrame, date_range, to_datetime, to_timedelta, MultiIndex
from numpy.random import default_rng

rng = default_rng(0)

dates = to_datetime('2020-01-01') + to_timedelta(rng.integers(90 * 24 * 60 * 60, size=(sz := 100)), unit='s')
entities = rng.choice(['abc', 'def', 'xyz'], size=sz)

idx = MultiIndex.from_product([dates, entities], names=['date', 'entity'])
df = DataFrame({
    'flag': rng.choice([True, False], size=len(idx)),
    'value': rng.integers(-10, +10, size=len(idx)),
}, index=idx)

print(df)

Background & Theory

Answer

Question: distance between signals

Below is code for a simple genetic algorithm with the following mechanics:

In this simulation, we:

Using the simulation code below, identify how tools like numpy, pandas, xarray, and features in pure Python “fit together.”

from numpy import arange
from numpy.random import default_rng
from dataclasses import dataclass, replace
from matplotlib.pyplot import plot, legend, show
from pandas import DataFrame
from xarray import DataArray
from itertools import islice

@dataclass
class Step:
    population : DataArray
    rng : object

    @classmethod
    def from_random(cls, size, length, *, rng=None):
        if rng is None:
            rng = default_rng(0)
        population = DataArray(
            rng.integers(0, 2, size=(size, length)),
            dims=['size', 'length'],
        )
        return cls(population, rng=rng)

    @property
    def fitness(self):
        return self.population.mean().item()

    def simulate(self, *, mutation_rate=1e-4):
        rng = self.rng
        population = self.population
        size, length = population.sizes['size'], population.sizes['length']

        yield self

        while True:
            p = population.mean(dim='length')
            p /= p.sum()

            # TODO: choose only non-same mating pairs
            parents = rng.choice(arange(size), size=(size, 2), p=p)
            left, right = population[parents[:, 0]], population[parents[:, 1]]

            xover_mask = rng.integers(0, 2, size=(size, length), dtype=bool)
            xover_mask.sort(axis=1)

            mutation_mask = rng.choice(
                [True, False], p=(mutation_rate, 1 - mutation_rate),
                size=(size, length)
            )

            population = ((left * xover_mask) + (right * ~xover_mask)) ^ mutation_mask
            yield replace(self, population=population)

if __name__ == '__main__':
    scenarios = {
        length: Step.from_random(size=(SIZE := 250), length=length).simulate()
        for length in {5, 10, 25, 50, 100}
    }

    results = DataFrame({
        sc: [st.fitness for st in islice(sim, 150)]
        for sc, sim in sorted(scenarios.items())
    })
    results.columns.name, results.index.name = 'length', 'step'

    print(' {} '.format('Results').center(40, '\N{box drawings light horizontal}'))
    print(results)

Further Explanation

In this genetic algorithm, we “breed” two chromosomes to produce an offspring chromosome.

Over time, we would expect repeated breeding to produce “fitter” and “fitter” offspring, but this will require that the choice of mates—the parents of each generation—are selected by their fitness.

Our fitness function is the number of ones in the chromosome. [0, 0, 0, 0, 0] has fitness of zero (or, as a per centage: 0%); [1, 1, 1, 1, 1] has a perfect fitness of five (or, as a per centage: 100%.)

“Fitness-proportional selection” means: at each stage when breeding the next generation, the parents should be selected randomly with weighting given to parents that score higher in our fitness function.

from random import choices
population = [
    [0, 0, 0, 0, 0], # fitness  0%: no     probability of being selected for mating
    [0, 0, 0, 0, 1], # fitness 20%: low    probability of being selected for mating
    [1, 1, 1, 0, 0], # fitness 60%: medium probability of being selected for mating
    [0, 1, 1, 1, 0], # fitness 60%: medium probability of being selected for mating
    [1, 1, 1, 0, 1], # fitness 80%: high   probability of being selected for mating
]

fitnesses = [sum(chromo)/len(chromo) for chromo in population]
total_fitness = sum(fitnesses)
weights = [fit/total_fitness for fit in fitnesses]
print(f'{choices(population, weights=weights, k=2) = }')

The overall fitness of the population can further grow via some randomisation factors. We will implement two:

When breeding two parents x and y to create a child z, we construct the child by lining up x and y and finding a random point at which to stitch the two together. This is what we mean by “one-point crossover.”

# the parents...
parents = x, y = \
    [0, 0,    1, 0, 1], \
    [1, 1,    1, 1, 0],
#          ##
#          stitch here, taking the left of x and right of y
#                    or taking the right of x and the left of y

# the child...
child = z = [0, 0] + [1, 1, 0] # possibility I:  left-of-x  + right-of-y
child = z = [1, 1] + [1, 0, 1] # possibility II: right-of-x + left-of-y

Once you have performed crossover to construct the child, you can further add in randomness by performing random, single-bit flip mutations. This is what we mean by “bit-flip mutation”: randomly select bits and flip them.

from random import choices

child = [0, 0, 1, 1, 0]
print(f'Before: {child = }')
child = [bit ^ choices([True, False], weights=((MUTATION_RATE := .5), 1 - MUTATION_RATE))[0]
         for bit in child]
print(f'After:  {child = }')

Background & Theory

Answer

interval notation

print("Let's take a look")

Given the below time series measurements, select all the values on the following intervals:

In the above interval notation, a [ or ] means “include this endpoint” and a ( or ) means “do not include this endpoint.”

from pandas import Series, to_datetime, to_timedelta
from numpy.random import default_rng

rng = default_rng(0)

s = Series(
    index=(idx :=
        to_datetime('2020-01-01')
      + to_timedelta(rng.normal(loc=5, scale=2, size=100_000).clip(0).round().cumsum(), unit='s')
    ),
    data=rng.choice([True, False], size=len(idx)),
).sort_index()

print(
    s,
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)

Background & Theory

.loc vs .iloc

Implicitly hierarhical indices…

pandas.cut

Answer

cross sections

print("Let's take a look")

Given the below time series measurements, align them so that they match on the nearest time entry. Try rounding the time entries to the nearest day, and matching. (In the case of multiple entries per day, either match every against every or match against a grouped or rolling aggregate.)

from pandas import Series, to_datetime, to_timedelta, MultiIndex
from numpy.random import default_rng
from string import ascii_lowercase

rng = default_rng(0)

idx = MultiIndex.from_arrays([
    to_datetime('2020-01-01') + to_timedelta(rng.integers(24*60*60, size=100).cumsum(), unit='s'),
    rng.choice([*ascii_lowercase[:5]], size=100),
])
s1 = Series(rng.normal(size=len(idx)).round(2), index=idx)

idx = MultiIndex.from_arrays([
    to_datetime('2020-01-01') + to_timedelta(rng.integers(24*60*60, size=100).cumsum(), unit='s'),
    rng.choice([*ascii_lowercase[:5]], size=100),
])
s2 = Series(rng.integers(-10, +10, size=len(idx)), index=idx)

print(
    s1.head(2),
    s2.head(2),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)

Background & Theory

Answer