Skip to content

Basic CI/CD and fix code style #79

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Nov 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Continuous integration workflow: lint and test the package on every push,
# once per supported Python version.
name: ci

on:
  - push

jobs:
  # Style checks, delegated to the Makefile's `check_lint` target.
  lint:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Run lint
        run: |
          make init
          make check_lint

  # Test suite, delegated to the Makefile's `test` target.
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Run tests
        run: |
          make init
          make test
51 changes: 28 additions & 23 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,31 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
exclude_types: [svg]
- id: end-of-file-fixer
exclude_types: [svg]
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/asottile/seed-isort-config
rev: v2.2.0
hooks:
- id: seed-isort-config
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.10.1
hooks:
- id: isort
types: [python]
- repo: https://github.com/ambv/black
rev: 22.10.0
hooks:
- id: black
- id: black-jupyter
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
exclude_types: [svg]
- id: end-of-file-fixer
exclude_types: [svg]
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/asottile/seed-isort-config
rev: v2.2.0
hooks:
- id: seed-isort-config
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.10.1
hooks:
- id: isort
args: [--profile, black]
types: [python]
- repo: https://github.com/ambv/black
rev: 22.10.0
hooks:
- id: black
- id: black-jupyter
- repo: https://github.com/pycqa/flake8
rev: 3.9.2
hooks:
- id: flake8
19 changes: 19 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Developer and CI entry points. None of the targets produce a file of the
# same name, so all of them are declared phony.
#
# pip is always invoked as `python -m pip` so that every install lands in the
# environment of the `python` on PATH, the same one `init` installs into.
.PHONY: init lint check_lint test

# Install the package itself in editable mode.
init:
	python -m pip install -e .

# Install the lint tooling, then reformat the code base in place.
lint:
	python -m pip install -r requirements-lint.txt
	isort .
	black .

# Install the lint tooling, then report violations without modifying files.
check_lint:
	python -m pip install -r requirements-lint.txt
	flake8 .
	isort --check-only .
	black --diff --check --fast .

# Install the test requirements, then run the test suite.
test:
	python -m pip install -r requirements-test.txt
	pytest
1 change: 0 additions & 1 deletion causalpy/data/datasets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import pathlib

import pandas as pd
Expand Down
22 changes: 11 additions & 11 deletions causalpy/data/simulate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ def generate_synthetic_control_data(
"""
Example:
>> import pathlib
>> df, weightings_true = generate_synthetic_control_data(treatment_time=treatment_time)
>> df, weightings_true = generate_synthetic_control_data(
treatment_time=treatment_time
)
>> df.to_csv(pathlib.Path.cwd() / 'synthetic_control.csv', index=False)
"""

Expand All @@ -45,15 +47,17 @@ def generate_synthetic_control_data(
}
)

# 2. Generate counterfactual, based on weighted sum of non-treated variables. This is the counterfactual with NO treatment.
# 2. Generate counterfactual, based on weighted sum of non-treated variables. This
# is the counterfactual with NO treatment.
weightings_true = dirichlet(np.ones(7)).rvs(1)
df["counterfactual"] = np.dot(df.to_numpy(), weightings_true.T)

# 3. Generate the causal effect
causal_effect = gamma(10).pdf(np.arange(0, N, 1) - treatment_time)
df["causal effect"] = causal_effect * -50

# 4. Generate the actually observed data, ie the treated with the causal effect applied
# 4. Generate the actually observed data, i.e. the treated with the causal effect
# applied
df["actual"] = df["counterfactual"] + df["causal effect"]

# 5. apply observation noise to all relevant variables
Expand Down Expand Up @@ -94,13 +98,7 @@ def generate_time_series_data(
return df


def generate_time_series_data(treatment_time):
"""
Example use:
>> import pathlib
>> df = generate_time_series_data("2017-01-01").loc[:, ['month', 'year', 't', 'y']]
df.to_csv(pathlib.Path.cwd() / 'its.csv')
"""
def generate_time_series_data_seasonal(treatment_time):
dates = pd.date_range(
start=pd.to_datetime("2010-01-01"), end=pd.to_datetime("2020-01-01"), freq="M"
)
Expand All @@ -126,7 +124,9 @@ def generate_time_series_data(treatment_time):


def generate_time_series_data_simple(treatment_time, slope=0.0):
"""Generate simple interrupted time series data, with no seasonality or temporal structure"""
"""Generate simple interrupted time series data, with no seasonality or temporal
structure.
"""
dates = pd.date_range(
start=pd.to_datetime("2010-01-01"), end=pd.to_datetime("2020-01-01"), freq="M"
)
Expand Down
82 changes: 59 additions & 23 deletions causalpy/pymc_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,28 @@ def print_coefficients(self):
"""Prints the model coefficients"""
print("Model coefficients:")
coeffs = az.extract(self.prediction_model.idata.posterior, var_names="beta")
# Note: f"{name: <30}" pads the name with spaces so that we have alignment of the stats despite variable names of different lengths
# Note: f"{name: <30}" pads the name with spaces so that we have alignment of
# the stats despite variable names of different lengths
for name in self.labels:
coeff_samples = coeffs.sel(coeffs=name)
print(
f" {name: <30}{coeff_samples.mean().data:.2f}, 94% HDI [{coeff_samples.quantile(0.03).data:.2f}, {coeff_samples.quantile(1-0.03).data:.2f}]"
f"""
{name: <30}{coeff_samples.mean().data:.2f},
94% HDI [{coeff_samples.quantile(0.03).data:.2f},
{coeff_samples.quantile(1-0.03).data:.2f}]
"""
)
# add coeff for measurement std
coeff_samples = az.extract(
self.prediction_model.idata.posterior, var_names="sigma"
)
name = "sigma"
print(
f" {name: <30}{coeff_samples.mean().data:.2f}, 94% HDI [{coeff_samples.quantile(0.03).data:.2f}, {coeff_samples.quantile(1-0.03).data:.2f}]"
f"""
{name: <30}{coeff_samples.mean().data:.2f},
94% HDI [{coeff_samples.quantile(0.03).data:.2f},
{coeff_samples.quantile(1-0.03).data:.2f}]
"""
)


Expand Down Expand Up @@ -121,8 +130,12 @@ def plot(self):
include_label=False,
)
ax[0].plot(self.datapost.index, self.post_y, "k.")

ax[0].set(
title=f"Pre-intervention Bayesian $R^2$: {self.score.r2:.3f} (std = {self.score.r2_std:.3f})"
title=f"""
Pre-intervention Bayesian $R^2$: {self.score.r2:.3f}
(std = {self.score.r2_std:.3f})
"""
)

plot_xY(self.datapre.index, self.pre_impact, ax=ax[1])
Expand Down Expand Up @@ -198,7 +211,8 @@ class DifferenceInDifferences(ExperimentalDesign):

.. note::

There is no pre/post intervention data distinction for DiD, we fit all the data available.
There is no pre/post intervention data distinction for DiD, we fit all the
data available.

"""

Expand Down Expand Up @@ -239,16 +253,26 @@ def __init__(
assert (
"treated" in self.data.columns
), "Require a boolean column labelling observations which are `treated`"
# Check for `unit` in the incoming dataframe. *This is only used for plotting purposes*
# Check for `unit` in the incoming dataframe.
# *This is only used for plotting purposes*
assert (
"unit" in self.data.columns
), "Require a `unit` column to label unique units. This is used for plotting purposes"
# Check that `group_variable_name` has TWO levels, representing the treated/untreated. But it does not matter what the actual names of the levels are.
), """
Require a `unit` column to label unique units.
This is used for plotting purposes
"""
# Check that `group_variable_name` has TWO levels, representing the
# treated/untreated. But it does not matter what the actual names of
# the levels are.
assert (
len(pd.Categorical(self.data[self.group_variable_name]).categories) is 2
), f"There must be 2 levels of the grouping variable {self.group_variable_name}. I.e. the treated and untreated."
len(pd.Categorical(self.data[self.group_variable_name]).categories) == 2
), f"""
There must be 2 levels of the grouping variable {self.group_variable_name}
.I.e. the treated and untreated.
"""

# TODO: `treated` is a deterministic function of group and time, so this could be a function rather than supplied data
# TODO: `treated` is a deterministic function of group and time, so this could
# be a function rather than supplied data

# DEVIATION FROM SKL EXPERIMENT CODE =============================
# fit the model to the observed (pre-intervention) data
Expand Down Expand Up @@ -348,11 +372,13 @@ def plot(self):
showmedians=False,
widths=0.2,
)

for pc in parts["bodies"]:
pc.set_facecolor("C1")
pc.set_edgecolor("None")
pc.set_alpha(0.5)
# Plot counterfactual - post-test for treatment group IF no treatment had occurred.
# Plot counterfactual - post-test for treatment group IF no treatment
# had occurred.
parts = ax.violinplot(
az.extract(
self.y_pred_counterfactual,
Expand Down Expand Up @@ -380,7 +406,8 @@ def plot(self):

def _plot_causal_impact_arrow(self, ax):
"""
draw a vertical arrow between `y_pred_counterfactual` and `y_pred_counterfactual`
draw a vertical arrow between `y_pred_treatment` and
`y_pred_counterfactual`
"""
# Calculate y values to plot the arrow between
y_pred_treatment = (
Expand Down Expand Up @@ -438,13 +465,16 @@ class RegressionDiscontinuity(ExperimentalDesign):

:param data: A pandas dataframe
:param formula: A statistical model formula
:param treatment_threshold: A scalar threshold value at which the treatment is applied
:param treatment_threshold: A scalar threshold value at which the treatment
is applied
:param prediction_model: A PyMC model
:param running_variable_name: The name of the predictor variable that the treatment threshold is based upon
:param running_variable_name: The name of the predictor variable that the treatment
threshold is based upon

.. note::

There is no pre/post intervention data distinction for the regression discontinuity design, we fit all the data available.
There is no pre/post intervention data distinction for the regression
discontinuity design, we fit all the data available.
"""

def __init__(
Expand All @@ -469,7 +499,8 @@ def __init__(
self.y, self.X = np.asarray(y), np.asarray(X)
self.outcome_variable_name = y.design_info.column_names[0]

# TODO: `treated` is a deterministic function of x and treatment_threshold, so this could be a function rather than supplied data
# TODO: `treated` is a deterministic function of x and treatment_threshold, so
# this could be a function rather than supplied data

# DEVIATION FROM SKL EXPERIMENT CODE =============================
# fit the model to the observed (pre-intervention) data
Expand All @@ -492,8 +523,10 @@ def __init__(
(new_x,) = build_design_matrices([self._x_design_info], self.x_pred)
self.pred = self.prediction_model.predict(X=np.asarray(new_x))

# calculate discontinuity by evaluating the difference in model expectation on either side of the discontinuity
# NOTE: `"treated": np.array([0, 1])`` assumes treatment is applied above (not below) the threshold
# calculate discontinuity by evaluating the difference in model expectation on
# either side of the discontinuity
# NOTE: `"treated": np.array([0, 1])` assumes treatment is applied above
# (not below) the threshold
self.x_discon = pd.DataFrame(
{
self.running_variable_name: np.array(
Expand All @@ -514,7 +547,7 @@ def _is_treated(self, x):

.. warning::

Assumes treatment is given to those ABOVE the treatment threshold.
Assumes treatment is given to those ABOVE the treatment threshold.
"""
return np.greater_equal(x, self.treatment_threshold)

Expand All @@ -536,10 +569,13 @@ def plot(self):
ax=ax,
)
# create strings to compose title
r2 = f"Bayesian $R^2$ on all data = {self.score.r2:.3f} (std = {self.score.r2_std:.3f})"
title_info = f"{self.score.r2:.3f} (std = {self.score.r2_std:.3f})"
r2 = f"Bayesian $R^2$ on all data = {title_info}"
percentiles = self.discontinuity_at_threshold.quantile([0.03, 1 - 0.03]).values
ci = r"$CI_{94\%}$" + f"[{percentiles[0]:.2f}, {percentiles[1]:.2f}]"
discon = f"Discontinuity at threshold = {self.discontinuity_at_threshold.mean():.2f}, "
discon = f"""
Discontinuity at threshold = {self.discontinuity_at_threshold.mean():.2f},
"""
ax.set(title=r2 + "\n" + discon + ci)
# Intervention line
ax.axvline(
Expand All @@ -559,7 +595,7 @@ def summary(self):
print(f"Formula: {self.formula}")
print(f"Running variable: {self.running_variable_name}")
print(f"Threshold on running variable: {self.treatment_threshold}")
print(f"\nResults:")
print("\nResults:")
print(
f"Discontinuity at threshold = {self.discontinuity_at_threshold.mean():.2f}"
)
Expand Down
7 changes: 5 additions & 2 deletions causalpy/pymc_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ def _data_setter(self, X):
pm.set_data({"X": X})

def fit(self, X, y, coords):
"""Draw samples from posterior, prior predictive, and posterior predictive distributions."""
"""Draw samples from posterior, prior predictive, and posterior predictive
distributions.
"""
self.build_model(X, y, coords)
with self.model:
self.idata = pm.sample()
Expand All @@ -43,7 +45,8 @@ def score(self, X, y):

.. caution::

The Bayesian :math:`R^2` is not the same as the traditional coefficient of determination, https://en.wikipedia.org/wiki/Coefficient_of_determination.
The Bayesian :math:`R^2` is not the same as the traditional coefficient of
determination, https://en.wikipedia.org/wiki/Coefficient_of_determination.

"""
yhat = self.predict(X)
Expand Down
Loading