Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest] #windows-latest
os: [ubuntu-latest, windows-latest] #macos-latest fails because no matching distribution found for nvidia-cublas-cu12==12.1.3.1
python-version: ["3.10"]

steps:
Expand All @@ -24,19 +24,20 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .
pip install .[dev]
python -m pip install --upgrade pip setuptools wheel
if [ -f dev-requirements.txt ]; then pip install -r dev-requirements.txt; fi
continue-on-error: false
- name: Run Ruff
run: ruff check --fix --config=pyproject.toml --output-format=github .
- name: Run Ruff
run: ruff check --output-format=github .
- name: Test with pytest
run: |
coverage run -m pytest -v -s
- name: Generate Coverage Report
run: |
coverage report -m
- name: test if app starts
run: | #do I need to exit this step?
cd /synthetic_data_talk
streamlit run 🔓_Synthetic_Data.py
- name: test streamlit app
uses: streamlit/streamlit-app-action@v0.0.3
with:
app-path: synthetic_data_talk/🔓_Synthetic_Data.py
ruff: true

19 changes: 0 additions & 19 deletions .streamlit/config.toml

This file was deleted.

71 changes: 36 additions & 35 deletions demo_notebooks/demo_ctgan_best_setting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import warnings\n",
"import json\n",
"import os\n",
"import warnings\n",
"\n",
"import pandas as pd\n",
"\n",
"os.chdir(\"/home/antonia/code/Unlocking-Information-Creating-Synthetic-Data-for-Open-Access\") # insert your path to the project here\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"folder_name = 'best_setting'"
"folder_name = \"best_setting\""
]
},
{
Expand All @@ -31,7 +32,7 @@
}
],
"source": [
"rpad_df = pd.read_excel(r'data/RPAD_data_small.xlsx', engine='openpyxl')\n",
"rpad_df = pd.read_excel(r\"data/RPAD_data_small.xlsx\", engine=\"openpyxl\")\n",
"print(rpad_df.shape)"
]
},
Expand Down Expand Up @@ -121,7 +122,7 @@
"def calculate_bmi(weight, height):\n",
" return weight/(height/100)**2\n",
"\n",
"rpad_df.drop(rpad_df[round(rpad_df['BMI'],0) != round(calculate_bmi(rpad_df['Weight'], rpad_df['Height']),0)].index, inplace=True)\n",
"rpad_df.drop(rpad_df[round(rpad_df[\"BMI\"],0) != round(calculate_bmi(rpad_df[\"Weight\"], rpad_df[\"Height\"]),0)].index, inplace=True)\n",
"print(rpad_df.shape)"
]
},
Expand All @@ -133,15 +134,15 @@
"source": [
"# load the constraint from the file\n",
"model.load_custom_constraint_classes(\n",
" filepath='/home/antonia/code/Unlocking-Information/utils/example_custom_constraint.py',\n",
" class_names=['BMI_Formulae']\n",
" filepath=\"/home/antonia/code/Unlocking-Information/utils/example_custom_constraint.py\",\n",
" class_names=[\"BMI_Formulae\"],\n",
")\n",
"\n",
"constraint = {\n",
" 'constraint_class': 'BMI_Formulae',\n",
" 'constraint_parameters': {\n",
" 'column_names': ['BMI', 'Weight', 'Height']\n",
" }\n",
" \"constraint_class\": \"BMI_Formulae\",\n",
" \"constraint_parameters\": {\n",
" \"column_names\": [\"BMI\", \"Weight\", \"Height\"],\n",
" },\n",
"}"
]
},
Expand All @@ -153,19 +154,19 @@
"source": [
"def get_positive_constraint(column_name, strict):\n",
" return {\n",
" 'constraint_class': 'Positive',\n",
" 'constraint_parameters': {\n",
" 'column_name': column_name,\n",
" 'strict_boundaries': strict\n",
" \"constraint_class\": \"Positive\",\n",
" \"constraint_parameters\": {\n",
" \"column_name\": column_name,\n",
" \"strict_boundaries\": strict,\n",
" },\n",
" }\n",
" }\n",
"positive_bmi = get_positive_constraint('BMI', True)\n",
"positive_weight = get_positive_constraint('Weight', True)\n",
"positive_height = get_positive_constraint('Height', True)\n",
"positive_los = get_positive_constraint('Length_of_Stay', True)\n",
"positive_age = get_positive_constraint('Age', False)\n",
"positive_a_score = get_positive_constraint('Alvarado_Score', False)\n",
"positive_pa_score = get_positive_constraint('Paedriatic_Appendicitis_Score', False)"
"positive_bmi = get_positive_constraint(\"BMI\", True)\n",
"positive_weight = get_positive_constraint(\"Weight\", True)\n",
"positive_height = get_positive_constraint(\"Height\", True)\n",
"positive_los = get_positive_constraint(\"Length_of_Stay\", True)\n",
"positive_age = get_positive_constraint(\"Age\", False)\n",
"positive_a_score = get_positive_constraint(\"Alvarado_Score\", False)\n",
"positive_pa_score = get_positive_constraint(\"Paedriatic_Appendicitis_Score\", False)"
]
},
{
Expand Down Expand Up @@ -299,7 +300,7 @@
"quality_report = evaluate_quality(\n",
" rpad_df,\n",
" synth_data,\n",
" metadata\n",
" metadata,\n",
")\n",
"\n",
"quality_report.save(filepath=f\"pages/evaluation/{folder_name}/results/quality_report.pkl\")"
Expand Down Expand Up @@ -378,10 +379,10 @@
"y": [
0.934156378600823,
0.8244170096021948,
0.9506172839506173,
0.9506172839506172,
0.9122085048010974,
0.874828060522696,
0.9324393603016887,
0.9324393603016888,
0.9042516216204474
],
"yaxis": "y"
Expand Down Expand Up @@ -425,7 +426,7 @@
],
"xaxis": "x",
"y": [
0.9876543209876543,
0.9876543209876544,
0.887517146776406,
0.8710562414266118
],
Expand Down Expand Up @@ -1296,7 +1297,7 @@
}
],
"source": [
"fig = quality_report.get_visualization('Column Shapes')\n",
"fig = quality_report.get_visualization(\"Column Shapes\")\n",
"fig.show()\n",
"fig.write_image(file = f\"pages/evaluation/{folder_name}/images/col_shapes.png\")"
]
Expand Down Expand Up @@ -3680,10 +3681,10 @@
"fig = get_column_plot(\n",
" real_data=rpad_df,\n",
" synthetic_data=synth_data,\n",
" column_name='Sex',\n",
" metadata=metadata\n",
" column_name=\"Sex\",\n",
" metadata=metadata,\n",
")\n",
" \n",
"\n",
"fig.show()\n",
"fig.write_image(file = f\"pages/evaluation/{folder_name}/images/col_plot_sex.png\")\n"
]
Expand Down Expand Up @@ -11384,18 +11385,18 @@
"fig_categorical = get_column_pair_plot(\n",
" real_data=rpad_df,\n",
" synthetic_data=synth_data,\n",
" column_names=['Severity', 'Management'],\n",
" column_names=[\"Severity\", \"Management\"],\n",
" metadata=metadata)\n",
" \n",
"\n",
"fig_categorical.show()\n",
"fig_categorical.write_image(file = f\"pages/evaluation/{folder_name}/images/Severity_Management_pair_plot.png\")\n",
"\n",
"fig_numerical = get_column_pair_plot(\n",
" real_data=rpad_df,\n",
" synthetic_data=synth_data,\n",
" column_names=['Weight', 'Height'],\n",
" column_names=[\"Weight\", \"Height\"],\n",
" metadata=metadata)\n",
" \n",
"\n",
"fig_numerical.show()\n",
"fig_numerical.write_image(file = f\"pages/evaluation/{folder_name}/images/Weight_Height_pair_plot.png\")"
]
Expand Down
27 changes: 14 additions & 13 deletions demo_notebooks/demo_ctgan_default.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import warnings\n",
"import os\n",
"import warnings\n",
"\n",
"import pandas as pd\n",
"\n",
"os.chdir(\"/path/to/your/project/\") # insert your path to the project here\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"folder_name = 'default'"
"folder_name = \"default\""
]
},
{
Expand All @@ -30,7 +31,7 @@
}
],
"source": [
"rpad_df = pd.read_excel(r'data/RPAD_data_small.xlsx', engine='openpyxl')\n",
"rpad_df = pd.read_excel(r\"data/RPAD_data_small.xlsx\", engine=\"openpyxl\")\n",
"print(rpad_df.shape)\n",
"# print(rpad_df.head(5))"
]
Expand Down Expand Up @@ -192,7 +193,7 @@
"quality_report = evaluate_quality(\n",
" rpad_df,\n",
" synth_data,\n",
" metadata\n",
" metadata,\n",
")\n",
"\n",
"quality_report.save(filepath=f\"pages/evaluation/{folder_name}/results/quality_report.pkl\")"
Expand Down Expand Up @@ -1195,7 +1196,7 @@
}
],
"source": [
"fig = quality_report.get_visualization('Column Shapes')\n",
"fig = quality_report.get_visualization(\"Column Shapes\")\n",
"fig.show()\n",
"fig.write_image(file = f\"pages/evaluation/{folder_name}/images/col_shapes.png\")"
]
Expand Down Expand Up @@ -3695,10 +3696,10 @@
"fig = get_column_plot(\n",
" real_data=rpad_df,\n",
" synthetic_data=synth_data,\n",
" column_name='Sex',\n",
" metadata=metadata\n",
" column_name=\"Sex\",\n",
" metadata=metadata,\n",
")\n",
" \n",
"\n",
"fig.show()\n",
"fig.write_image(file = f\"pages/evaluation/{folder_name}/images/col_plot_sex.png\")\n"
]
Expand Down Expand Up @@ -11823,18 +11824,18 @@
"fig_categorical = get_column_pair_plot(\n",
" real_data=rpad_df,\n",
" synthetic_data=synth_data,\n",
" column_names=['Severity', 'Management'],\n",
" column_names=[\"Severity\", \"Management\"],\n",
" metadata=metadata)\n",
" \n",
"\n",
"fig_categorical.show()\n",
"fig_categorical.write_image(file = f\"pages/evaluation/{folder_name}/images/Severity_Management_pair_plot.png\")\n",
"\n",
"fig_numerical = get_column_pair_plot(\n",
" real_data=rpad_df,\n",
" synthetic_data=synth_data,\n",
" column_names=['Weight', 'Height'],\n",
" column_names=[\"Weight\", \"Height\"],\n",
" metadata=metadata)\n",
" \n",
"\n",
"fig_numerical.show()\n",
"fig_numerical.write_image(file = f\"pages/evaluation/{folder_name}/images/Weight_Height_pair_plot.png\")"
]
Expand Down
Loading