Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
f1c346d
[training/train.py] [refactor] : standardize tensor conversion with t…
syedabdullahbukhari77 Aug 31, 2025
6808e7b
[training/train.py] [refactor] : standardize tensor conversion with t…
syedabdullahbukhari77 Aug 31, 2025
ac5779a
[training/train.py] [refactor] : standardize tensor conversion with t…
syedabdullahbukhari77 Aug 31, 2025
f236ca4
[training/train.py] [refactor] : wrap script in main() entrypoint for…
syedabdullahbukhari77 Aug 31, 2025
1186262
[training/train.py] [refactor] : wrap script in main() entrypoint for…
syedabdullahbukhari77 Aug 31, 2025
bd3000f
[training/train.py] [refactor] : finalize script by reverting to orig…
syedabdullahbukhari77 Aug 31, 2025
bf79172
Merge pull request #7 from syedabdullahbukhari77/training/dataset.py
syedabdullahbukhari77 Aug 31, 2025
7eb6726
[training/train.py] [refactor] : finalize script by reverting to orig…
syedabdullahbukhari77 Aug 31, 2025
fbacb5c
Merge pull request #8 from syedabdullahbukhari77/training/dataset.py
syedabdullahbukhari77 Aug 31, 2025
f48f61e
[notebook/revenue_forecasting_project_cloud_basedy]
syedabdullahbukhari77 Aug 31, 2025
f562412
Merge pull request #9 from syedabdullahbukhari77/main
syedabdullahbukhari77 Aug 31, 2025
0027ae6
Merge pull request #10 from syedabdullahbukhari77/prototype-notebook-1
syedabdullahbukhari77 Aug 31, 2025
b1d62a9
Merge pull request #11 from syedabdullahbukhari77/main
syedabdullahbukhari77 Aug 31, 2025
dd79620
Merge pull request #12 from syedabdullahbukhari77/training/preprocesi…
syedabdullahbukhari77 Aug 31, 2025
6b8e818
[training/validate.py] : wrap all validation code single file , furth…
syedabdullahbukhari77 Aug 31, 2025
7447339
[training/validate.py] : wrap all validation code single file , furth…
syedabdullahbukhari77 Aug 31, 2025
2f32931
Merge pull request #13 from syedabdullahbukhari77/training/validate.py
syedabdullahbukhari77 Aug 31, 2025
eaf0b2e
[notebook/revenue_forecasting_project_cloud_basedy]
syedabdullahbukhari77 Aug 31, 2025
3d45705
Merge branch 'prototype-notebook-2' of https://github.com/syedabdulla…
syedabdullahbukhari77 Aug 31, 2025
f2126c7
Merge pull request #14 from syedabdullahbukhari77/prototype-notebook-2
syedabdullahbukhari77 Aug 31, 2025
a3dc7d6
[ inference / predict.py ] (refactor) : [ add docstrings and improve …
syedabdullahbukhari77 Aug 31, 2025
88c1a32
[ inference / predict.py ] (refactor) : [ add docstrings and improve …
syedabdullahbukhari77 Aug 31, 2025
fe740a5
[ inference / predict.py ] (refactor) : [ add docstrings and improve …
syedabdullahbukhari77 Aug 31, 2025
ce829bf
Merge pull request #15 from syedabdullahbukhari77/inference/predict.py
syedabdullahbukhari77 Aug 31, 2025
7468a22
[ inference / predict.py ] (refactor) : [ add docstrings and improve …
syedabdullahbukhari77 Aug 31, 2025
f1127bf
Merge pull request #16 from syedabdullahbukhari77/inference/predict.py
syedabdullahbukhari77 Aug 31, 2025
7b5eb16
[ inference / predict.py ] (refactor) : [ add docstrings and improve …
syedabdullahbukhari77 Aug 31, 2025
25cc9f9
feat(app): add Streamlit UI for business forecasting with file upload…
syedabdullahbukhari77 Sep 2, 2025
92cd4ae
feat(app): add Streamlit UI for business forecasting with file upload…
syedabdullahbukhari77 Sep 2, 2025
b63582c
feat(app): add Streamlit UI for business forecasting with file upload…
syedabdullahbukhari77 Sep 2, 2025
7bb6378
feat(app): add Streamlit UI for business forecasting with file upload…
syedabdullahbukhari77 Sep 2, 2025
7a35b6c
refactor(app): reorganize artifact loading and model inference into h…
syedabdullahbukhari77 Sep 2, 2025
5d84be2
feat(app): add missing column checks and encoder validation for robus…
syedabdullahbukhari77 Sep 2, 2025
bef3bda
[ server / app.py ] : enhance output with forecast line chart and chu…
syedabdullahbukhari77 Sep 2, 2025
47e0495
[ server / app.py ] : enhance output with forecast line chart and chu…
syedabdullahbukhari77 Sep 2, 2025
d4d0bcb
[ server / app.py ] : enhance output with forecast line chart and chu…
syedabdullahbukhari77 Sep 2, 2025
f8592e2
[ server / app.py ] : finalize Streamlit app with consistent structur…
syedabdullahbukhari77 Sep 2, 2025
0807324
Merge pull request #17 from syedabdullahbukhari77/server/app.py
syedabdullahbukhari77 Sep 2, 2025
a27ea53
Merge pull request #18 from syedabdullahbukhari77/main
syedabdullahbukhari77 Sep 2, 2025
c28c277
Merge pull request #19 from syedabdullahbukhari77/prototype-notebook-1
syedabdullahbukhari77 Sep 2, 2025
1c4a4cb
[training / dataset.py] implement FinanceDataset class for Pytorch tr…
syedabdullahbukhari77 Sep 3, 2025
a4f00f7
[training/dataset.py] implement FinanceDataset class for training pip…
syedabdullahbukhari77 Sep 3, 2025
85a0f99
(server/app.py) add Streamlit UI for business forecasting(#22) syedab…
syedabdullahbukhari77 Sep 3, 2025
b78e2b8
add Streamlit UI for business forecasting ( #23 ) from syedabdullahbu…
syedabdullahbukhari77 Sep 3, 2025
e360a92
(README.md) add streamlit deployment documentation
syedabdullahbukhari77 Sep 3, 2025
e33446e
(README.md) add streamlit deployment documentation ( #24 )
syedabdullahbukhari77 Sep 3, 2025
06c4505
(README.md) add streamlit deployment documentation
syedabdullahbukhari77 Sep 3, 2025
d3ca8a5
(README.md) add streamlit deployment documentation
syedabdullahbukhari77 Sep 3, 2025
0d9eedc
(README.md) add streamlit deployment documentation ( #26 )
syedabdullahbukhari77 Sep 3, 2025
2b9af45
wrap script in main() entrypoint for cleaner execution ( #27 )
syedabdullahbukhari77 Sep 4, 2025
dc6870b
wrap script in main() entrypoint for cleaner execution ( #28 )
syedabdullahbukhari77 Sep 4, 2025
58cb67e
update documentation ( #29 )
abdass Sep 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 33 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# 📊 Business KPI Prediction from Financial Data (PyTorch)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1PZDRWDH1c7dxteG9uOvPkiQ5mfuMPHw7)

---

## 1. Introduction
This repository implements a **multi-task deep learning model** for predicting key business performance indicators (KPIs) from tabular financial/accounting data.
The model jointly learns three predictive tasks:
Expand All @@ -18,7 +22,7 @@ Conventional financial forecasting methods (linear models, rule-based systems, s
This project demonstrates how **multi-task learning (MTL)** can serve as a scalable, modern approach to forecasting KPIs, providing:

- **One model → multiple outputs**
- **Feature integration** across categorical, numerical, and time-based inputs
- **Feature integration** across categorical, numerical, and temporal inputs
- **Extensible baseline** for real-world datasets

---
Expand Down Expand Up @@ -46,10 +50,21 @@ This project demonstrates how **multi-task learning (MTL)** can serve as a scala

---

## 5. Training Setup
## 5. Preprocessing Pipeline
All preprocessing steps are modularized in `preprocessing.py`:
- Missing value handling
- Feature scaling (with `scaler.pkl`)
- Label/categorical encoding (with `encoders.pkl`)
- Automatic column validation during inference

This ensures **consistency between training and deployment**.

---

## 6. Training Setup
- **Split:** 2020–2024 → training | 2024–2025 → validation/testing
- **Optimizer:** Adam (`lr = 5e-5`)
- **Loss:** `Loss = MSE(revenue) + MSE(risk) + BCE(churn)`
- **Loss:** MSE(revenue) + MSE(risk) + BCE(churn)
- **Batch size:** 128
- **Epochs:** 100

Expand All @@ -64,7 +79,7 @@ This project demonstrates how **multi-task learning (MTL)** can serve as a scala

---

## 6. Results
## 7. Results
- Model shows **stable convergence** on all three tasks.
- Training and validation losses are closely aligned → low overfitting on synthetic data.
- Demonstrates feasibility of **joint KPI forecasting** using deep learning.
Expand All @@ -73,10 +88,19 @@ This project demonstrates how **multi-task learning (MTL)** can serve as a scala

---

## 7. Usage
## 8. Deployment (Streamlit App)
This repository includes a **Streamlit web app** for interactive predictions.

### Run Locally
Make sure you have trained the model and saved artifacts (`finance_model.pth`, `scaler.pkl`, `encoders.pkl`) in the `models/` folder.

### Installation
```bash
git clone https://github.com/syedabdullahbukhari77/Revenue_Forecasting_Business_Prediction_model_Pytorch
cd Revenue_Forecasting_Business_Prediction_model_Pytorch
pip install -r requirements.txt
# Step 1: Train the model
python train.py

# Step 2: Validate model performance
python validate.py

# Step 3: Launch Streamlit app
streamlit run server/app.py

34,002 changes: 1 addition & 34,001 deletions dataset/synthetic_financial_data_bukharii.csv

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that all content in dataset/synthetic_financial_data_bukharii.csv was accidentally removed in this commit. Could you please restore the file before we merge PR (#29)? Otherwise, it will break the project. @abdass

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions inference/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import torch
import numpy as np
from models.finance_model import finance_model

def load_model(path="models/finance_model.pth", input_dim=9, device="cpu"):
    """Build a ``finance_model`` and restore its saved weights.

    Args:
        path: Location of the checkpoint holding the model ``state_dict``.
        input_dim: Number of input features handed to ``finance_model``.
        device: Device the checkpoint tensors are mapped to and on which the
            returned model is placed.

    Returns:
        The model in evaluation mode, located on ``device``.
    """
    model = finance_model(input_dim)
    model.load_state_dict(torch.load(path, map_location=device))
    # ``map_location`` only relocates the checkpoint tensors; the module's own
    # parameters stay where they were created. ``predict`` moves its inputs to
    # ``device``, so the model has to live there as well.
    model.to(device)
    model.eval()
    return model

def predict(model, features: list, device="cpu"):
    """Run the model on a single sample and return its three outputs.

    Args:
        model: Callable returning a ``(revenue, risk, churn)`` triple of
            single-element tensors for a one-row input batch.
        features: Flat sequence of feature values for one sample.
        device: Device the input tensor is moved to before the forward pass.

    Returns:
        A dict with ``"revenue"`` and ``"risk"`` as plain floats and
        ``"churn_probability"`` as the sigmoid of the churn output.
    """
    # Shape the flat feature list into a batch containing exactly one row.
    single_row = np.array(features).reshape(1, -1)
    batch = torch.tensor(single_row, dtype=torch.float32).to(device)

    with torch.no_grad():
        outputs = model(batch)
    revenue, risk, churn = outputs

    result = {}
    result["revenue"] = revenue.item()
    result["risk"] = risk.item()
    # The churn output is treated as a logit and squashed to a probability.
    result["churn_probability"] = torch.sigmoid(churn).item()
    return result
81 changes: 81 additions & 0 deletions server/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import streamlit as st
import pandas as pd
import torch
import os, sys
import joblib

# make sure we can import from parent folder
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from models.finance_model import finance_model

# Ordered input columns expected by the app (must match training!).
# Kept as a list: it is used directly as a DataFrame column selector.
FEATURES = [
    'customer_tenure',
    'industry',
    'some_other_features',
    'company_size',
    'region',
    'operating_margin',
    'debt_ratio',
    'log_revenue',
    'gross_profit',
]

@st.cache_resource
def load_artifacts(
    model_path="models/finance_model.pth",
    input_dim=9,
    scaler_path="models/scaler.pkl",
    encoders_path="models/encoders.pkl",
):
    """Load the trained model plus its preprocessing artifacts.

    Decorated with ``st.cache_resource`` so the artifacts are not reloaded on
    every call with the same arguments.

    Args:
        model_path: Checkpoint holding the model ``state_dict``.
        input_dim: Number of input features handed to ``finance_model``.
        scaler_path: joblib file containing the fitted scaler.
        encoders_path: joblib file containing the fitted encoders.

    Returns:
        A ``(model, scaler, encoders)`` tuple; the model is on CPU and in
        evaluation mode.
    """
    model = finance_model(input_dim=input_dim)
    model.load_state_dict(torch.load(model_path, map_location="cpu"))
    model.eval()

    # NOTE(security): joblib.load unpickles arbitrary objects. Only point these
    # paths at artifacts produced by this project's own training run.
    scaler = joblib.load(scaler_path)
    encoders = joblib.load(encoders_path)
    return model, scaler, encoders

st.title("📊 Business Forecasting App")
st.write("Predict **Revenue**, **Risk**, and **Churn** from business financial data using PyTorch.")

# File upload
uploaded_file = st.file_uploader("Upload a CSV file with business features", type=["csv"])

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    st.write("### Uploaded Data (raw)")
    st.dataframe(df.head())

    # --- Column check ---
    missing = [col for col in FEATURES if col not in df.columns]
    if missing:
        st.error(f"❌ Missing required columns: {missing}")
    elif df.empty:
        # A header-only CSV would otherwise fail inside the scaler.
        st.error("❌ The uploaded file contains no data rows.")
    else:
        # Drop extra columns automatically. Copy so the per-column encoding
        # below writes to an independent frame rather than a slice of the
        # uploaded one.
        df = df[FEATURES].copy()

        # --- Load model and preprocessing artifacts (cached across reruns) ---
        model, scaler, encoders = load_artifacts(input_dim=len(FEATURES))

        # --- Apply encoders ---
        for col in df.select_dtypes(include=['object']).columns:
            if col not in encoders:
                st.error(f"No encoder found for column {col}")
                st.stop()
            try:
                df[col] = encoders[col].transform(df[col])
            except ValueError as exc:
                # Presumably raised for categories unseen at fit time;
                # report it instead of showing a raw traceback.
                st.error(f"Could not encode column {col}: {exc}")
                st.stop()

        # --- Scale numeric features ---
        X = scaler.transform(df)
        X_tensor = torch.tensor(X, dtype=torch.float32)

        # --- Run predictions ---
        with torch.no_grad():
            revenue, risk, churn = model(X_tensor)

        # Flatten each head so the DataFrame always receives 1-D columns,
        # whether the model emits shape (N,), (N, 1) or a scalar for one row.
        results = pd.DataFrame({
            "Revenue_Pred": revenue.numpy().reshape(-1),
            "Risk_Score": risk.numpy().reshape(-1),
            "Churn_Prob": torch.sigmoid(churn).numpy().reshape(-1)
        })

        st.write("### Predictions")
        st.dataframe(results.head())

        # Visualization
        st.write("### 📈 Forecast Visualization")
        st.line_chart(results[["Revenue_Pred"]])
        st.bar_chart(results[["Churn_Prob"]])
42 changes: 42 additions & 0 deletions training/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# training/train.py
import torch
import torch.nn as nn
import torch.optim as optim
from models.finance_model import finance_model
from training.preprocessing import load_and_preprocess
import joblib
import os

# 1. Load and preprocess data
X_train, X_val, y_train, y_val, scaler, encoders = load_and_preprocess("synthetic_financial_data_bukharii.csv")

# 2. Convert to tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

# 3. Model
model = finance_model(input_dim=X_train.shape[1])
criterion = nn.MSELoss()  # regression loss for the revenue and risk heads
# Inference applies torch.sigmoid() to the churn output, i.e. treats it as a
# logit, so the churn head is trained with the matching logit-based BCE loss.
# Assumes y_train[:, 2] holds 0/1 churn labels — TODO confirm against
# load_and_preprocess.
churn_criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 4. Train loop (very simple for demo): one full-batch step per epoch
for epoch in range(20):  # increase epochs as needed
    model.train()
    optimizer.zero_grad()
    revenue, risk, churn = model(X_train)
    # Target columns: 0 = revenue, 1 = risk, 2 = churn.
    loss = (
        criterion(revenue, y_train[:, 0])
        + criterion(risk, y_train[:, 1])
        + churn_criterion(churn, y_train[:, 2])
    )
    loss.backward()
    optimizer.step()

    if epoch % 5 == 0:
        print(f"Epoch {epoch} - Loss: {loss.item():.4f}")

# 5. Save artifacts
os.makedirs("models", exist_ok=True)
torch.save(model.state_dict(), "models/finance_model.pth")
joblib.dump(scaler, "models/scaler.pkl")
joblib.dump(encoders, "models/encoders.pkl")

print("Training complete. Model & preprocessing saved in /models/")
21 changes: 21 additions & 0 deletions training/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import torch

def validate(model, loss_fn1, loss_fn2, device, val_loader):
    """Compute the mean combined loss over a validation loader.

    Args:
        model: Callable with an ``eval()`` method that returns a
            ``(revenue, risk, churn)`` triple for a batch of inputs.
        loss_fn1: Loss applied to the revenue and risk predictions.
        loss_fn2: Loss applied to the churn prediction.
        device: Device each batch is moved to before the forward pass.
        val_loader: Iterable of ``(X_batch, y_batch)`` pairs, where columns
            0, 1 and 2 of ``y_batch`` are the revenue, risk and churn targets.
            It does not need to implement ``__len__``.

    Returns:
        The sum of the three losses, averaged over the number of batches.

    Raises:
        ValueError: If ``val_loader`` yields no batches.
    """
    model.eval()
    val_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_rev, y_risk, y_churn = y_batch[:, 0], y_batch[:, 1], y_batch[:, 2]

            pred_rev, pred_risk, pred_churn = model(X_batch)

            loss_rev = loss_fn1(pred_rev, y_rev)
            loss_risk = loss_fn1(pred_risk, y_risk)
            loss_churn = loss_fn2(pred_churn, y_churn)

            val_loss += (loss_rev + loss_risk + loss_churn).item()
            # Count batches here rather than calling len(), so generators and
            # other unsized iterables are supported too.
            num_batches += 1

    if num_batches == 0:
        raise ValueError("val_loader yielded no batches; cannot compute validation loss")

    return val_loss / num_batches