Initial public upload: FastAPI + joblib inference bundle

Browse files

Files changed (9) hide show

README.md +57 -0
app.py +169 -0
artifact_manifest.json +0 -0
cost_model.joblib +3 -0
example_payload.json +16 -0
example_payload_wrapped.json +18 -0
requirements.txt +7 -0
risk_model.joblib +3 -0
uplift_model.joblib +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,57 @@

+---
+library_name: scikit-learn
+license: mit
+pipeline_tag: tabular-regression
+tags:
+- fastapi
+- scikit-learn
+- joblib
+- tabular
+- oil-gas
+- design-recommender
+- trident
+---
+# TRIDENT Design Recommender FastAPI Inference Bundle
+Model bundle untuk inference engine **TRIDENT Physics-Guided Design Recommender**.
+## Isi repo
+- `uplift_model.joblib`
+- `cost_model.joblib`
+- `risk_model.joblib`
+- `artifact_manifest.json`
+- `example_payload.json`
+- `example_payload_wrapped.json`
+- `app.py`
+- `requirements.txt`
+## Artifact info
+- Version: **1.0.0**
+- scikit-learn: **1.6.1**
+- Number of trained features: **139**
+## Outputs
+- `pred_uplift_bopd`
+- `pred_cost_usd`
+- `pred_screenout_risk`
+- `pred_gross_value_usd`
+- `pred_design_score_usd`
+## Jalankan lokal
+```bash
+pip install -r requirements.txt
+uvicorn app:app --host 0.0.0.0 --port 8000
+```
+## Health check
+```bash
+curl http://127.0.0.1:8000/health
+```
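+## Predict
+Endpoint `/predict` menerima body JSON berbentuk `{"features": {...}}`, jadi gunakan `example_payload_wrapped.json`; `example_payload.json` hanya berisi dict fitur tanpa pembungkus `features`.
+```bash
+curl -X POST "http://127.0.0.1:8000/predict" \
+  -H "Content-Type: application/json" \
+  -d @example_payload_wrapped.json
+```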

app.py ADDED Viewed

	@@ -0,0 +1,169 @@

+import json
+import joblib
+import numpy as np
+import pandas as pd
+from pathlib import Path
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from typing import Dict, Any
+# Directory that contains this file; the manifest and the .joblib artifacts are read from here.
+ARTIFACT_DIR = Path(__file__).resolve().parent
+# Process-wide registry of loaded artifacts ("manifest" plus the three models).
+# It is filled at startup and cleared at shutdown by `lifespan`.
+ml_assets = {}
+class PredictRequest(BaseModel):
+    features: Dict[str, Any] = Field(..., description="Feature dict untuk 1 well/design row")
+def encode_for_inference(
+    df_infer: pd.DataFrame,
+    trained_feature_columns,
+    encoded_category_maps,
+) -> pd.DataFrame:
+    X = df_infer.copy()
+    # encode kategorikal sesuai mapping training
+    for col, cmap in encoded_category_maps.items():
+        if col in X.columns:
+            X[col] = X[col].map(cmap)
+            # unknown category -> -1
+            X[col] = X[col].fillna(-1)
+    # tambah kolom yang hilang
+    for col in trained_feature_columns:
+        if col not in X.columns:
+            X[col] = 0
+    # buang kolom ekstra, lalu reorder persis seperti training
+    X = X[trained_feature_columns].copy()
+    # paksa numerik
+    for col in X.columns:
+        X[col] = pd.to_numeric(X[col], errors="coerce").fillna(0)
+    return X
+def enrich_derived_features(df: pd.DataFrame) -> pd.DataFrame:
+    out = df.copy()
+    # safe pressure limit
+    required_pressure_cols = [
+        "max_allowable_surface_pressure_psi",
+        "casing_pressure_limit_psi",
+        "tubing_pressure_limit_psi",
+    ]
+    if all(c in out.columns for c in required_pressure_cols):
+        out["safe_pressure_limit_psi"] = out[required_pressure_cols].min(axis=1)
+    if "safe_pressure_limit_psi" in out.columns and "max_pressure_psi" in out.columns:
+        out["pressure_headroom_psi"] = out["safe_pressure_limit_psi"] - out["max_pressure_psi"]
+    if "avg_planned_rate_bpm" in out.columns and "max_pump_rate_bpm" in out.columns:
+        denom = out["max_pump_rate_bpm"].replace(0, np.nan)
+        out["rate_to_pump_capacity"] = (out["avg_planned_rate_bpm"] / denom).fillna(0)
+    if "inventory_proppant_ton" in out.columns and "total_planned_proppant_ton" in out.columns:
+        denom = out["total_planned_proppant_ton"].replace(0, np.nan)
+        out["inventory_proppant_coverage"] = (out["inventory_proppant_ton"] / denom).fillna(0)
+    if "inventory_fluid_bbl" in out.columns and "total_planned_fluid_bbl" in out.columns:
+        denom = out["total_planned_fluid_bbl"].replace(0, np.nan)
+        out["inventory_fluid_coverage"] = (out["inventory_fluid_bbl"] / denom).fillna(0)
+    if "total_planned_fluid_bbl" in out.columns and "total_planned_proppant_ton" in out.columns:
+        denom = out["total_planned_proppant_ton"].replace(0, np.nan)
+        out["fluid_to_proppant_ratio"] = (out["total_planned_fluid_bbl"] / denom).fillna(0)
+    if "planned_stage_count" in out.columns and "total_planned_proppant_ton" in out.columns:
+        denom = out["planned_stage_count"].replace(0, np.nan)
+        out["proppant_per_stage_ton"] = (out["total_planned_proppant_ton"] / denom).fillna(0)
+    if "planned_stage_count" in out.columns and "total_planned_fluid_bbl" in out.columns:
+        denom = out["planned_stage_count"].replace(0, np.nan)
+        out["fluid_per_stage_bbl"] = (out["total_planned_fluid_bbl"] / denom).fillna(0)
+    if "lateral_treated_length_m" in out.columns and "planned_stage_count" in out.columns:
+        denom = out["planned_stage_count"].replace(0, np.nan)
+        out["derived_stage_length_m"] = (out["lateral_treated_length_m"] / denom).fillna(0)
+    return out
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    with open(ARTIFACT_DIR / "artifact_manifest.json", "r", encoding="utf-8") as f:
+        manifest = json.load(f)
+    ml_assets["manifest"] = manifest
+    ml_assets["uplift_model"] = joblib.load(ARTIFACT_DIR / "uplift_model.joblib")
+    ml_assets["cost_model"] = joblib.load(ARTIFACT_DIR / "cost_model.joblib")
+    ml_assets["risk_model"] = joblib.load(ARTIFACT_DIR / "risk_model.joblib")
+    yield
+    ml_assets.clear()
+# ASGI application object; `lifespan` loads the artifacts at startup and clears them at shutdown.
+app = FastAPI(
+    title="TRIDENT Design Recommender Inference API",
+    version="1.0.0",
+    lifespan=lifespan,
+)
+@app.get("/health")
+def health():
+    return {
+        "status": "ok",
+        "artifacts_loaded": bool(ml_assets),
+        "model_version": ml_assets.get("manifest", {}).get("artifact_version"),
+    }
+@app.post("/predict")
+def predict(req: PredictRequest):
+    try:
+        manifest = ml_assets["manifest"]
+        trained_feature_columns = manifest["trained_feature_columns"]
+        encoded_category_maps = manifest["encoded_category_maps"]
+        default_eval_days = manifest["default_eval_days"]
+        risk_penalty_usd = manifest["risk_penalty_usd"]
+        df = pd.DataFrame([req.features])
+        df = enrich_derived_features(df)
+        X = encode_for_inference(
+            df_infer=df,
+            trained_feature_columns=trained_feature_columns,
+            encoded_category_maps=encoded_category_maps,
+        )
+        uplift = float(ml_assets["uplift_model"].predict(X)[0])
+        cost = float(ml_assets["cost_model"].predict(X)[0])
+        risk = float(ml_assets["risk_model"].predict_proba(X)[0, 1])
+        oil_price = float(df["realized_oil_price_usd_bbl"].iloc[0]) if "realized_oil_price_usd_bbl" in df.columns else 65.0
+        gross_value = uplift * default_eval_days * oil_price
+        design_score = gross_value - cost - risk * risk_penalty_usd
+        response = {
+            "pred_uplift_bopd": uplift,
+            "pred_cost_usd": cost,
+            "pred_screenout_risk": risk,
+            "pred_gross_value_usd": gross_value,
+            "pred_design_score_usd": design_score,
+        }
+        # optional info untuk debugging
+        if "safe_pressure_limit_psi" in df.columns:
+            response["safe_pressure_limit_psi"] = float(df["safe_pressure_limit_psi"].iloc[0])
+        if "pressure_headroom_psi" in df.columns:
+            response["pressure_headroom_psi"] = float(df["pressure_headroom_psi"].iloc[0])
+        return response
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=str(e))

artifact_manifest.json ADDED Viewed

The diff for this file is too large to render. See raw diff

cost_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:11065680bed8e4e79b1395500ab06786a7d8437e5e95c59ece0fdeca04336c1d
+size 146913

example_payload.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "planned_stage_count": 18,
+  "avg_cluster_spacing_m": 18.5,
+  "avg_shots_per_cluster": 5,
+  "total_planned_proppant_ton": 1500.0,
+  "total_planned_fluid_bbl": 14000.0,
+  "avg_planned_rate_bpm": 28.0,
+  "avg_planned_max_concentration": 3.5,
+  "max_pump_rate_bpm": 35.0,
+  "max_allowable_surface_pressure_psi": 8500.0,
+  "casing_pressure_limit_psi": 9000.0,
+  "tubing_pressure_limit_psi": 8800.0,
+  "max_pressure_psi": 7200.0,
+  "inventory_proppant_ton": 1700.0,
+  "inventory_fluid_bbl": 16000.0
+}

example_payload_wrapped.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "features": {
+    "planned_stage_count": 18,
+    "avg_cluster_spacing_m": 18.5,
+    "avg_shots_per_cluster": 5,
+    "total_planned_proppant_ton": 1500.0,
+    "total_planned_fluid_bbl": 14000.0,
+    "avg_planned_rate_bpm": 28.0,
+    "avg_planned_max_concentration": 3.5,
+    "max_pump_rate_bpm": 35.0,
+    "max_allowable_surface_pressure_psi": 8500.0,
+    "casing_pressure_limit_psi": 9000.0,
+    "tubing_pressure_limit_psi": 8800.0,
+    "max_pressure_psi": 7200.0,
+    "inventory_proppant_ton": 1700.0,
+    "inventory_fluid_bbl": 16000.0
+  }
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+fastapi>=0.115.0
+uvicorn[standard]>=0.30.0
+pandas>=2.0.0
+numpy>=1.24.0
+scikit-learn==1.6.1
+joblib>=1.3.0
+pydantic>=2.0.0

risk_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37771c5d7d6dc99c28a21bcdfa6e872ef696ec1a140c8debfc236e58d0fd4309
+size 21339846

uplift_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44e8696de7d1142400e288c596375e20c01b7ea09f4113479c91eaccd07d40c4
+size 105060417