anekazek committed on
Commit
aa5ea1b
·
verified ·
1 Parent(s): 36947e4

Initial public upload: FastAPI + joblib inference bundle

Browse files
README.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: scikit-learn
3
+ license: mit
4
+ pipeline_tag: tabular-regression
5
+ tags:
6
+ - fastapi
7
+ - scikit-learn
8
+ - joblib
9
+ - tabular
10
+ - oil-gas
11
+ - design-recommender
12
+ - trident
13
+ ---
14
+
15
+ # TRIDENT Design Recommender FastAPI Inference Bundle
16
+
17
+ Model bundle untuk inference engine **TRIDENT Physics-Guided Design Recommender**.
18
+
19
+ ## Isi repo
20
+ - `uplift_model.joblib`
21
+ - `cost_model.joblib`
22
+ - `risk_model.joblib`
23
+ - `artifact_manifest.json`
24
+ - `example_payload.json`
25
+ - `example_payload_wrapped.json`
26
+ - `app.py`
27
+ - `requirements.txt`
28
+
29
+ ## Artifact info
30
+ - Version: **1.0.0**
31
+ - scikit-learn: **1.6.1**
32
+ - Number of trained features: **139**
33
+
34
+ ## Outputs
35
+ - `pred_uplift_bopd`
36
+ - `pred_cost_usd`
37
+ - `pred_screenout_risk`
38
+ - `pred_gross_value_usd`
39
+ - `pred_design_score_usd`
40
+
41
+ ## Jalankan lokal
42
+ ```bash
43
+ pip install -r requirements.txt
44
+ uvicorn app:app --host 0.0.0.0 --port 8000
45
+ ```
46
+
47
+ ## Health check
48
+ ```bash
49
+ curl http://127.0.0.1:8000/health
50
+ ```
51
+
52
+ ## Predict
53
+ ```bash
54
+ curl -X POST "http://127.0.0.1:8000/predict" \
55
+ -H "Content-Type: application/json" \
56
+ -d @example_payload_wrapped.json
57
+ ```
app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import joblib
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ from pathlib import Path
7
+ from contextlib import asynccontextmanager
8
+ from fastapi import FastAPI, HTTPException
9
+ from pydantic import BaseModel, Field
10
+ from typing import Dict, Any
11
+
12
+
13
+ ARTIFACT_DIR = Path(__file__).resolve().parent
14
+
15
+ ml_assets = {}
16
+
17
+
18
class PredictRequest(BaseModel):
    # Request body: a single required field holding one flat mapping of
    # feature name -> raw value for one well/design row.
    # A `#` comment is used instead of a class docstring so the schema
    # generated from this model stays exactly as it was.
    features: Dict[str, Any] = Field(
        ...,
        description="Feature dict untuk 1 well/design row",
    )
20
+
21
+
22
def encode_for_inference(
    df_infer: pd.DataFrame,
    trained_feature_columns,
    encoded_category_maps,
) -> pd.DataFrame:
    """Build the all-numeric feature frame in the requested column layout.

    Args:
        df_infer: Raw (optionally enriched) feature rows. Not modified.
        trained_feature_columns: Ordered column names the result must have.
        encoded_category_maps: Mapping of column name to a
            ``{category: code}`` dict applied to that column when present.

    Returns:
        A new DataFrame containing exactly ``trained_feature_columns`` in
        that order. Columns absent from ``df_infer`` are filled with 0,
        values missing from a category map become -1, and anything that
        cannot be parsed as a number becomes 0.
    """
    X = df_infer.copy()

    # Encode categoricals with the supplied maps; a value with no entry in
    # its map (including a missing value) is encoded as -1.
    for col, cmap in encoded_category_maps.items():
        if col in X.columns:
            X[col] = X[col].map(cmap).fillna(-1)

    # A single reindex drops extra columns, creates the missing ones as 0 and
    # applies the target order. Inserting the missing columns one at a time
    # fragments the frame when many columns are absent from the payload.
    X = X.reindex(columns=list(trained_feature_columns), fill_value=0)

    # Force every column to numeric; unparseable values fall back to 0.
    for col in X.columns:
        X[col] = pd.to_numeric(X[col], errors="coerce").fillna(0)

    return X
49
+
50
+
51
# Columns whose row-wise minimum becomes "safe_pressure_limit_psi".
_PRESSURE_LIMIT_COLS = (
    "max_allowable_surface_pressure_psi",
    "casing_pressure_limit_psi",
    "tubing_pressure_limit_psi",
)

# (output column, numerator column, denominator column), listed in the order
# the derived columns are appended to the frame.
_RATIO_FEATURES = (
    ("rate_to_pump_capacity", "avg_planned_rate_bpm", "max_pump_rate_bpm"),
    ("inventory_proppant_coverage", "inventory_proppant_ton", "total_planned_proppant_ton"),
    ("inventory_fluid_coverage", "inventory_fluid_bbl", "total_planned_fluid_bbl"),
    ("fluid_to_proppant_ratio", "total_planned_fluid_bbl", "total_planned_proppant_ton"),
    ("proppant_per_stage_ton", "total_planned_proppant_ton", "planned_stage_count"),
    ("fluid_per_stage_bbl", "total_planned_fluid_bbl", "planned_stage_count"),
    ("derived_stage_length_m", "lateral_treated_length_m", "planned_stage_count"),
)


def _safe_ratio(numerator: pd.Series, denominator: pd.Series) -> pd.Series:
    """Divide element-wise, yielding 0 where the result is undefined."""
    # A zero denominator is turned into NaN first so the division never
    # produces inf; every NaN result is then reported as 0.
    return (numerator / denominator.replace(0, np.nan)).fillna(0)


def enrich_derived_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with the derivable engineering columns added.

    Each derived column is added only when all of its input columns are
    present; otherwise it is skipped. The input frame is not modified.

    Args:
        df: Raw feature rows.

    Returns:
        A new DataFrame with the original columns plus, where computable,
        ``safe_pressure_limit_psi``, ``pressure_headroom_psi`` and the ratio
        columns named in ``_RATIO_FEATURES``.
    """
    out = df.copy()

    # Safe pressure limit: the smallest of the three pressure limits per row.
    if all(c in out.columns for c in _PRESSURE_LIMIT_COLS):
        out["safe_pressure_limit_psi"] = out[list(_PRESSURE_LIMIT_COLS)].min(axis=1)

    # Headroom uses the safe limit whether it was just derived or supplied
    # directly in the input.
    if "safe_pressure_limit_psi" in out.columns and "max_pressure_psi" in out.columns:
        out["pressure_headroom_psi"] = out["safe_pressure_limit_psi"] - out["max_pressure_psi"]

    for target, numerator_col, denominator_col in _RATIO_FEATURES:
        if numerator_col in out.columns and denominator_col in out.columns:
            out[target] = _safe_ratio(out[numerator_col], out[denominator_col])

    return out
95
+
96
+
97
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the manifest and the three models on startup; drop them on shutdown.

    Everything is read from ``ARTIFACT_DIR`` and published through the
    module-level ``ml_assets`` dict under the keys ``manifest``,
    ``uplift_model``, ``cost_model`` and ``risk_model``.
    """
    # Stage the artifacts locally so ml_assets is populated only once every
    # file has loaded; a failed load then never leaves a half-filled registry.
    loaded = {}
    with open(ARTIFACT_DIR / "artifact_manifest.json", "r", encoding="utf-8") as f:
        loaded["manifest"] = json.load(f)

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only artifacts from a trusted source should be placed in ARTIFACT_DIR.
    for name in ("uplift_model", "cost_model", "risk_model"):
        loaded[name] = joblib.load(ARTIFACT_DIR / f"{name}.joblib")

    ml_assets.update(loaded)
    try:
        yield
    finally:
        # Release the models even if an exception is thrown in at the yield.
        ml_assets.clear()
108
+
109
+
110
# ASGI application; artifact loading and cleanup are delegated to `lifespan`.
app = FastAPI(
    title="TRIDENT Design Recommender Inference API",
    version="1.0.0",
    lifespan=lifespan,
)


# Health probe: always reports "ok", plus whether anything is currently held
# in ml_assets and the manifest's artifact_version (None when unavailable).
@app.get("/health")
def health():
    manifest = ml_assets.get("manifest", {})
    payload = {"status": "ok"}
    payload["artifacts_loaded"] = bool(ml_assets)
    payload["model_version"] = manifest.get("artifact_version")
    return payload
124
+
125
+
126
# Oil price (USD/bbl) used when the request does not supply
# "realized_oil_price_usd_bbl".
DEFAULT_OIL_PRICE_USD_BBL = 65.0


@app.post("/predict")
def predict(req: PredictRequest):
    """Score one design row: predicted uplift, cost, screen-out risk and value.

    Responds with 503 when the model artifacts or required manifest entries
    are unavailable, and with 400 when the supplied features cannot be scored.
    """
    # Server-side state is resolved on its own so that a missing artifact or
    # manifest key is reported as a service problem, not as a bad request.
    try:
        manifest = ml_assets["manifest"]
        trained_feature_columns = manifest["trained_feature_columns"]
        encoded_category_maps = manifest["encoded_category_maps"]
        default_eval_days = manifest["default_eval_days"]
        risk_penalty_usd = manifest["risk_penalty_usd"]
        uplift_model = ml_assets["uplift_model"]
        cost_model = ml_assets["cost_model"]
        risk_model = ml_assets["risk_model"]
    except KeyError as exc:
        raise HTTPException(
            status_code=503,
            detail=f"Model artifacts unavailable: missing {exc}",
        ) from exc

    try:
        df = pd.DataFrame([req.features])
        df = enrich_derived_features(df)

        X = encode_for_inference(
            df_infer=df,
            trained_feature_columns=trained_feature_columns,
            encoded_category_maps=encoded_category_maps,
        )

        uplift = float(uplift_model.predict(X)[0])
        cost = float(cost_model.predict(X)[0])
        # Second column of predict_proba is used as the risk probability.
        risk = float(risk_model.predict_proba(X)[0, 1])

        if "realized_oil_price_usd_bbl" in df.columns:
            oil_price = float(df["realized_oil_price_usd_bbl"].iloc[0])
        else:
            oil_price = DEFAULT_OIL_PRICE_USD_BBL

        gross_value = uplift * default_eval_days * oil_price
        design_score = gross_value - cost - risk * risk_penalty_usd

        response = {
            "pred_uplift_bopd": uplift,
            "pred_cost_usd": cost,
            "pred_screenout_risk": risk,
            "pred_gross_value_usd": gross_value,
            "pred_design_score_usd": design_score,
        }

        # Optional debugging info. Non-finite values are left out because
        # NaN/inf cannot be represented in a standard JSON response.
        for key in ("safe_pressure_limit_psi", "pressure_headroom_psi"):
            if key in df.columns:
                value = float(df[key].iloc[0])
                if np.isfinite(value):
                    response[key] = value

        return response

    except Exception as exc:
        # Failures while scoring the supplied features stay a 400, as before;
        # the original exception is chained for server-side tracebacks.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
artifact_manifest.json ADDED
The diff for this file is too large to render. See raw diff
 
cost_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11065680bed8e4e79b1395500ab06786a7d8437e5e95c59ece0fdeca04336c1d
3
+ size 146913
example_payload.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "planned_stage_count": 18,
3
+ "avg_cluster_spacing_m": 18.5,
4
+ "avg_shots_per_cluster": 5,
5
+ "total_planned_proppant_ton": 1500.0,
6
+ "total_planned_fluid_bbl": 14000.0,
7
+ "avg_planned_rate_bpm": 28.0,
8
+ "avg_planned_max_concentration": 3.5,
9
+ "max_pump_rate_bpm": 35.0,
10
+ "max_allowable_surface_pressure_psi": 8500.0,
11
+ "casing_pressure_limit_psi": 9000.0,
12
+ "tubing_pressure_limit_psi": 8800.0,
13
+ "max_pressure_psi": 7200.0,
14
+ "inventory_proppant_ton": 1700.0,
15
+ "inventory_fluid_bbl": 16000.0
16
+ }
example_payload_wrapped.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "features": {
3
+ "planned_stage_count": 18,
4
+ "avg_cluster_spacing_m": 18.5,
5
+ "avg_shots_per_cluster": 5,
6
+ "total_planned_proppant_ton": 1500.0,
7
+ "total_planned_fluid_bbl": 14000.0,
8
+ "avg_planned_rate_bpm": 28.0,
9
+ "avg_planned_max_concentration": 3.5,
10
+ "max_pump_rate_bpm": 35.0,
11
+ "max_allowable_surface_pressure_psi": 8500.0,
12
+ "casing_pressure_limit_psi": 9000.0,
13
+ "tubing_pressure_limit_psi": 8800.0,
14
+ "max_pressure_psi": 7200.0,
15
+ "inventory_proppant_ton": 1700.0,
16
+ "inventory_fluid_bbl": 16000.0
17
+ }
18
+ }
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi>=0.115.0
2
+ uvicorn[standard]>=0.30.0
3
+ pandas>=2.0.0
4
+ numpy>=1.24.0
5
+ scikit-learn==1.6.1
6
+ joblib>=1.3.0
7
+ pydantic>=2.0.0
risk_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37771c5d7d6dc99c28a21bcdfa6e872ef696ec1a140c8debfc236e58d0fd4309
3
+ size 21339846
uplift_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e8696de7d1142400e288c596375e20c01b7ea09f4113479c91eaccd07d40c4
3
+ size 105060417