Simple Pipeline
[1]:
import pandas as pd
import numpy as np
from skpipes.pipeline import SkPipeline
[2]:
# Toy data set: six rows, three numeric features.
# x2 and x3 each contain exactly one missing value (NaN).
feature_names = ("x1", "x2", "x3")
rows = [
    (1, 400, np.nan),
    (4.8, 250, 50),
    (3, 140, 43),
    (1.4, 357, 75),
    (2.4, np.nan, 42),
    (4, 287, 21),
]

# `data` stays a list of per-row records (one dict per observation).
data = [dict(zip(feature_names, row)) for row in rows]
df = pd.DataFrame(data)

# head() displays the first five of the six rows.
df.head()
[2]:
|   | x1  | x2    | x3   |
|---|-----|-------|------|
| 0 | 1.0 | 400.0 | NaN  |
| 1 | 4.8 | 250.0 | 50.0 |
| 2 | 3.0 | 140.0 | 43.0 |
| 3 | 1.4 | 357.0 | 75.0 |
| 4 | 2.4 | NaN   | 42.0 |
[3]:
# Build the pipeline identified by its name and data type;
# `steps` lists the stages it is made of.
pipe = SkPipeline(data_type="numerical", name="median_imputer-minmax")
pipe.steps
[3]:
[('imputer', SimpleImputer(strategy='median')), ('scaler', MinMaxScaler())]
[4]:
# str() yields a human-readable description of the selected pipeline.
str(pipe)
[4]:
'Median imputer with MinMax Scaler'
[5]:
# Fit the pipeline on the toy frame; the call returns the pipeline object,
# which is why its repr is displayed as the cell output.
pipe.fit(df)
[5]:
SkPipeline(data_type='numerical', name='median_imputer-minmax')
[6]:
# Apply the fitted pipeline; the result is a NumPy array (one column per
# input feature), not a DataFrame, and no NaN values remain.
pipe.transform(df)
[6]:
array([[0. , 1. , 0.40740741],
[1. , 0.42307692, 0.53703704],
[0.52631579, 0. , 0.40740741],
[0.10526316, 0.83461538, 1. ],
[0.36842105, 0.56538462, 0.38888889],
[0.78947368, 0.56538462, 0. ]])
[7]:
# Fit and transform in a single call; the output is identical to the
# separate fit() and transform() calls above.
pipe.fit_transform(df)
[7]:
array([[0. , 1. , 0.40740741],
[1. , 0.42307692, 0.53703704],
[0.52631579, 0. , 0.40740741],
[0.10526316, 0.83461538, 1. ],
[0.36842105, 0.56538462, 0.38888889],
[0.78947368, 0.56538462, 0. ]])