Simple Pipeline

[1]:
import pandas as pd
import numpy as np
from skpipes.pipeline import SkPipeline
[2]:
# Toy numerical dataset: six records over the features x1, x2 and x3.
# x2 and x3 each contain one missing value (NaN).
data = [
    dict(zip(("x1", "x2", "x3"), values))
    for values in (
        (1, 400, np.nan),
        (4.8, 250, 50),
        (3, 140, 43),
        (1.4, 357, 75),
        (2.4, np.nan, 42),
        (4, 287, 21),
    )
]

# One DataFrame row per record; preview the first five rows.
df = pd.DataFrame(data)
df.head()
[2]:
x1 x2 x3
0 1.0 400.0 NaN
1 4.8 250.0 50.0
2 3.0 140.0 43.0
3 1.4 357.0 75.0
4 2.4 NaN 42.0
[3]:
# Build a pipeline by name for numerical data, then inspect the steps it
# was assembled from (shown in the output below).
pipe = SkPipeline(
    name="median_imputer-minmax",
    data_type="numerical",
)
pipe.steps
[3]:
[('imputer', SimpleImputer(strategy='median')), ('scaler', MinMaxScaler())]
[4]:
# The string form of the pipeline is a human-readable description
# (see the output below), unlike the constructor-style repr.
str(pipe)
[4]:
'Median imputer with MinMax Scaler'
[5]:
# Fit the pipeline on the raw frame, NaN entries included.
# NOTE(review): presumably this learns the per-column medians and min/max
# ranges used by the imputer and scaler steps -- confirm against SkPipeline.
pipe.fit(df)
[5]:
SkPipeline(data_type='numerical', name='median_imputer-minmax')
[6]:
# Apply the fitted pipeline to the same frame. The result shown below is an
# array with no NaN left and every column scaled into the [0, 1] range.
pipe.transform(df)

[6]:
array([[0.        , 1.        , 0.40740741],
       [1.        , 0.42307692, 0.53703704],
       [0.52631579, 0.        , 0.40740741],
       [0.10526316, 0.83461538, 1.        ],
       [0.36842105, 0.56538462, 0.38888889],
       [0.78947368, 0.56538462, 0.        ]])
[7]:
# Fit and transform in a single call; the output below matches the result of
# the separate fit + transform calls above.
pipe.fit_transform(df)
[7]:
array([[0.        , 1.        , 0.40740741],
       [1.        , 0.42307692, 0.53703704],
       [0.52631579, 0.        , 0.40740741],
       [0.10526316, 0.83461538, 1.        ],
       [0.36842105, 0.56538462, 0.38888889],
       [0.78947368, 0.56538462, 0.        ]])