2  Dating dataset

The data file can be downloaded from here.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the tab-separated dating data set. The file has no header row
# (header=None), so pandas labels the columns with the integers 0..3.
# NOTE(review): 'assests' may be a misspelling of 'assets' -- confirm against
# the actual directory name before changing the path.
df = pd.read_csv(
    'assests/datasets/datingTestSet2.txt',
    sep='\t',
    header=None,
)

# Columns 0-2 form the feature matrix and column 3 is the target vector.
# copy=True yields independent arrays rather than views onto the DataFrame.
X = df.loc[:, [0, 1, 2]].to_numpy(copy=True)
y = df.loc[:, 3].to_numpy(copy=True)

# Hold out 15% of the rows for testing. No random_state is passed, so the
# partition differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
)