# Build a synthetic toy dataset: 100 samples drawn around 3 centers, with a
# per-cluster standard deviation of 5 and a fixed seed.
X, y = make_blobs(
    n_samples=100,
    centers=3,
    cluster_std=5,
    random_state=12,
)

# Split into training and test sets, holding out 40% of the samples for
# testing (seeded so the split is repeatable).
X_train_toy, X_test_toy, y_train_toy, y_test_toy = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=5,
)

# Visualize both splits on the same axes, using feature column 0 as the
# horizontal coordinate and feature column 1 as the vertical coordinate.
plt.scatter(
    X_train_toy[:, 0],
    X_train_toy[:, 1],
    s=60,
    label="Training set",
)
plt.scatter(
    X_test_toy[:, 0],
    X_test_toy[:, 1],
    s=60,
    label="Test set",
    color=mglearn.cm2(1),  # distinct color so test points stand out
)
plt.legend(loc="upper right")