I'm trying to run a multivariate linear regression but I'm getting an error when trying to get the coefficients of the regression model.
The error I'm getting is this: AttributeError: 'numpy.ndarray' object has no attribute 'columns'
Here's the code I'm using:
import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as seabornInstance from sklearn.model_selection import train_test_split from sklearn.linear_model import LinearRegression from sklearn import metrics %matplotlib inline # Main files dataset = pd.read_csv('namaste_econ_model.csv') dataset.shape dataset.describe() dataset.isnull().any() #Dividing data into "attributes" and "labels". X variable contains all the attributes and y variable contains labels. X = dataset[['Read?', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6' , 'x7','x8','x9','x10','x11','x12','x13','x14','x15','x16','x17','x18','x19','x20','x21','x22','x23','x24','x25','x26','x27','x28','x29','x30','x31','x32','x33','x34','x35','x36','x37','x38','x39','x40','x41','x42','x43','x44','x45','x46','x47']].values y = dataset['Change in Profit (BP)'].values plt.figure(figsize=(15,10)) plt.tight_layout() seabornInstance.distplot(dataset['Change in Profit (BP)']) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) regressor = LinearRegression() regressor.fit(X_train, y_train) coeff_df = pd.DataFrame(regressor.coef_, X.columns, columns=['Coefficient']) coeff_df Full error:
Traceback (most recent call last):
  File "<ipython-input>", line 14, in <module>
    coeff_df = pd.DataFrame(regressor.coef_, X.columns, columns=['Coefficient'])
AttributeError: 'numpy.ndarray' object has no attribute 'columns'
Any help on this will be highly appreciated!