Posted on:

19 Jun 2024

0

feature name error despite creating dataframes

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from sklearn.linear_model import LinearRegression


# ## Import the relevant libraries

# ## Load the data

data = pd.read_csv(r'Feature Selection through standardization Dataset\1.02. Multiple linear regression.csv')
data.head()


x = data[['SAT','Rand 1,2,3']]
y = data['GPA']


new_data = pd.DataFrame(data=[[1700,2],[1800,1]],columns=['SAT','Rand 1,2,3'])
new_data

new_data_scaled = scaler.transform(new_data)
new_data_scaled_df = pd.DataFrame(new_data_scaled,columns=['SAT','Rand 1,2,3'])



# ## What if we removed the 'Random 1,2,3' variable?



x_2 = data[['SAT']]
scaler.fit(x_2)
x_2scaled = scaler.transform(x_2)


# Create a DataFrame with scaled features to maintain feature names
x_2scaled_df = pd.DataFrame(x_2scaled, columns=x_2.columns)


reg_2 = LinearRegression()
reg_2.fit(x_2scaled_df,y)

reg_2.coef_

reg_2.intercept_


reg_2summary = pd.DataFrame([['Bias'],['SAT']],columns=['Features'])
reg_2summary['Weights'] = reg_2.intercept_, reg_2.coef_[0]
reg_2summary


reg_2.predict( new_data_scaled_df[['SAT']].values.reshape(-1, 1))

for this last line of code above i still get the error about feature names,even though my new_data_scaled_df and reg_2 are both DataFrames with feature names??



0 answers ( 0 marked as helpful)

Submit an answer