Linear Regression#

from datascience import *
from cs104 import *
import numpy as np
%matplotlib inline
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/3933483274.py in <module>
      1 from datascience import *
----> 2 from cs104 import *
      3 import numpy as np
      4 get_ipython().run_line_magic('matplotlib', 'inline')

~/other/cs104/libs/cs104-toolbox/cs104/__init__.py in <module>
      1 from .version import __version__
      2 
----> 3 from .check import *
      4 from .exceptions import *
      5 from .animation import *

~/other/cs104/libs/cs104-toolbox/cs104/check.py in <module>
     13 import abc
     14 
---> 15 from .docs import doc_tag
     16 
     17 def in_otter():

~/other/cs104/libs/cs104-toolbox/cs104/docs.py in <module>
     89         'minimize'
     90     ]),
---> 91     (datascience.Plot, [
     92         'dot',
     93         'square',

AttributeError: module 'datascience' has no attribute 'Plot'

Helper functions for visualizations#

This code in the next cell is used in the rest of the notebook to create visualizations. You do not need to look at or understand all the details of it.

def plot_line(a, b, x): 
    """
    Plots a line using the give slope and intercept over the
    range of x-values in the x array.
    """
    xlims = make_array(min(x), max(x))
    plot = Plot()
    plot.line(xlims, a * xlims + b, lw=4, clip_on=False)

def line_predictions(a, b, x): 
    """
    Computes the prediction  y_hat = a * x + b
    where a and b are the slope and intercept and x is the set of x-values.
    """
    return a * x + b

def visualize_scatter_with_line_and_residuals(table, x_label, y_label, a, b):
    """
    Draw a scatter plot, a line, and the line segments capturing the residuals 
    for some of the points.
    """
    y_hat = line_predictions(a, b, table.column(x_label))
    prediction_table = table.with_columns(y_label + '_hat', y_hat)
    plot = table.scatter(x_label, y_label, title='a = ' + str(round(a,3)) + '; b = ' + str(round(b,3)))
    xlims = make_array(min(table.column(x_label)), max(table.column(x_label)))
    plot.line(xlims, a * xlims + b, lw=4, color="C0")
    
    every10th = prediction_table.sort(x_label).take(np.arange(0, 
                                                           prediction_table.num_rows, 
                                                           prediction_table.num_rows // 10))
    for row in every10th.rows:
        x = row.item(x_label)
        y = row.item(y_label)
        plot.line([x, x], [y, a * x + b], color='r', lw=2)    
        plot.dot(x,y,color='C0',s=70)

    return plot

def mean_squared_error(table, x_label, y_label, a, b): 
    y_hat = line_predictions(a, b, table.column(x_label))
    residual = table.column(y_label) - y_hat
    return np.mean(residual**2)

def visualize_scatter_with_line_and_mse(table, x_label, y_label, a, b):
    plot = visualize_scatter_with_line_and_residuals(table, x_label, y_label, a, b)
    mse = mean_squared_error(x_y_table, x_label, y_label, a, b)
    plot.set_title('a = ' + str(round(a,3)) + 
                '; b = ' + str(round(b,3)) + 
                '\n mse = ' + str(round(mse,3)))

    
## MSE HEAT

def plot_regression_line_and_mse_heat(table, x_label, y_label, a, b, show_mse='2d', fig=None):
        """
        Left plot: the scatter plot with line y=ax+b
        Right plot: None, 2D heat map of MSE, or 3D surface plot of MSE 
        """

        a_space = np.linspace(-0.5, 0.5, 200)
        b_space = np.linspace(-35, -8, 200)
        a_space, b_space = np.meshgrid(a_space, b_space)
        broadcasted = np.broadcast(a_space, b_space)
        mses = np.empty(broadcasted.shape)
        mses.flat = [mean_squared_error(table, 'x', 'y', a, b) for (a,b) in broadcasted]
        
        if fig is None:
            fig = Figure(1,2)

        ax = fig.axes()

        #Plot the scatter plot and best fit line on the left
        with Plot(ax[0]):
            plot_scatter_with_line(table, x_label, y_label, a, b)

        if show_mse == '2d':
            with Plot(ax[1]):
                mse = mean_squared_error(table, x_label, y_label, a, b)
                ax[1].pcolormesh(a_space, b_space, mses, cmap='viridis', 
                                 norm=colors.PowerNorm(gamma=0.25,
                                                       vmin=mses.min(), 
                                                       vmax=mses.max()));
                ax[1].scatter(a,b,s=100,color='red');
                ax[1].set_xlabel('a')
                ax[1].set_ylabel('b')
                ax[1].set_title('Mean Squared Error: ' + str(np.round(mse, 3)))

        elif show_mse == "3d": 
            ax[1] = plots.subplot(1, 2, 2, projection='3d')
            with Plot(ax[1]):
                ax[1].plot_surface(a_space, b_space, mses,
                                cmap='viridis', 
                                antialiased=False, 
                                linewidth=0,
                                norm = colors.PowerNorm(gamma=0.25,vmin=mses.min(), vmax=mses.max()))

                ax[1].plot([a],[b],[mean_squared_error(table, x_label, y_label, a, b)], 'ro',zorder=3);
                ax[1].set_xlabel('a')
                ax[1].set_ylabel('b')
                ax[1].set_title('Mean Squared Error')      

def visualize_regression_line_and_mse_heat(table, x_label, y_label, show_mse=None):
    
    a_space = np.linspace(-0.5, 0.5, 200)
    b_space = np.linspace(-35, -8, 200)
    a_space, b_space = np.meshgrid(a_space, b_space)
    broadcasted = np.broadcast(a_space, b_space)
    mses = np.empty(broadcasted.shape)
    mses.flat = [mean_squared_error(table, 'x', 'y', a, b) for (a,b) in broadcasted]
    
    def visualize_regression_line_and_mse_heat_helper(a, b, show_mse=show_mse, fig=None):
        """
        Left plot: the scatter plot with line y=ax+b
        Right plot: None, 2D heat map of MSE, or 3D surface plot of MSE 
        """

        if fig is None:
            fig = Figure(1,2)

        ax = fig.axes()

        #Plot the scatter plot and best fit line on the left
        with Plot(ax[0]):
            plot_scatter_with_line(table, x_label, y_label, a, b)

        if show_mse == '2d':
            with Plot(ax[1]):
                mse = mean_squared_error(table, x_label, y_label, a, b)
                ax[1].pcolormesh(a_space, b_space, mses, cmap='viridis', 
                                 norm=colors.PowerNorm(gamma=0.25,
                                                       vmin=mses.min(), 
                                                       vmax=mses.max()));
                ax[1].scatter(a,b,s=100,color='red');
                ax[1].set_xlabel('a')
                ax[1].set_ylabel('b')
                ax[1].set_title('Mean Squared Error: ' + str(np.round(mse, 3)))

        elif show_mse == "3d": 
            ax[1] = plots.subplot(1, 2, 2, projection='3d')
            with Plot(ax[1]):
                ax[1].plot_surface(a_space, b_space, mses,
                                cmap='viridis', 
                                antialiased=False, 
                                linewidth=0,
                                norm = colors.PowerNorm(gamma=0.25,vmin=mses.min(), vmax=mses.max()))

                ax[1].plot([a],[b],[mean_squared_error(table, x_label, y_label, a, b)], 'ro',zorder=3);
                ax[1].set_xlabel('a')
                ax[1].set_ylabel('b')
                ax[1].set_title('Mean Squared Error')
            
    interact(visualize_regression_line_and_mse_heat_helper, 
                a = Slider(-0.5,0.5,0.01),
                b = Slider(-35,-8,0.1),
                fig = Fixed(None),
                show_mse=Choice([None, "2d", "3d"]))            

1. Lines on scatter plots#

#load data from the lecture 7 and clean it up 
greenland_climate = Table().read_table('data/climate_upernavik.csv')
greenland_climate = greenland_climate.relabeled('Precipitation (millimeters)', 
                                                "Precip (mm)")
tidy_greenland = greenland_climate.where('Air temperature (C)', 
                                         are.not_equal_to(999.99))
tidy_greenland = tidy_greenland.where('Sea level pressure (mbar)', 
                                      are.not_equal_to(9999.9))
feb = tidy_greenland.where('Month', are.equal_to(2))
feb.show(3)
Year Month Air temperature (C) Sea level pressure (mbar) Precip (mm)
1875 2 -19.7 1005.5 63
1876 2 -21.2 1012.3 68
1877 2 -26.5 1008.2 26

... (102 rows omitted)

plot = feb.scatter('Year', 'Air temperature (C)', fit_line=True)
plot.set_title('Februrary in Upernavik, Greenland')
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/1093984260.py in <module>
      1 plot = feb.scatter('Year', 'Air temperature (C)', fit_line=True)
----> 2 plot.set_title('Februrary in Upernavik, Greenland')

AttributeError: 'NoneType' object has no attribute 'set_title'

Let’s change the x-axis to be Years since 1880 so that it’s easier to calculate slope.

x = feb.column('Year') - 1880 
y = feb.column('Air temperature (C)')
x_y_table = Table().with_columns("x", x, "y", y)
plot = x_y_table.scatter("x", "y", fit_line=True)
plot.set_xlabel("Years since 1880")
plot.set_ylabel("Air temperature (C)");
plot.set_title('Februrary in Upernavik, Greenland');
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/3180166664.py in <module>
      1 plot = x_y_table.scatter("x", "y", fit_line=True)
----> 2 plot.set_xlabel("Years since 1880")
      3 plot.set_ylabel("Air temperature (C)");
      4 plot.set_title('Februrary in Upernavik, Greenland');

AttributeError: 'NoneType' object has no attribute 'set_xlabel'
# Manually put in a, b 
rise = -19+24
run = 120 
a = rise/run
b = -24 
print("a=", a, " , b=", b)
a= 0.041666666666666664  , b= -24

Two lines with different slopes and intercepts:

with Figure(1,2):
    plot_scatter_with_line(x_y_table, 'x', 'y', a, b)
    plot_scatter_with_line(x_y_table, 'x', 'y', 0.06, -20)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/2726873612.py in <module>
----> 1 with Figure(1,2):
      2     plot_scatter_with_line(x_y_table, 'x', 'y', a, b)
      3     plot_scatter_with_line(x_y_table, 'x', 'y', 0.06, -20)

NameError: name 'Figure' is not defined

The following cell contains an interactive visualization. You won’t see the visualization on this web page, but you can view and interact with it if you run this notebook on our server here.

interact(plot_scatter_with_line, 
         table = Fixed(x_y_table),
         x_label = Fixed('x'),
         y_label = Fixed('y'),
         a = Slider(-1,1,0.01),
         b = Slider(-35,-8,0.1))

Here is an animation showing lines of different slope and intercept.

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/2945069226.py in <module>
     20         yield locals()
     21 
---> 22 animate(partial(plot_scatter_with_line, x_y_table, 'x', 'y'), gen, default_mode="reflect")

NameError: name 'animate' is not defined

2. Predict and evaluate#

def line_predictions(a, b, x): 
    """
    Computes the prediction  y_hat = a * x + b
    where a and b are the slope and intercept and x is the set of x-values.
    """
    return a * x + b
y_hat = line_predictions(a, b, x_y_table.column("x"))
prediction_table = x_y_table.with_columns("y_hat", y_hat)
prediction_table.take(np.arange(10, 15))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/1722845482.py in <module>
----> 1 prediction_table.take(np.arange(10, 15))

NameError: name 'np' is not defined

A “residual” is y - y_hat. In other words, a “residual” is the true y minus the prediction made by our line, y_hat.

residuals = prediction_table.column("y") - prediction_table.column("y_hat")
prediction_table = prediction_table.with_columns('residual', residuals)
prediction_table.show(3)
x y y_hat residual
-5 -19.7 -24.2083 4.50833
-4 -21.2 -24.1667 2.96667
-3 -26.5 -24.125 -2.375

... (102 rows omitted)

The following cell contains an interactive visualization. You won’t see the visualization on this web page, but you can view and interact with it if you run this notebook on our server here.

interact(visualize_scatter_with_line_and_residuals, 
        table = Fixed(x_y_table),
        x_label = Fixed('x'),
        y_label = Fixed('y'),                     
        a = Slider(-1,1,0.01),
        b = Slider(-35,-8,0.1))

Here, mean squared error (MSE) first squares the residuals and then takes the mean.

Since we want our predictions (y_hat) to be closer to the observed values y a smaller MSE value is better.

np.mean(residuals**2)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/2681690236.py in <module>
----> 1 np.mean(residuals**2)

NameError: name 'np' is not defined

Let’s write a function that computes the MSE for any table with data and line characterized by a and b.

def mean_squared_error(table, x_label, y_label, a, b): 
    y_hat = line_predictions(a, b, table.column(x_label))
    residual = table.column(y_label) - y_hat
    return np.mean(residual**2)
mean_squared_error(x_y_table, 'x', 'y', a, b)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/2210417025.py in <module>
----> 1 mean_squared_error(x_y_table, 'x', 'y', a, b)

/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/225208544.py in mean_squared_error(table, x_label, y_label, a, b)
      2     y_hat = line_predictions(a, b, table.column(x_label))
      3     residual = table.column(y_label) - y_hat
----> 4     return np.mean(residual**2)

NameError: name 'np' is not defined
mean_squared_error(x_y_table, 'x', 'y', 0.04, -24)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/1833269796.py in <module>
----> 1 mean_squared_error(x_y_table, 'x', 'y', 0.04, -24)

/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/225208544.py in mean_squared_error(table, x_label, y_label, a, b)
      2     y_hat = line_predictions(a, b, table.column(x_label))
      3     residual = table.column(y_label) - y_hat
----> 4     return np.mean(residual**2)

NameError: name 'np' is not defined
mean_squared_error(x_y_table, 'x', 'y', 0.02, -20)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/2742332074.py in <module>
----> 1 mean_squared_error(x_y_table, 'x', 'y', 0.02, -20)

/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/225208544.py in mean_squared_error(table, x_label, y_label, a, b)
      2     y_hat = line_predictions(a, b, table.column(x_label))
      3     residual = table.column(y_label) - y_hat
----> 4     return np.mean(residual**2)

NameError: name 'np' is not defined

The following cell contains an interactive visualization. You won’t see the visualization on this web page, but you can view and interact with it if you run this notebook on our server here.

interact(visualize_scatter_with_line_and_mse, 
        table = Fixed(x_y_table),
        x_label = Fixed('x'),
        y_label = Fixed('y'),                     
        a = Slider(-1,1,0.01),
        b = Slider(-35,-8,0.1))
with Figure(1,3):
    visualize_scatter_with_line_and_mse(x_y_table, 'x', 'y', a, b)
    visualize_scatter_with_line_and_mse(x_y_table, 'x', 'y', 0.04, -24)
    visualize_scatter_with_line_and_mse(x_y_table, 'x', 'y', 0.02, -20)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/4228871256.py in <module>
----> 1 with Figure(1,3):
      2     visualize_scatter_with_line_and_mse(x_y_table, 'x', 'y', a, b)
      3     visualize_scatter_with_line_and_mse(x_y_table, 'x', 'y', 0.04, -24)
      4     visualize_scatter_with_line_and_mse(x_y_table, 'x', 'y', 0.02, -20)

NameError: name 'Figure' is not defined

3. Fit the Best Line Manually#

The following cell contains an interactive visualization. You won’t see the visualization on this web page, but you can view and interact with it if you run this notebook on our server here.

visualize_regression_line_and_mse_heat(x_y_table, 'x', 'y')

Here is an animation showing how to manually fit a line using MSE.

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/1397094297.py in <module>
    103     animate(visualize_regression_line_and_mse_heat_helper, gen, fig=_fig, interval=200, default_mode='loop', show_params=False)
    104 
--> 105 animate_regression_line_and_mse_heat(x_y_table, 'x', 'y')

/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/1397094297.py in animate_regression_line_and_mse_heat(table, x_label, y_label, show_mse)
      1 def animate_regression_line_and_mse_heat(table, x_label, y_label, show_mse=None):
      2 
----> 3     _fig=Figure(1,2, figsize=(5,5))
      4 
      5     a_space = np.linspace(-0.5, 0.5, 200)

NameError: name 'Figure' is not defined

4. Fit the Best Line with Brute Force#

# Brute force loop to find a and b 
lowest_mse = np.inf
a_for_lowest = 0
b_for_lowest = 0

for a in np.arange(-0.5, 0.5, 0.001): 
    for b in np.arange(-35, -10, 1): 
        mse = mean_squared_error(x_y_table, 'x', 'y', a, b)
        if mse < lowest_mse: 
            lowest_mse = mse
            a_for_lowest = a
            b_for_lowest = b 
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/2156846840.py in <module>
      1 # Brute force loop to find a and b
----> 2 lowest_mse = np.inf
      3 a_for_lowest = 0
      4 b_for_lowest = 0
      5 

NameError: name 'np' is not defined
lowest_mse, a_for_lowest, b_for_lowest
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/1505149944.py in <module>
----> 1 lowest_mse, a_for_lowest, b_for_lowest

NameError: name 'lowest_mse' is not defined
# Looks pretty good when we plot it! 
plot_regression_line_and_mse_heat(x_y_table, 'x', 'y', a_for_lowest, b_for_lowest)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/3851309000.py in <module>
      1 # Looks pretty good when we plot it!
----> 2 plot_regression_line_and_mse_heat(x_y_table, 'x', 'y', a_for_lowest, b_for_lowest)

NameError: name 'a_for_lowest' is not defined

5. Fit the Best Line with minimize()#

This is really an optimization problem for a curve in 3d.

plot_regression_line_and_mse_heat(x_y_table, 
                                  'x', 'y', 
                                  a_for_lowest, b_for_lowest, 
                                  show_mse='3d')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/2975835437.py in <module>
      1 plot_regression_line_and_mse_heat(x_y_table, 
      2                                   'x', 'y',
----> 3                                   a_for_lowest, b_for_lowest,
      4                                   show_mse='3d')

NameError: name 'a_for_lowest' is not defined

The following cell contains an interactive visualization. You won’t see the visualization on this web page, but you can view and interact with it if you run this notebook on our server here.

visualize_regression_line_and_mse_heat(x_y_table, 'x', 'y', show_mse='3d')
def linear_regression(table, x_label, y_label):
    """
    Return an array containing the slope and intercept of the line best fitting 
    the table's data according to the mean square error loss function.
    """
    
    # A helper function that takes *only* the two variables we need to optimize.
    # This is necessary to use minimize below, because the function we want
    # to minimize cannot take any parameters beyond those it will solve for.
    def mse_for_a_b(a, b):
        return mean_squared_error(table, x_label, y_label, a, b)
    
    # We can use a built-in function minimize that uses gradient descent
    # to find the optimal a and b for our mean squared error function     
    return minimize(mse_for_a_b)

optimal = linear_regression(x_y_table, 'x', 'y')
a_optimal = optimal.item(0)
b_optimal = optimal.item(1)
plot_regression_line_and_mse_heat(x_y_table, 'x', 'y', a_optimal, b_optimal)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/168394026.py in <module>
     15     return minimize(mse_for_a_b)
     16 
---> 17 optimal = linear_regression(x_y_table, 'x', 'y')
     18 a_optimal = optimal.item(0)
     19 b_optimal = optimal.item(1)

/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/168394026.py in linear_regression(table, x_label, y_label)
     13     # We can use a built-in function minimize that uses gradient descent
     14     # to find the optimal a and b for our mean squared error function
---> 15     return minimize(mse_for_a_b)
     16 
     17 optimal = linear_regression(x_y_table, 'x', 'y')

/usr/local/lib/python3.9/site-packages/datascience/util.py in minimize(f, start, smooth, log, array, **vargs)
    241     if not smooth and 'method' not in vargs:
    242         vargs['method'] = 'Powell'
--> 243     result = optimize.minimize(objective, start, **vargs)
    244     if log is not None:
    245         log(result)

/usr/local/lib/python3.9/site-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
    612                                     **options)
    613     elif meth == 'powell':
--> 614         return _minimize_powell(fun, x0, args, callback, bounds, **options)
    615     elif meth == 'cg':
    616         return _minimize_cg(fun, x0, args, jac, callback, **options)

/usr/local/lib/python3.9/site-packages/scipy/optimize/optimize.py in _minimize_powell(func, x0, args, callback, bounds, xtol, ftol, maxiter, maxfev, disp, direc, return_all, **unknown_options)
   3018                           OptimizeWarning, 3)
   3019 
-> 3020     fval = squeeze(func(x))
   3021     x1 = x.copy()
   3022     iter = 0

/usr/local/lib/python3.9/site-packages/scipy/optimize/optimize.py in function_wrapper(x, *wrapper_args)
    462     def function_wrapper(x, *wrapper_args):
    463         ncalls[0] += 1
--> 464         return function(np.copy(x), *(wrapper_args + args))
    465 
    466     return ncalls, function_wrapper

/usr/local/lib/python3.9/site-packages/datascience/util.py in objective(args)
    237         @functools.wraps(f)
    238         def objective(args):
--> 239             return f(*args)
    240 
    241     if not smooth and 'method' not in vargs:

/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/168394026.py in mse_for_a_b(a, b)
      9     # to minimize cannot take any parameters beyond those it will solve for.
     10     def mse_for_a_b(a, b):
---> 11         return mean_squared_error(table, x_label, y_label, a, b)
     12 
     13     # We can use a built-in function minimize that uses gradient descent

/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_2065/225208544.py in mean_squared_error(table, x_label, y_label, a, b)
      2     y_hat = line_predictions(a, b, table.column(x_label))
      3     residual = table.column(y_label) - y_hat
----> 4     return np.mean(residual**2)

NameError: name 'np' is not defined