Midterm Review#

import numpy as np
from datascience import *
from cs104 import *
%matplotlib inline

Converting between strings and ints.#

# this is an error: can't add a string and an int
"hello" + 5
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[3], line 2
      1 # this is an error: can't add a string and an int
----> 2 "hello" + 5

TypeError: can only concatenate str (not "int") to str
# but we can convert an integer to a string with `str`:
"hello" + str(5)
'hello5'
# and vice-versa.
int("55") + 5
60

Simulation and Loops#

This code is based on the sample question.

def within(x, y, z):
    """Check whether z is strictly less than x away from y.

    Returns True when the absolute difference between z and y is smaller
    than x, and False otherwise (including when the difference equals x).
    """
    distance = abs(z - y)
    return distance < x
within(5, 0, 4)
True
within(5, 0, -3)
True
within(5, 0, -5)
False

A simulation loop, where we run a given number of trials.

num_trials = 10000
count = 0

# Faces of a fair six-sided die. Defined here because nothing earlier in the
# notebook creates `dice`, and the loop below samples from it.
dice = np.arange(1, 7)

for i in np.arange(0, num_trials):
    # One trial: roll five dice and take the average of the rolls.
    five_rolls = np.random.choice(dice, 5)
    ave = np.mean(five_rolls)
    # Count the trial when the average is less than 0.5 away from 3.5.
    if within(0.5, 3.5, ave):
        count = count + 1

# Proportion of trials whose average landed inside that window; the value
# changes from run to run because the rolls are random.
print(count / num_trials)

Another loop, where we count the number of positive values in an array.

def stay_positive(values):
    """Count how many entries of values are greater than zero.

    Zero and negative entries are not counted; the result is 0 when
    values has no positive entry.
    """
    num_positive = 0
    for entry in values:
        if not entry > 0:
            continue
        num_positive += 1
    return num_positive
stay_positive(make_array(1, -1, -2, 3, 4, 0))
3

A variation where we sum the positive values in an array.

def get_positives(values):
    """Return the sum of the entries of values that are greater than zero.

    Zero and negative entries are ignored; the result is 0 when no entry
    is positive.
    """
    running_total = 0
    for entry in values:
        if not entry > 0:
            continue
        running_total = running_total + entry
    return running_total
get_positives(make_array(1, -1, -2, 3, 4, 0))
8

Another variation where we return an array of the positive values.

def all_positives(values):
    """Collect the entries of values that are greater than zero.

    Starts from an empty array and appends each positive entry in the
    order it appears, then returns the resulting array.
    """
    kept = make_array()
    for entry in values:
        if not entry > 0:
            continue
        kept = np.append(kept, entry)
    return kept
all_positives(make_array(1, -1, -2, 3, 4, 0))
array([1., 3., 4.])
## Group and Pivot
# Load the ice cream cone data. read_table is called on the Table class
# itself, so no throwaway empty table is created first.
cones = Table.read_table("data/cones.csv")
cones.show()
Flavor Color Price
strawberry pink 3.55
chocolate light brown 4.75
chocolate dark brown 5.25
strawberry pink 5.25
chocolate dark brown 5.25
purple cow purple 10.75
# Count the number of occurrences of each color
cones.group("Color")
Color count
dark brown 2
light brown 1
pink 2
purple 1
# Sum up the numerical columns for each color
cones.group("Color", sum)
Color Flavor sum Price sum
dark brown 10.5
light brown 4.75
pink 8.8
purple 10.75

Pivot also groups, this time by two categorical variables.

cones.pivot('Color', 'Flavor')
Flavor dark brown light brown pink purple
chocolate 2 1 0 0
purple cow 0 0 0 1
strawberry 0 0 2 0

We can count, as above, or aggregate a numerical column, like this:

cones.pivot('Color', 'Flavor', "Price", max)
Flavor dark brown light brown pink purple
chocolate 5.25 4.75 0 0
purple cow 0 0 0 10.75
strawberry 0 0 5.25 0