Midterm Review
Contents
Midterm Review#
import numpy as np
from datascience import *
from cs104 import *
%matplotlib inline
Converting between strings and ints.#
# this is an error: we can't add a string and an int
"hello" + 5
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[3], line 2
1 # this is an error: can add a string and an int
----> 2 "hello" + 5
TypeError: can only concatenate str (not "int") to str
# but we can convert an integer to a string with `str`,
# so that `+` joins two strings:
"hello" + str(5)
'hello5'
# and vice-versa: `int` converts a string of digits to an integer,
# so that `+` adds two numbers.
int("55") + 5
60
Simulation and Loops#
This code is based on the sample question.
def within(x, y, z):
    """Report whether z is strictly less than x away from y.

    The comparison is strict: a value exactly x away from y is
    not considered within range.
    """
    gap = abs(z - y)
    return gap < x
# 4 is a distance of 4 from 0, which is less than 5
within(5, 0, 4)
True
# -3 is a distance of 3 from 0; abs() makes the side irrelevant
within(5, 0, -3)
True
# -5 is a distance of exactly 5 from 0; the comparison is strict (<),
# so the boundary itself is excluded
within(5, 0, -5)
False
A simulation loop, where we run a given number of trials.
# Estimate, by simulation, how often the average of five die rolls
# falls strictly within 0.5 of 3.5.

# Faces of the die being rolled. Assumed to be a fair six-sided die
# (3.5 is its expected value) -- confirm against the sample question.
dice = np.arange(1, 7)

num_trials = 10000
count = 0  # number of trials whose average landed within range

for i in np.arange(0, num_trials):
    # One trial: roll five times (sampling with replacement) and average.
    five_rolls = np.random.choice(dice, 5)
    ave = np.mean(five_rolls)
    if within(0.5, 3.5, ave):
        count = count + 1

# Proportion of trials in range, i.e. the estimated probability.
print(count / num_trials)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[11], line 5
2 count = 0
4 for i in np.arange(0, num_trials):
----> 5 five_rolls = np.random.choice(dice, 5)
6 ave = np.mean(five_rolls)
7 if within(0.5, 3.5, ave):
NameError: name 'dice' is not defined
Another loop, where we count the number of positive values in an array
def stay_positive(values):
    """Count the entries of values that are strictly greater than zero."""
    positives_seen = 0
    for entry in values:
        if not entry > 0:
            # Zero and negative entries are not counted.
            continue
        positives_seen += 1
    return positives_seen
# 1, 3, and 4 are positive; 0 is not counted because the test is value > 0
stay_positive(make_array(1, -1, -2, 3, 4, 0))
3
A variation where we sum the positive values in an array
def get_positives(values):
    """Add up the entries of values that are strictly greater than zero.

    Returns 0 when no entry is positive.
    """
    running_total = 0
    for entry in values:
        if not entry > 0:
            # Zero and negative entries contribute nothing to the sum.
            continue
        running_total += entry
    return running_total
# the positive entries are 1, 3, and 4, so the sum is 1 + 3 + 4
get_positives(make_array(1, -1, -2, 3, 4, 0))
8
Another variation where we return an array of the positive values.
def all_positives(values):
    """Collect the entries of values that are strictly greater than zero.

    The result starts as an empty array and each positive entry is
    appended to it in turn, so entries keep their original order.
    """
    kept = make_array()
    for entry in values:
        if not entry > 0:
            # Zero and negative entries are left out.
            continue
        # np.append returns a new array, so rebind the name each time.
        kept = np.append(kept, entry)
    return kept
# keeps 1, 3, and 4, in the order they appear in the input
all_positives(make_array(1, -1, -2, 3, 4, 0))
array([1., 3., 4.])
## Group and Pivot
# Load the ice cream cone data from a CSV file and display every row.
# read_table is called on the Table class itself; there is no need to
# build an empty Table first.
cones = Table.read_table("data/cones.csv")
cones.show()
Flavor | Color | Price |
---|---|---|
strawberry | pink | 3.55 |
chocolate | light brown | 4.75 |
chocolate | dark brown | 5.25 |
strawberry | pink | 5.25 |
chocolate | dark brown | 5.25 |
purple cow | purple | 10.75 |
# Count the number of occurrences of each color:
# the result has one row per distinct Color and a `count` column
cones.group("Color")
Color | count |
---|---|
dark brown | 2 |
light brown | 1 |
pink | 2 |
purple | 1 |
# Sum up the numerical columns for each color.
# The output shows the non-numeric Flavor column's sum left blank.
cones.group("Color", sum)
Color | Flavor sum | Price sum |
---|---|---|
dark brown | | 10.5 |
light brown | | 4.75 |
pink | | 8.8 |
purple | | 10.75 |
Pivot also groups, this time by 2 categorical variables
# One row per Flavor and one column per Color;
# each cell counts the cones with that Flavor/Color combination
cones.pivot('Color', 'Flavor')
Flavor | dark brown | light brown | pink | purple |
---|---|---|---|---|
chocolate | 2 | 1 | 0 | 0 |
purple cow | 0 | 0 | 0 | 1 |
strawberry | 0 | 0 | 2 | 0 |
We can count, as above, or aggregate a numerical column, like this:
# Each cell holds the largest Price among cones with that Flavor/Color
# combination; the output shows 0 where no cone matches
cones.pivot('Color', 'Flavor', "Price", max)
Flavor | dark brown | light brown | pink | purple |
---|---|---|---|---|
chocolate | 5.25 | 4.75 | 0 | 0 |
purple cow | 0 | 0 | 0 | 10.75 |
strawberry | 0 | 0 | 5.25 | 0 |