Conditionals and Loops#

from datascience import *
from cs104 import *
import numpy as np
%matplotlib inline

Booleans and Comparison Operators#

3 > 1
True
type(3 > 1)
bool
3 = 3
  Cell In[4], line 1
    3 = 3
    ^
SyntaxError: cannot assign to literal
3 == 3
True
x = 5
y = 12
x == 7
False
y - x
7
4 < y - x <= 6
False
4 < y - x
True
y - x <= 6
False
True and False
False
True or False
True
True and True
True

Monopoly#

https://i5.walmartimages.com/asr/5390b2cd-b17a-4dad-8889-a8a28b779950.e12432db94df80244efc837d9842006c.jpeg
monopoly = Table().read_table("data/monopoly.csv")
monopoly
Name Space Color Position Price PriceBuild Rent RentBuild1 RentBuild2 RentBuild3 RentBuild4 RentBuild5 Number
Go Go nan 0 0 0 0 0 0 0 0 0 0
Mediterranean Avenue Street Brown 1 60 50 2 10 30 90 160 250 2
Community Chest Chest nan 2 0 0 0 0 0 0 0 0 0
Baltic Avenue Street Brown 3 60 50 4 20 60 180 320 450 2
Income Tax Tax nan 4 200 0 200 0 0 0 0 0 0
Reading Railroad Railroad nan 5 200 0 25 0 0 0 0 0 0
Oriental Avenue Street LightBlue 6 100 50 6 30 90 270 400 550 3
Chance Chance nan 7 0 0 0 0 0 0 0 0 0
Vermont Avenue Street LightBlue 8 100 50 6 30 90 270 400 550 3
Connecticut Avenue Street LightBlue 9 120 50 8 40 100 300 450 600 3

... (30 rows omitted)

tiny_monopoly = monopoly.where('Color', are.not_equal_to('None'))
tiny_monopoly = tiny_monopoly.where('Space', are.containing('Street'))
tiny_monopoly = tiny_monopoly.select('Name', 'Color', 'Price')
tiny_monopoly = tiny_monopoly.sort('Name')  
tiny_monopoly.show()
Name Color Price
Atlantic Avenue Yellow 260
Baltic Avenue Brown 60
Boardwalk Blue 400
Connecticut Avenue LightBlue 120
Illinois Avenue Red 240
Indiana Avenue Red 220
Kentucky Avenue Red 220
Marvin Gardens Yellow 280
Mediterranean Avenue Brown 60
New York Avenue Orange 200
North Carolina Avenue Green 300
Oriental Avenue LightBlue 100
Pacific Avenue Green 300
Park Place Blue 350
Pennsylvania Avenue Green 320
St. Charles Place Pink 140
St. James Place Orange 180
States Avenue Pink 140
Tennessee Avenue Orange 180
Ventnor Avenue Yellow 260
Vermont Avenue LightBlue 100
Virginia Avenue Pink 160

Suppose we only have 220 dollars. How many properties could we buy for exactly 220 dollars?

price = tiny_monopoly.column("Price")
price
array([260,  60, 400, 120, 240, 220, 220, 280,  60, 200, 300, 100, 300,
       350, 320, 140, 180, 140, 180, 260, 100, 160])
price == 220
array([False, False, False, False, False,  True,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False])
sum(price==220)
2
np.count_nonzero(price == 220)
2

How many properties could we buy for less than or equal to 200 dollars?

price
array([260,  60, 400, 120, 240, 220, 220, 280,  60, 200, 300, 100, 300,
       350, 320, 140, 180, 140, 180, 260, 100, 160])
price <= 200
array([False,  True, False,  True, False, False, False, False,  True,
        True, False,  True, False, False, False,  True,  True,  True,
        True, False,  True,  True])
np.count_nonzero(price <= 200)
11

How many of the Monopoly spaces are light blue?

sum(monopoly.column("Color") == "LightBlue")
3

Conditional Statements#

def price_rating(price):
    """Print "Inexpensive" when price is under 100; otherwise print nothing."""
    is_cheap = price < 100
    if not is_cheap:
        return
    print("Inexpensive")
        
price_rating(50)
Inexpensive
price_rating(500)
def price_rating(price):
    """Print "Inexpensive" for prices under 100 and "Expensive" otherwise."""
    label = "Inexpensive" if price < 100 else "Expensive"
    print(label)
price_rating(500)
Expensive
# return a value instead of printing
def price_rating(price):
    """Return a cost label for price.

    Under 200 gives "Inexpensive", under 300 gives "Expensive", under 400
    gives "Very Expensive", and anything else gives "Outrageous".
    """
    # Thresholds are checked in increasing order; the first one that the
    # price falls below decides the label.
    tiers = (
        (200, "Inexpensive"),
        (300, "Expensive"),
        (400, "Very Expensive"),
    )
    for upper_bound, label in tiers:
        if price < upper_bound:
            return label
    return "Outrageous"

ratings = tiny_monopoly.apply(price_rating, 'Price')
ratings
array(['Expensive', 'Inexpensive', 'Outrageous', 'Inexpensive',
       'Expensive', 'Expensive', 'Expensive', 'Expensive', 'Inexpensive',
       'Expensive', 'Very Expensive', 'Inexpensive', 'Very Expensive',
       'Very Expensive', 'Very Expensive', 'Inexpensive', 'Inexpensive',
       'Inexpensive', 'Inexpensive', 'Expensive', 'Inexpensive',
       'Inexpensive'], dtype='<U14')
rated_monopoly = tiny_monopoly.with_columns("Cost Rating", ratings)
rated_monopoly
Name Color Price Cost Rating
Atlantic Avenue Yellow 260 Expensive
Baltic Avenue Brown 60 Inexpensive
Boardwalk Blue 400 Outrageous
Connecticut Avenue LightBlue 120 Inexpensive
Illinois Avenue Red 240 Expensive
Indiana Avenue Red 220 Expensive
Kentucky Avenue Red 220 Expensive
Marvin Gardens Yellow 280 Expensive
Mediterranean Avenue Brown 60 Inexpensive
New York Avenue Orange 200 Expensive

... (12 rows omitted)

rated_monopoly.where('Cost Rating', 'Inexpensive')
Name Color Price Cost Rating
Baltic Avenue Brown 60 Inexpensive
Connecticut Avenue LightBlue 120 Inexpensive
Mediterranean Avenue Brown 60 Inexpensive
Oriental Avenue LightBlue 100 Inexpensive
St. Charles Place Pink 140 Inexpensive
St. James Place Orange 180 Inexpensive
States Avenue Pink 140 Inexpensive
Tennessee Avenue Orange 180 Inexpensive
Vermont Avenue LightBlue 100 Inexpensive
Virginia Avenue Pink 160 Inexpensive
rated_monopoly.where('Cost Rating', 'Outrageous')
Name Color Price Cost Rating
Boardwalk Blue 400 Outrageous

Slot Machine#

Suppose we have a slot machine with three wheels, each having four symbols: ‘🍒’, ‘🔔’, ‘🍋’, and ‘🍉’.

slot_symbols = make_array('πŸ’', 'πŸ””', 'πŸ‹', 'πŸ‰')

The payout for our slot machine is described by the following rules:

  1. If there are any lemons, the payout is -1. (That is, you lose your coin.)

  2. If all wheels show cherries, the payout is 15. (You win back your coin plus 15 more.)

  3. In all other cases, the payout is three times the number of bells.

def payout(symbols):
    """Return the payout for one spin, given an array of wheel symbols.

    Any lemon pays -1. Otherwise, all cherries pays 15. Otherwise the
    payout is 3 for every bell showing.
    """
    lemon_count = np.count_nonzero(symbols == 'πŸ‹')
    if lemon_count > 0:
        return -1

    cherry_count = np.count_nonzero(symbols == 'πŸ’')
    if cherry_count == len(symbols):
        return 15

    bell_count = np.count_nonzero(symbols == 'πŸ””')
    return 3 * bell_count
def play_slots():
    """Hand a three-wheel slot machine to interact, one Choice per wheel.

    The inner function builds an array from the three chosen symbols and
    prints its payout, padded by a blank line above and below.
    """
    def slot_machine(wheel1, wheel2, wheel3):
        spin = make_array(wheel1, wheel2, wheel3)
        message = "Payout for " + str(spin) + " is " + str(payout(spin))
        # Empty strings joined by newlines give the blank line before and after.
        print("", message, "", sep="\n")

    # Keyword names must match slot_machine's parameters.
    choosers = {
        wheel_name: Choice(slot_symbols)
        for wheel_name in ("wheel1", "wheel2", "wheel3")
    }
    interact(slot_machine, **choosers)
    
play_slots()
check(payout(make_array('πŸ‹', 'πŸ’', 'πŸ’')) == -1)
check(payout(make_array('πŸ‹', 'πŸ‹', 'πŸ‹')) == -1)
check(payout(make_array('πŸ’', 'πŸ’', 'πŸ’')) == 15)
check(payout(make_array('πŸ’', 'πŸ””', 'πŸ””')) == 6)
check(payout(make_array('πŸ’', 'πŸ‰', 'πŸ‰')) == 0)

Let’s look at a table of all possible spins.

from sklearn.utils.extmath import cartesian

spins = Table().with_column('Spin', cartesian([slot_symbols, slot_symbols, slot_symbols]))
spins.show(10)
Spin
['πŸ’' 'πŸ’' 'πŸ’']
['πŸ’' 'πŸ’' 'πŸ””']
['πŸ’' 'πŸ’' 'πŸ‹']
['πŸ’' 'πŸ’' 'πŸ‰']
['πŸ’' 'πŸ””' 'πŸ’']
['πŸ’' 'πŸ””' 'πŸ””']
['πŸ’' 'πŸ””' 'πŸ‹']
['πŸ’' 'πŸ””' 'πŸ‰']
['πŸ’' 'πŸ‹' 'πŸ’']
['πŸ’' 'πŸ‹' 'πŸ””']

... (54 rows omitted)

payouts = spins.with_column('Payout', spins.apply(payout, 'Spin'))
payouts.show(10)
Spin Payout
['πŸ’' 'πŸ’' 'πŸ’'] 15
['πŸ’' 'πŸ’' 'πŸ””'] 3
['πŸ’' 'πŸ’' 'πŸ‹'] -1
['πŸ’' 'πŸ’' 'πŸ‰'] 0
['πŸ’' 'πŸ””' 'πŸ’'] 3
['πŸ’' 'πŸ””' 'πŸ””'] 6
['πŸ’' 'πŸ””' 'πŸ‹'] -1
['πŸ’' 'πŸ””' 'πŸ‰'] 3
['πŸ’' 'πŸ‹' 'πŸ’'] -1
['πŸ’' 'πŸ‹' 'πŸ””'] -1

... (54 rows omitted)

What proportion of the spins have payouts greater than 0?

positive_payout_proportion = payouts.where('Payout', are.above(0)).num_rows / payouts.num_rows
positive_payout_proportion
0.3125
np.mean(payouts.column('Payout'))
0.921875

Would casinos ever use our slot machines?

Algorithms for Arrays#

Here is an array of numbers:

numbers = np.random.choice(np.arange(0,100,1), 10)
numbers
array([30, 48, 43, 52, 16, 20, 87,  0, 29, 84])

We can add up all the numbers in the array:

sum(numbers)
409

How do we write sum? We need to be able to do the same thing to every element in the array, namely add it to a tally we’re keeping. Loops let us do the same thing repeatedly.

for name in ['Katie', 'Steve']:
    print(name)
Katie
Steve
for i in np.arange(0,10):
    print("iteration", i)
iteration 0
iteration 1
iteration 2
iteration 3
iteration 4
iteration 5
iteration 6
iteration 7
iteration 8
iteration 9
# Steve: Important for prelab to show one loop that accumulates a value...
total = 0
for i in numbers:
    total = total + i
total
409
def sum(numbers):
    """Return the total of the items in numbers, starting the tally at 0."""
    running_total = 0
    for value in numbers:
        running_total = running_total + value
    return running_total

sum(np.random.choice(np.arange(0,100,1), 100))
5247
def mean(numbers):
    """Return the average of numbers: their sum divided by how many there are."""
    total = sum(numbers)
    count = len(numbers)
    return total / count

mean(numbers)
40.9
def max(numbers):
    """
    Return the largest item in the array numbers.

    requires len(numbers) > 0, because the search is seeded with
    numbers.item(0).
    """
    champion = numbers.item(0)
    for candidate in numbers:
        # Only a strictly larger candidate replaces the current champion.
        champion = candidate if candidate > champion else champion
    return champion

max(numbers)
    
87

Think-pair-share of some sort here..#

def abs(numbers):
    """Return a new array holding the absolute value of each item in numbers."""
    magnitudes = make_array()
    for value in numbers:
        # Negative items are negated; everything else is kept unchanged.
        magnitude = -value if value < 0 else value
        magnitudes = np.append(magnitudes, magnitude)
    return magnitudes

abs(numbers)
    
array([30., 48., 43., 52., 16., 20., 87.,  0., 29., 84.])

Simulation and Loops#

Think-pair-share#

https://wherethewindsblow.com/wp-content/uploads/2015/04/White-Six-Sided-Dice.jpg

Suppose your friend proposes that you switch your electronic version of Monopoly so players roll one die and multiply its value by two, rather than the more standard way of rolling two dice and summing their values.

Which scenario will give you a higher score on average?

  • Option A. Roll 2 dice and sum their values.

  • Option B. Roll one die and multiply its value by two.

Random Selection#

dice = np.arange(1,7)
dice
array([1, 2, 3, 4, 5, 6])

Here, np.random.choice randomly picks one item from the array and it is equally likely to pick any of the items.

np.random.choice(dice)
1

We can repeat the process by passing a second argument: the number of items to pick.

np.random.choice(dice, 10)
array([1, 6, 5, 6, 1, 6, 1, 5, 2, 3])
rolls = np.random.choice(dice,100)
sum(rolls == 6)
13
np.mean(rolls)
3.17

Simulating the Question#

N = 1000000
option_a = np.random.choice(dice, N) + np.random.choice(dice, N)
option_b = 2 * np.random.choice(dice, N)
print("Option A Mean: ", np.mean(option_a))
print("Option B Mean: ", np.mean(option_b))
Option A Mean:  7.001956
Option B Mean:  6.997276
samples = Table().with_columns("Option A", option_a, "Option B", option_b)
samples.hist("Option A", bins=np.arange(0,14))
samples.hist("Option B", bins=np.arange(0,14))
../_images/14-conditionals-and-loops_88_0.png ../_images/14-conditionals-and-loops_88_1.png
samples.hist(bins=np.arange(0,14))
../_images/14-conditionals-and-loops_89_0.png

Think-Pair-Share 2#

https://upload.wikimedia.org/wikipedia/commons/7/74/Pompey_by_Nasidius.jpg

If you flip a coin 100 times, what are the odds you get between 40 and 60 heads?

Simulating One Trial#

coin = make_array('heads', 'tails')
np.random.choice(coin)
'heads'
tosses = np.random.choice(coin, 100)
tosses
array(['tails', 'tails', 'tails', 'heads', 'tails', 'heads', 'tails',
       'tails', 'tails', 'heads', 'tails', 'tails', 'tails', 'heads',
       'tails', 'heads', 'heads', 'heads', 'tails', 'heads', 'tails',
       'heads', 'tails', 'heads', 'tails', 'tails', 'heads', 'heads',
       'tails', 'heads', 'tails', 'tails', 'heads', 'tails', 'heads',
       'tails', 'heads', 'tails', 'heads', 'heads', 'heads', 'tails',
       'tails', 'heads', 'heads', 'tails', 'tails', 'tails', 'heads',
       'tails', 'tails', 'heads', 'tails', 'tails', 'heads', 'tails',
       'heads', 'tails', 'tails', 'heads', 'heads', 'tails', 'heads',
       'heads', 'heads', 'heads', 'heads', 'tails', 'tails', 'heads',
       'tails', 'tails', 'tails', 'tails', 'tails', 'heads', 'tails',
       'heads', 'tails', 'tails', 'heads', 'tails', 'heads', 'tails',
       'heads', 'tails', 'tails', 'tails', 'tails', 'heads', 'tails',
       'tails', 'heads', 'tails', 'heads', 'heads', 'heads', 'heads',
       'heads', 'heads'], dtype='<U5')
sum(tosses == 'heads')
46

All in one cell: run it a bunch!

sum(np.random.choice(coin, 100) == 'heads')
40

Running Many Trials: For Loops#

for name in ['Katie', 'Steve']:
    print(name)
Katie
Steve
for i in np.arange(0,10):
    print("iteration", i)
iteration 0
iteration 1
iteration 2
iteration 3
iteration 4
iteration 5
iteration 6
iteration 7
iteration 8
iteration 9
# Steve: Important for prelab to show one loop that accumulates a value...
total = 0
for i in np.arange(0,10):
    total = total + i
total
45
for i in np.arange(0,10):
    print(sum(np.random.choice(coin, 100) == 'heads'))
52
41
54
57
48
55
50
56
46
64

Appending to an array of outcomes#

outcomes = make_array()

One simulation: run it a bunch!

num_heads = sum(np.random.choice(coin, 100) == 'heads')
outcomes = np.append(outcomes, num_heads)
outcomes
array([47.])

All in one cell:

outcomes = make_array()
number_outcomes = 10000
for i in np.arange(0, number_outcomes):
    num_heads = sum(np.random.choice(coin, 100) == 'heads')
    outcomes = np.append(outcomes, num_heads)
    
outcomes
array([49., 47., 45., ..., 50., 47., 48.])
simulated_results = Table().with_column('Heads in 100 flips', outcomes)
simulated_results.hist(bins=np.arange(30, 70, 1))
../_images/14-conditionals-and-loops_112_0.png
target_range = simulated_results.group("Heads in 100 flips").where("Heads in 100 flips", are.between(40,60))
target_range
Heads in 100 flips count
40 99
41 174
42 220
43 277
44 391
45 472
46 538
47 677
48 763
49 812

... (10 rows omitted)

sum(target_range.column("count")) / simulated_results.num_rows
0.9501

Generalization#

Let’s make a reusable version of our simulation. That is, let’s make a function to do the work and produce the outcomes array.

def simulate_coin_tosses():
    """Return an array of heads counts from 10000 trials of 100 coin tosses each."""
    trial_count = 10000
    heads_counts = make_array()
    for _ in np.arange(0, trial_count):
        tosses = np.random.choice(coin, 100)
        heads_counts = np.append(heads_counts, sum(tosses == 'heads'))
    return heads_counts

Make it general by providing parameters to customize behavior.

def simulate_coin_tosses(number_outcomes):
    """Return an array of heads counts, one per trial of 100 coin tosses.

    number_outcomes is how many trials to run.
    """
    heads_counts = make_array()
    for _ in np.arange(0, number_outcomes):
        tosses = np.random.choice(coin, 100)
        heads_counts = np.append(heads_counts, sum(tosses == 'heads'))
    return heads_counts
simulate_coin_tosses(5)
array([47., 44., 52., 53., 50.])

What would change if we simulated other kinds of events?

  • Number of tails in 200 flips?

  • Sum of 20 dice rolls?

The only thing that changes is the outcome for each individual simulation step:

def simulate(make_outcome, number_outcomes):
    """Call make_outcome number_outcomes times and return the results as an array.

    make_outcome is called with no arguments and produces one trial's outcome.
    """
    results = make_array()
    for _ in np.arange(0, number_outcomes):
        results = np.append(results, make_outcome())
    return results
def heads_in_100_flips():
    """Return how many of 100 random draws from coin equal 'heads'."""
    flips = np.random.choice(coin, 100)
    return sum(flips == 'heads')

simulate(heads_in_100_flips, 10)
array([57., 50., 50., 49., 55., 48., 42., 42., 47., 50.])
def sum_twenty_dice():
    """Return the total of 20 random picks from the values 1 through 6."""
    faces = np.arange(1, 7)
    rolls = np.random.choice(faces, 20)
    return sum(rolls)

simulate(sum_twenty_dice, 5)
array([68., 65., 67., 66., 63.])