Sweeney Case Study
Contents
Sweeney Case Study#
from datascience import *
from cs104 import *
import numpy as np
%matplotlib inline
Voter and Medical Data as in Sweeney’s work#
# example data from: Zheleva and Getoor, "PRIVACY IN SOCIAL NETWORKS: A SURVEY", Social Network Data Analytics, 2011.
# Load the voter table from CSV. Each row names a voter alongside their
# birth year, sex, zip code, and party.
voters = Table().read_table('data/sweeny_voters.csv')
voters
Name | Birth Year | Sex | Zip | Party |
---|---|---|---|---|
Ana | 2001 | F | 20740 | Democrat |
Bob | 1997 | M | 83209 | Democrat |
Chris | 1998 | M | 20740 | Democrat |
Don | 1993 | M | 83209 | Republican |
Emma | 1998 | F | 20740 | Democrat |
Fabio | 1998 | M | 20740 | Democrat |
Gina | 1994 | F | 83209 | Democrat |
Halle | 1993 | F | 83209 | Republican |
Ian | 1991 | M | 83209 | Republican |
John | 1998 | M | 20740 | Democrat |
# Load the medical records table from CSV. These rows carry no names, only
# birth year, sex, and zip code alongside each medical record.
records = Table().read_table('data/sweeny_medical_records.csv')
records
Birth Year | Sex | Zip | Medical Records |
---|---|---|---|
1994 | M | 83209 | Record #1 |
1997 | F | 83209 | Record #2 |
2004 | M | 20740 | Record #3 |
2003 | F | 20740 | Record #4 |
2004 | M | 83209 | Record #5 |
1992 | M | 83209 | Record #6 |
1998 | F | 83209 | Record #7 |
1994 | F | 83209 | Record #8 |
2001 | F | 20740 | Record #9 |
2003 | M | 83209 | Record #10 |
... (20 rows omitted)
# Join voters to medical records on birth year alone. A voter is paired with
# every record sharing that year, so one voter can match several records; the
# records table's own Sex and Zip columns show up as Sex_2 and Zip_2.
joined = voters.join('Birth Year', records)
joined
Birth Year | Name | Sex | Zip | Party | Sex_2 | Zip_2 | Medical Records |
---|---|---|---|---|---|---|---|
1991 | Ian | M | 83209 | Republican | F | 20740 | Record #15 |
1991 | Ian | M | 83209 | Republican | M | 20740 | Record #19 |
1991 | Ian | M | 83209 | Republican | F | 83209 | Record #20 |
1991 | Ian | M | 83209 | Republican | F | 20740 | Record #25 |
1991 | Ian | M | 83209 | Republican | M | 83209 | Record #27 |
1993 | Don | M | 83209 | Republican | M | 20740 | Record #26 |
1993 | Halle | F | 83209 | Republican | M | 20740 | Record #26 |
1994 | Gina | F | 83209 | Democrat | M | 83209 | Record #1 |
1994 | Gina | F | 83209 | Democrat | F | 83209 | Record #8 |
1994 | Gina | F | 83209 | Democrat | F | 20740 | Record #18 |
... (11 rows omitted)
# Join on birth year, sex, and zip together. Only voter/record pairs that
# agree on all three columns are kept, leaving far fewer candidate matches
# than joining on birth year alone.
joined = voters.join(make_array('Birth Year', 'Sex', 'Zip'), records)
joined
Birth Year | Sex | Zip | Name | Party | Medical Records |
---|---|---|---|---|---|
1991 | M | 83209 | Ian | Republican | Record #27 |
1994 | F | 83209 | Gina | Democrat | Record #8 |
1994 | F | 83209 | Gina | Democrat | Record #23 |
1997 | M | 83209 | Bob | Democrat | Record #21 |
2001 | F | 20740 | Ana | Democrat | Record #9 |
# Bob matches exactly one medical record, so that record is linked to him by name.
joined.where('Name', are.equal_to('Bob'))
Birth Year | Sex | Zip | Name | Party | Medical Records |
---|---|---|---|---|---|
1997 | M | 83209 | Bob | Democrat | Record #21 |
# Gina matches two medical records, so the join alone cannot tell which one is hers.
joined.where('Name', are.equal_to('Gina'))
Birth Year | Sex | Zip | Name | Party | Medical Records |
---|---|---|---|---|---|
1994 | F | 83209 | Gina | Democrat | Record #8 |
1994 | F | 83209 | Gina | Democrat | Record #23 |