Sweeney Case Study
Contents
Sweeney Case Study#
from datascience import *
from cs104 import *
import numpy as np
%matplotlib inline
Voter and Medical Data as in Sweeney’s work#
# Example data adapted from: Zheleva and Getoor, "PRIVACY IN SOCIAL NETWORKS: A SURVEY",
# Social Network Data Analytics, 2011.
# Load the public voter roll (name, birth year, sex, zip, party) from CSV.
voters = Table.read_table('data/sweeny_voters.csv')
voters
| Name | Birth Year | Sex | Zip | Party |
|---|---|---|---|---|
| Ana | 2001 | F | 20740 | Democrat |
| Bob | 1997 | M | 83209 | Democrat |
| Chris | 1998 | M | 20740 | Democrat |
| Don | 1993 | M | 83209 | Republican |
| Emma | 1998 | F | 20740 | Democrat |
| Fabio | 1998 | M | 20740 | Democrat |
| Gina | 1994 | F | 83209 | Democrat |
| Halle | 1993 | F | 83209 | Republican |
| Ian | 1991 | M | 83209 | Republican |
| John | 1998 | M | 20740 | Democrat |
# Load the "anonymized" medical records (birth year, sex, zip, record id) from CSV.
records = Table.read_table('data/sweeny_medical_records.csv')
records
| Birth Year | Sex | Zip | Medical Records |
|---|---|---|---|
| 1994 | M | 83209 | Record #1 |
| 1997 | F | 83209 | Record #2 |
| 2004 | M | 20740 | Record #3 |
| 2003 | F | 20740 | Record #4 |
| 2004 | M | 83209 | Record #5 |
| 1992 | M | 83209 | Record #6 |
| 1998 | F | 83209 | Record #7 |
| 1994 | F | 83209 | Record #8 |
| 2001 | F | 20740 | Record #9 |
| 2003 | M | 83209 | Record #10 |
... (20 rows omitted)
# Join on birth year alone: every voter is paired with every medical record
# sharing that birth year, so a single voter can match several records and the
# non-key columns that clash (Sex, Zip) reappear with a _2 suffix.
joined = voters.join('Birth Year', records)
joined
| Birth Year | Name | Sex | Zip | Party | Sex_2 | Zip_2 | Medical Records |
|---|---|---|---|---|---|---|---|
| 1991 | Ian | M | 83209 | Republican | F | 20740 | Record #15 |
| 1991 | Ian | M | 83209 | Republican | M | 20740 | Record #19 |
| 1991 | Ian | M | 83209 | Republican | F | 83209 | Record #20 |
| 1991 | Ian | M | 83209 | Republican | F | 20740 | Record #25 |
| 1991 | Ian | M | 83209 | Republican | M | 83209 | Record #27 |
| 1993 | Don | M | 83209 | Republican | M | 20740 | Record #26 |
| 1993 | Halle | F | 83209 | Republican | M | 20740 | Record #26 |
| 1994 | Gina | F | 83209 | Democrat | M | 83209 | Record #1 |
| 1994 | Gina | F | 83209 | Democrat | F | 83209 | Record #8 |
| 1994 | Gina | F | 83209 | Democrat | F | 20740 | Record #18 |
... (11 rows omitted)
# Join on birth year, sex, and zip together. Requiring all three to agree
# leaves far fewer candidate matches, which is what lets named voters be
# linked back to specific medical records.
joined = voters.join(make_array('Birth Year', 'Sex', 'Zip'), records)
joined
| Birth Year | Sex | Zip | Name | Party | Medical Records |
|---|---|---|---|---|---|
| 1991 | M | 83209 | Ian | Republican | Record #27 |
| 1994 | F | 83209 | Gina | Democrat | Record #8 |
| 1994 | F | 83209 | Gina | Democrat | Record #23 |
| 1997 | M | 83209 | Bob | Democrat | Record #21 |
| 2001 | F | 20740 | Ana | Democrat | Record #9 |
# Show the medical record(s) linked to the voter named Bob.
joined.where('Name', are.equal_to('Bob'))
| Birth Year | Sex | Zip | Name | Party | Medical Records |
|---|---|---|---|---|---|
| 1997 | M | 83209 | Bob | Democrat | Record #21 |
# Show the medical record(s) linked to the voter named Gina; more than one row
# means the three attributes narrow her down but do not identify a unique record.
joined.where('Name', are.equal_to('Gina'))
| Birth Year | Sex | Zip | Name | Party | Medical Records |
|---|---|---|---|---|---|
| 1994 | F | 83209 | Gina | Democrat | Record #8 |
| 1994 | F | 83209 | Gina | Democrat | Record #23 |