-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmiscellaneous_code.py
59 lines (44 loc) · 2.19 KB
/
miscellaneous_code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# -*- coding: utf-8 -*-
"""
Created on Fri May 31 20:14:57 2019
@author: Alex
"""
# Flag, for every person, whether their occupation code (TJBOCC1) takes more
# than one distinct non-null value in the months from the child's birth month
# onward. The flag is constant within a person: 1 = occupation changed, 0 = not.
unique_persons = df['unique_id'].unique()

# Rows observed in the month of, or after, the birth of the child.
post_birth_rows = df[df['ref_date'] >= df['birth_month']]

# Number of distinct occupation codes per person; nunique() ignores nulls by
# default, which replaces the explicit null-dropping step.
post_birth_occ_counts = post_birth_rows.groupby('unique_id')['TJBOCC1'].nunique()

# Broadcast the per-person result back onto every row of the original
# dataframe. Persons with no post-birth rows are missing from the counts and
# therefore map to NaN -> 0 (no observed change).
df['ever_occs_change'] = (
    df['unique_id'].map(post_birth_occ_counts).fillna(0).gt(1).astype(int)
)
# Codes / labels translator: maps two-digit occupation group codes (stored as
# strings) to human-readable labels.
# NOTE(review): the codes look like SOC major-group codes -- confirm against
# the SIPP codebook.
occ_dict = {
    '11': 'Management',
    '13': 'Business and Finance',
    '21': 'Community and Social Service',
    '25': 'Education, Training, and Library',
    '29': 'Healthcare Practitioners and Technicians',
    '31': 'Healthcare Support',
    '35': 'Food Preparation and Servers',
    '37': 'Building and Grounds Cleaning and Maintenance',
    '39': 'Personal Care and Service',
    '41': 'Sales and Related Occupations',
    '43': 'Office and Administrative Support',
    '51': 'Production Occupations',
}
# Occupation group regressions
#group_models_results = [smf.ols(formula =
# 'LFP ~ C(rhcalyr) + C(tfipsst) + C(birth_recode) * policy',
# data = df[df['industry_pre_birth'] == i]).fit(cov_type = 'cluster',
# cov_kwds={'groups': df[df['industry_pre_birth'] == i]['ssuid']}) for i in occ_dict]
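# Summary of one occupation group's regression. NOTE: index 2 is the third
# occ_dict key ('21', Community and Social Service); Management ('11') is index 0.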
#print(group_models_results[2].summary())
df = pd.read_pickle('SIPP_Dataset_2')

# Generate constant within-panel weights: every row of a person receives the
# final weight (wpfinwgt) recorded at that person's last reference date.

# Last observed reference date for each person, repeated on all of their rows.
df['end_date'] = df.groupby('unique_id')['ref_date'].transform('max')

# Keep wpfinwgt only on the row(s) at the person's last reference date and NaN
# everywhere else. Assigning the result directly (rather than calling
# .mask(..., inplace=True) on a column selection) guarantees the dataframe is
# actually updated, including under pandas Copy-on-Write.
df['end_weight'] = df['wpfinwgt'].where(df['ref_date'] == df['end_date'])

# Spread that single non-null weight across all of the person's rows; 'max'
# skips the NaN placeholders.
df['end_weight'] = df.groupby('unique_id')['end_weight'].transform('max')