# jedi_config.py

# Standard modules
import os
from astropy.time import Time
from scipy.io.idl import readsav
import numpy as np
import pandas as pd
from collections import OrderedDict
import itertools

# Custom modules
from jpm_logger import JpmLogger

# Declare variables to be accessed by anything that imports this module

# Input IDL savesets and the root folder that receives all catalog output.
# NOTE(review): these are absolute, machine-specific paths -- confirm/edit before running elsewhere.
eve_data_path = '/Users/jmason86/Dropbox/Research/Data/EVE/eve_lines_2010121-2014146 MEGS-A Mission Bare Bones.sav'
goes_data_path = '/Users/jmason86/Dropbox/Research/Data/GOES/events/GoesEventsC1MinMegsAEra.sav'
# Keep the trailing separator: paths in this file are built from output_path by plain string concatenation.
output_path = '/Users/jmason86/Dropbox/Research/Postdoc_NASA/Analysis/Coronal Dimming Analysis/JEDI Catalog/'
logger_filename = 'generate_jedi_catalog'  # Passed to JpmLogger as the log filename in init()

# Minimum time between consecutive GOES flare peaks [minutes] for init() to flag a flare as independent
threshold_time_prior_flare_minutes = 300.0
# The next four values are not read anywhere in this file.
# NOTE(review): presumably dimming-window bounds/limits [minutes] and the number of events to process -- confirm against the importing modules.
dimming_window_relative_to_flare_minutes_left = -1.0
dimming_window_relative_to_flare_minutes_right = 1440.0
threshold_minimum_dimming_window_minutes = 120.0
n_events = 5052
n_threads = 6  # The number of threads to use when doing parallel processing tasks
# NOTE(review): verbose is not read in this file; init() always creates the logger with console=False.
verbose = True  # Set to log the processing messages to disk and console.

# Placeholders that are filled in at runtime; they stay None until the named function has run.
eve_lines = None  # Set by load_eve_data()
goes_flare_events = None  # Set by load_goes_flare_event_data()
logger = None  # Set by init()
jedi_hdf_filename = None  # Set by init_filenames()
preflare_csv_filename = None  # Set by init_filenames()
all_minutes_since_last_flare = None  # Set by init()
preflare_indices = None  # Set by init()
ion_tuples = None  # Set by init_jedi_row()
ion_permutations = None  # Set by init_jedi_row()


def init():
    """Initialize the jedi catalog: set up logging, folders and filenames, load the data, and flag independent flares

        Inputs:
            None. Draws from the globals at the top of this file.

        Optional Inputs:
            None

        Outputs:
            All outputs are globals accessible by doing import jedi_config
            logger [JpmLogger]:                               A configurable log that can optionally also print to console.
            all_minutes_since_last_flare [numpy float array]: The time between each flare peak and the previous one [minutes].
                                                              Has one fewer element than there are flares.
            preflare_indices [numpy int array]:               The event indices of flares whose peak is more than
                                                              threshold_time_prior_flare_minutes after the previous peak.
                                                              Event 0 has no predecessor and so never appears here.
            The helper functions called here additionally set jedi_hdf_filename, preflare_csv_filename,
            eve_lines, and goes_flare_events.

        Optional Outputs:
             None

        Example:
            jedi_config.init()
    """
    global logger, all_minutes_since_last_flare, preflare_indices

    # Initialize logger
    # NOTE(review): the logger is created before init_folders() has ensured output_path exists -- confirm JpmLogger copes with that
    logger = JpmLogger(filename=logger_filename, path=output_path, console=False)
    logger.info('Logger initialized.')

    # Set up folders
    init_folders()

    # Set up filenames
    init_filenames()

    # Load the EVE data
    load_eve_data()

    # Get GOES flare events above C1 within date range corresponding to EVE data
    load_goes_flare_event_data()

    # Compute the amount of time between all flares [minutes]
    peak_time = goes_flare_events['peak_time']
    all_minutes_since_last_flare = (peak_time[1:] - peak_time[0:-1]).sec / 60.0

    # Figure out which flares are independent, store those indices
    is_flare_independent = all_minutes_since_last_flare > threshold_time_prior_flare_minutes
    preflare_indices = np.where(is_flare_independent)[0] + 1  # Add 1 to map back to event index and not to the differentiated vector

    # Report the true number of flares: the differenced vector is one element shorter than the event list
    n_flares_total = len(peak_time)
    logger.info('Found {0} independent flares of {1} total flares given a time separation of {2} minutes.'.format(len(preflare_indices), n_flares_total, threshold_time_prior_flare_minutes))


def init_folders():
    """Internal-use function to make sure the output folder and all of its subfolders exist; creates any that are missing

        Inputs:
            None. Draws from the globals at the top of this file (output_path).

        Optional Inputs:
            None.

        Outputs:
            No return. Creates folders on disk. Safe to call repeatedly: existing folders are left untouched.

        Optional Outputs:
             None.

        Example:
            init_folders()
    """
    subfolders = (
        'Processed Pre-Parameterization Data',
        'Processed Lines Data',
        'Peak Subtractions',
        'Fitting',
        'Depth',
        'Slope',
        'Duration',
        'Summary Plots',
        'Preflare Determination',  # init_filenames() places the pre-flare irradiance csv in this folder
    )

    # exist_ok avoids the check-then-create race and makes the call idempotent
    os.makedirs(output_path, exist_ok=True)
    for subfolder in subfolders:
        # os.path.join gives the right result whether or not output_path ends with a separator
        os.makedirs(os.path.join(output_path, subfolder), exist_ok=True)


def init_filenames():
    """Internal-use function to fill in the filename globals that are derived from output_path

        Inputs:
            None. Draws from the globals at the top of this file (output_path).

        Optional Inputs:
            None.

        Outputs:
            No return. Updates global variables.
            jedi_hdf_filename [str]:     output_path followed by 'jedi_' and the current time (astropy Time.now().iso),
                                         so each run gets its own name. No file extension is appended.
            preflare_csv_filename [str]: The path/filename of the computed pre-flare irradiances.

        Optional Outputs:
             None.

        Example:
            init_filenames()
    """
    global jedi_hdf_filename, preflare_csv_filename

    # Time-stamp the catalog name so separate runs do not overwrite each other
    run_timestamp = Time.now().iso
    catalog_basename = 'jedi_{0}'.format(run_timestamp)
    jedi_hdf_filename = output_path + catalog_basename

    preflare_relative_path = 'Preflare Determination/Preflare Irradiances.csv'
    preflare_csv_filename = os.path.join(output_path, preflare_relative_path)


def load_eve_data():
    """Internal-use function to load and clean the SDO/EVE data.

        Inputs:
            None. Draws from the globals at the top of this file (eve_data_path, logger).
            logger must already be set, i.e., init() has created it.

        Optional Inputs:
            None.

        Outputs:
            No return. Updates global variables.
            eve_lines [pandas DataFrame]: SDO/EVE level 2 lines data. One column per wavelength (column names are the
                                          wavelengths formatted to one decimal place), indexed by time and sorted by time.
                                          Irradiance values of -1 in the file are replaced with NaN.

        Optional Outputs:
             None.

        Example:
            load_eve_data()
    """
    global eve_lines

    # TODO: Replace this shortcut method with the method I'm building into sunpy
    logger.info('Loading EVE data.')
    eve_readsav = readsav(eve_data_path)

    # pandas doesn't like big endian, so swap the bytes and flip the dtype's byte-order flag to match.
    # ndarray.newbyteorder() was removed in NumPy 2.0; viewing with the swapped dtype is the equivalent that works on old and new NumPy.
    raw_irradiance = eve_readsav['irradiance']
    irradiance = raw_irradiance.byteswap().view(raw_irradiance.dtype.newbyteorder())
    irradiance[irradiance == -1] = np.nan  # -1 is treated as a missing-data marker

    wavelengths_str = ['{0:1.1f}'.format(wavelength) for wavelength in eve_readsav['wavelength']]

    eve_lines = pd.DataFrame(irradiance, columns=wavelengths_str)
    eve_lines.index = pd.to_datetime(eve_readsav.iso.astype(str))
    eve_lines.sort_index(inplace=True)
    # NOTE(review): drop_duplicates() compares the irradiance values of each row, not the timestamps, so rows that
    # repeat an earlier row's values (including all-NaN rows) are removed -- confirm this is the intended cleaning
    eve_lines = eve_lines.drop_duplicates()


def load_goes_flare_event_data():
    """Internal-use function to load and clean the GOES/XRS flare event data from NOAA/SWPC.

        Inputs:
            None. Draws from the globals at the top of this file (goes_data_path, logger).
            logger must already be set, i.e., init() has created it.

        Optional Inputs:
            None.

        Outputs:
            No return. Updates global variables.
            goes_flare_events [dict-like, as returned by scipy readsav]: Flares as observed by GOES/XRS.
                The 'class', 'event_peak_time_human', and 'event_start_time_human' entries are converted to str, and
                'peak_time' and 'start_time' entries (astropy Time) are added from the corresponding *_jd entries.

        Optional Outputs:
             None.

        Example:
            load_goes_flare_event_data()
    """
    global goes_flare_events

    # flares = get_goes_flare_events(eve_lines.index[0], eve_lines.index[-1])  # TODO: The method in sunpy needs fixing, issue 2434

    # Load GOES events from IDL saveset instead of directly through sunpy
    logger.info('Loading GOES flare events.')
    goes_flare_events = readsav(goes_data_path)

    # Convert the text fields to str arrays
    text_fields = ('class', 'event_peak_time_human', 'event_start_time_human')
    for field in text_fields:
        goes_flare_events[field] = goes_flare_events[field].astype(str)

    # Add astropy Time versions of the Julian-date fields
    time_fields = (('peak_time', 'event_peak_time_jd'),
                   ('start_time', 'event_start_time_jd'))
    for time_key, jd_key in time_fields:
        goes_flare_events[time_key] = Time(goes_flare_events[jd_key], format='jd', scale='utc', precision=0)


def init_jedi_row():
    """Internal-use function for defining the column headers in the JEDI catalog

        Inputs:
            None. Draws from the globals set up in init (eve_lines). So you must run the init function before calling this function.

        Optional Inputs:
            None

        Outputs:
            jedi_row [pandas DataFrame]: A single-row DataFrame populated with np.nan's (float dtype in every column).
                                         For n EVE lines it has 10 event columns, 17 columns per line, and
                                         18 columns per ordered pair of lines: 10 + 17*n + 18*n*(n-1) columns in total.
            Also updates global variables.
            ion_tuples [list of tuple]:     Every ordered pair of distinct eve_lines column names.
            ion_permutations [pandas Index]: The same pairs as strings of the form '<line> by <line>'.

        Optional Outputs:
             None

        Example:
            jedi_row = init_jedi_row()
    """
    global ion_tuples, ion_permutations

    # Columns describing the event itself
    event_columns = [
        'Event #',
        'GOES Flare Start Time',
        'GOES Flare Peak Time',
        'GOES Flare Class',
        'Pre-Flare Start Time',
        'Pre-Flare End Time',
        'Flare Interrupt',
        'Flare Latitude [deg]',
        'Flare Longitude [deg]',
        'Flare Position Angle [deg]',
    ]

    # Suffixes appended to every individual emission line
    line_suffixes = [
        ' Pre-Flare Irradiance [W/m2]',
        ' Slope Start Time',
        ' Slope End Time',
        ' Slope Min [%/s]',
        ' Slope Max [%/s]',
        ' Slope Mean [%/s]',
        ' Slope Uncertainty [%/s]',
        ' Depth First Time',
        ' Depth First [%]',
        ' Depth Max Time',
        ' Depth Max [%]',
        ' Depth Uncertainty [%]',
        ' Duration Start Time',
        ' Duration End Time',
        ' Duration [s]',
        ' Fitting Gamma',
        ' Fitting Score',
    ]

    # Suffixes appended to every ordered pair of emission lines
    permutation_suffixes = [
        ' Slope Start Time',
        ' Slope End Time',
        ' Slope Min [%/s]',
        ' Slope Max [%/s]',
        ' Slope Mean [%/s]',
        ' Slope Uncertainty [%/s]',
        ' Depth First Time',
        ' Depth First [%]',
        ' Depth Max Time',
        ' Depth Max [%]',
        ' Depth Uncertainty [%]',
        ' Duration Start Time',
        ' Duration End Time',
        ' Duration [s]',
        ' Correction Time Shift [s]',
        ' Correction Scale Factor',
        ' Fitting Gamma',
        ' Fitting Score',
    ]

    # Define the combination of columns of the JEDI catalog
    ion_tuples = list(itertools.permutations(eve_lines.columns.values, 2))
    ion_permutations = pd.Index([' by '.join(ion_pair) for ion_pair in ion_tuples])

    # Assemble every column name first (grouped by suffix, same order as before) and build the frame once.
    # Joining one group at a time re-copies the ever-growing frame on every join.
    column_names = list(event_columns)
    for suffix in line_suffixes:
        column_names.extend([line + suffix for line in eve_lines.columns])
    for suffix in permutation_suffixes:
        column_names.extend([ion_pair + suffix for ion_pair in ion_permutations])

    # Filling with np.nan directly yields numeric (float) columns, so no after-the-fact
    # pd.to_numeric(errors='ignore') pass (deprecated in pandas 2.2) is needed
    return pd.DataFrame(np.nan, index=pd.RangeIndex(1), columns=column_names)


def write_new_jedi_file_to_disk(jedi_row):
    """Write a jedi_row to disk as an HDF file -- intended to be called after first initialization of jedi_row

        Inputs:
            jedi_row [pandas DataFrame]: The JEDI DataFrame to be written

        Optional Inputs:
            None

        Outputs:
            No return. HDF file on disk at jedi_hdf_filename, stored under the key 'jedi'.
            The file is opened in write mode, so an existing file of the same name is replaced.

        Optional Outputs:
             None

        Example:
            jedi_row = init_jedi_row()
            write_new_jedi_file_to_disk(jedi_row)
    """
    jedi_row.to_hdf(path_or_buf=jedi_hdf_filename, key='jedi', mode='w')