Skip to content
Binary file added fink_science/data/models/model_orphans.pkl
Binary file not shown.
Empty file.
129 changes: 129 additions & 0 deletions fink_science/rubin/orphans/basic_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Copyright 2019-2022 AstroLab Software
# Authors: Marina Masson
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pandas as pd


def flux_to_mag(flux):
"""
Convert flux from milliJansky to AB Magnitude

1 Jy = 1e-23 erg/cm2/s/Hz
Fnu = 3631 Jy = 3.631*1e-20 erg/cm2/s/Hz
ABmag = 0-2.5*log10( Fnu )-48.6 = 0

Parameters
----------
flux: float
Flux in milli-Jansky

Returns
-------
mag: float
Corresponding AB Magnitude
"""
mag = -2.5 * np.log10(flux * 1.0e-26) - 48.6
return mag


def mag_to_flux(mag):
"""
Convert AB Magnitude to flux in milliJansky

1 Jy = 1e-23 erg/cm2/s/Hz
Fnu = 3631 Jy = 3.631*1e-20 erg/cm2/s/Hz
ABmag = 0-2.5*log10( Fnu )-48.6 = 0

Parameters
----------
mag: float
AB Magnitude

Returns
-------
flux: float
Corresponding flux in milliJansky
"""
flux = pow(10, (26 - (mag + 48.6) / 2.5))
return flux


def clean_and_sort_light_curve(times, mags, mags_err, filts):
"""
Sort the points by time and remove NaN points

Parameters
----------
time: pandas.Series of list of float
Concatenated times in MJD for the object
mags: pandas.Series of list of float
Concatenated magnitudes for the object
mags_err: pandas.Series of list of float
Concatenated magnitude errors for the object
filt: pandas.Series of list of float
Concatenated filters for the object

Returns
-------
cleaned_times: pandas.Series of list of float
Concatenated times in MJD for the object, sorted and without NaN points
cleaned_mags: pandas.Series of list of float
Concatenated magnitudes for the object, sorted and without NaN points
cleaned_errors: pandas.Series of list of float
Concatenated magnitude errors for the object, sorted and without NaN points
cleaned_filts: pandas.Series of list of float
Concatenated filters for the object, sorted and without NaN points
"""
cleaned_times = []
cleaned_magnitudes = []
cleaned_errors = []
cleaned_filters = []

for t, m, e, f in zip(times, mags, mags_err, filts):
# Convert to numpy arrays
t = np.array(t)
m = np.array(m)
e = np.array(e)
f = np.array(f)

# Create a mask for non-NaN magnitudes
mask = ~np.isnan(m)

# Apply the mask to both times and magnitudes
valid_times = t[mask]
valid_mags = m[mask]
valid_errs = e[mask]
valid_filts = f[mask]

# if len(valid_mags) > 4:
# Sort the cleaned data by time
sorted_indices = np.argsort(valid_times)
sorted_times = valid_times[sorted_indices]
sorted_magnitudes = valid_mags[sorted_indices]
sorted_errors = valid_errs[sorted_indices]
sorted_filters = valid_filts[sorted_indices]

cleaned_times.append(sorted_times)
cleaned_magnitudes.append(sorted_magnitudes)
cleaned_errors.append(sorted_errors)
cleaned_filters.append(sorted_filters)

return (
pd.Series(cleaned_times),
pd.Series(cleaned_magnitudes),
pd.Series(cleaned_errors),
pd.Series(cleaned_filters),
)
170 changes: 170 additions & 0 deletions fink_science/rubin/orphans/classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Copyright 2019-2022 AstroLab Software
# Authors: Marina Masson
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import pandas as pd
import numpy as np
import joblib

from sklearn import preprocessing

from fink_science.rubin.orphans.features_extraction import (
compute_duration_between_first_and_peak,
compute_rates,
compute_colours,
)
from fink_science.rubin.orphans.basic_functions import clean_and_sort_light_curve
from fink_science.rubin.orphans.fit import fit_light_curve


def get_features(ctimemjd, cabmags, cabmagserr, cfilts, valid):
"""
Computes the features from the detected light curve

Parameters
----------
ctimemjd: pandas.Series of list of float
Concatenated time in MJD for the object
cabmags: pandas.Series of list of float, or Nan
Concatenated magnitude for the object
cabmagserr: pandas.Series of list of float, or Nan
Concatenated errors on the magnitude for the object
cfilts: pandas.Series of list of int
Concatenated filters for the object

Returns
-------
out: pandas.Series of bool
Return a Pandas DataFrame containing the features
"""
# keep only non-NaN values and sort points by time
times, mags, err, filts = clean_and_sort_light_curve(
ctimemjd, cabmags, cabmagserr, cfilts
)

# duration between the first detection and the peak
duration = np.array([
compute_duration_between_first_and_peak(t, m)
for t, m, v in zip(times.values, mags.values, valid.values)
if v
])

# increase rate and decrease rates (average, in the 1/3 and the 3/3 parts of the light curve)
rates = np.array([
compute_rates(t, m, f)
for t, m, f, v in zip(times.values, mags.values, filts.values, valid.values)
if v
])

# colours for the pairs (g, r) and (r, i)
colours = np.array([
compute_colours(t, m, f)
for t, m, f, v in zip(times.values, mags.values, filts.values, valid.values)
if v
])

# parameters of the fit A, B, C, D and chi2
fit_parameters = np.array([
fit_light_curve(t, m, e, f)
for t, m, e, f, v in zip(
times.values, mags.values, err.values, filts.values, valid.values
)
if v
])

# gather all the features in one DataFrame
features = {
"duration": duration,
"increase_rate": rates[:, 0],
"decrease_rate_1": rates[:, 1],
"decrease_rate_3": rates[:, 2],
"g-r": colours[:, 0],
"r-i": colours[:, 1],
"A": fit_parameters[:, 0],
"B": fit_parameters[:, 1],
"C": fit_parameters[:, 2],
"D": fit_parameters[:, 3],
"A/B": fit_parameters[:, 0] / fit_parameters[:, 1],
"chi2": fit_parameters[:, 4],
}

df_features = pd.DataFrame(data=features)

return df_features


def get_probabilities(df_features, valid):
"""Returns probability of being an orphan afterglow predicted by the classifier

Parameters
----------
df_features: pd.DataFrame
Features extracted from the light curves
valid: np.array
Bool array, indicates if the light curve contains at least 5 data points (all filters)

Returns
-------
final_proba : np.array
Probabilities of being an orphan afterglow
Proba = 0. if the object is not valid
"""
final_proba = np.array([0.0] * len(valid)).astype(np.float64)

# load classifier
curdir = os.path.dirname(os.path.abspath(__file__))
model_path = curdir + "/../data/models/"
clf = joblib.load(model_path + "ml_model_orphans.pkl")

if len(df_features["duration"]) > 0:
# clean non-valid data
df_features = df_features.replace([np.inf, -np.inf], 1000)
df_features = df_features.fillna(0)

# normalise the features
features_norm = preprocessing.normalize(df_features, norm="max")
proba = clf.predict_proba(features_norm)
index_to_replace = np.where(valid.values)
final_proba[index_to_replace] = proba[:, 1]

return final_proba


def orphan_classifier(ctimemjd, cabmags, cabmagserr, cfilts, valid):
"""
Call the orphan_classifier

Parameters
----------
ctimemjd: pandas.Series of list of float
Concatenated time in MJD for the object
cabmags: pandas.Series of list of float, or Nan
Concatenated magnitude for the object
cabmagserr: pandas.Series of list of float, or Nan
Concatenated errors on the magnitude for the object
cfilts: pandas.Series of list of int
Concatenated filters for the object
valid: np.array
Bool array, indicates if the light curve contains at least 5 data points (all filters)

Returns
-------
out: pandas.Series of bool
Return a Pandas DataFrame with the proba of an event to be an orphan
"""
features = get_features(ctimemjd, cabmags, cabmagserr, cfilts, valid)
proba = get_probabilities(features, valid)

return proba
Loading