Skip to content

Commit

Permalink
0.6.16 a40
Browse files Browse the repository at this point in the history
  • Loading branch information
winedarksea committed Oct 3, 2024
1 parent aa68094 commit 29fe2e9
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 3 deletions.
9 changes: 7 additions & 2 deletions autots/evaluator/anomaly_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,9 @@ def __init__(
use_wkdeom_holidays=True,
use_lunar_holidays=True,
use_lunar_weekday=False,
use_islamic_holidays=True,
use_hebrew_holidays=True,
use_islamic_holidays=False,
use_hebrew_holidays=False,
use_hindu_holidays=False,
output: str = "multivariate",
n_jobs: int = 1,
):
Expand Down Expand Up @@ -292,6 +293,7 @@ def __init__(
self.use_lunar_weekday = use_lunar_weekday
self.use_islamic_holidays = use_islamic_holidays
self.use_hebrew_holidays = use_hebrew_holidays
self.use_hindu_holidays = use_hindu_holidays
self.n_jobs = n_jobs
self.output = output
self.anomaly_model = AnomalyDetector(
Expand All @@ -313,6 +315,7 @@ def detect(self, df):
self.lunar_weekday,
self.islamic_holidays,
self.hebrew_holidays,
self.hindu_holidays,
) = anomaly_df_to_holidays(
self.anomaly_model.anomalies,
splash_threshold=self.splash_threshold,
Expand All @@ -328,6 +331,7 @@ def detect(self, df):
use_lunar_weekday=self.use_lunar_weekday,
use_islamic_holidays=self.use_islamic_holidays,
use_hebrew_holidays=self.use_hebrew_holidays,
use_hindu_holidays=self.use_hindu_holidays,
)

def plot_anomaly(self, kwargs={}):
Expand Down Expand Up @@ -400,6 +404,7 @@ def dates_to_holidays(self, dates, style="flag", holiday_impacts=False):
lunar_weekday=self.lunar_weekday,
islamic_holidays=self.islamic_holidays,
hebrew_holidays=self.hebrew_holidays,
hindu_holidays=self.hindu_holidays,
)

def fit(self, df):
Expand Down
30 changes: 30 additions & 0 deletions autots/tools/anomaly_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
gregorian_to_chinese,
gregorian_to_islamic,
gregorian_to_hebrew,
gregorian_to_hindu,
)


Expand Down Expand Up @@ -706,6 +707,7 @@ def anomaly_df_to_holidays(
use_lunar_weekday=False,
use_islamic_holidays=False,
use_hebrew_holidays=False,
use_hindu_holidays=False,
):
if isinstance(anomaly_df, pd.Series):
stacked = anomaly_df.copy() # [anomaly_df == -1]
Expand Down Expand Up @@ -975,6 +977,30 @@ def anomaly_df_to_holidays(
)
else:
hebrew_holidays = None
if use_hindu_holidays:
hindu_df = gregorian_to_hindu(dates)
hindu_df.index.name = "date"
hindu_df = hindu_df.merge(stacked, left_index=True, right_index=True, how="outer")
hindu_df['occurrence_rate'] = hindu_df['count']

# Group by Hindu calendar components to find significant dates
hindu_holidays = (
hindu_df.groupby(["series", "hindu_month_number", "lunar_day"])
.agg(agg_dict)
.loc[
lambda df: (df["occurrence_rate"] >= threshold)
& (df["count"] >= min_occurrences),
]
).reset_index(drop=False)

hindu_holidays['holiday_name'] = (
'hindu_'
+ hindu_holidays['hindu_month_number'].astype(str).str.pad(2, side='left', fillchar="0")
+ "_"
+ hindu_holidays['lunar_day'].astype(str).str.pad(2, side='left', fillchar="0")
)
else:
hindu_holidays = None
return (
day_holidays,
wkdom_holidays,
Expand All @@ -983,6 +1009,7 @@ def anomaly_df_to_holidays(
lunar_weekday,
islamic_holidays,
hebrew_holidays,
hindu_holidays,
)


Expand All @@ -998,6 +1025,7 @@ def dates_to_holidays(
lunar_weekday=None,
islamic_holidays=None,
hebrew_holidays=None,
hindu_holidays=None,
max_features: int = None,
):
"""Populate date information for a given pd.DatetimeIndex.
Expand Down Expand Up @@ -1030,6 +1058,7 @@ def dates_to_holidays(
lunar_weekday,
islamic_holidays,
hebrew_holidays,
hindu_holidays,
]:
if holiday_df is not None:
if not holiday_df.empty:
Expand Down Expand Up @@ -1244,6 +1273,7 @@ def holiday_new_params(method='random'):
'use_lunar_weekday': random.choices([True, False], [0.05, 0.95])[0],
'use_islamic_holidays': random.choices([True, False], [0.1, 0.9])[0],
'use_hebrew_holidays': random.choices([True, False], [0.1, 0.9])[0],
'use_hindu_holidays': random.choices([True, False], [0.1, 0.9])[0],
}

def gaussian_mixture(df, n_components=2, tol=1e-3, max_iter=100, responsibility_threshold=0.05):
Expand Down
57 changes: 57 additions & 0 deletions autots/tools/calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,60 @@ def gregorian_to_hebrew(dates):
)
break
return pd.concat(date_list, axis=0).rename_axis(index='date')


def gregorian_to_hindu(datetime_index):
    """Convert Gregorian dates to approximate Hindu calendar date components.

    The Hindu calendar has numerous regional variations. This version was put
    together with the help of an LLM and delimits months by new moons (Amanta
    system). It gets the actual dates wrong, but the output appears to be
    consistently correlated with the real calendar, so it may still be useful
    as a modeling feature. Suggestions for improvement welcomed.

    Args:
        datetime_index (pd.DatetimeIndex, list, or str): date(s) to convert.
            A single date string is treated as a one-element index.

    Returns:
        pd.DataFrame: one row per input date (in sorted order), index named
            'date', with columns 'lunar_year', 'hindu_month_number',
            'hindu_month_name' and 'lunar_day'.
    """
    if isinstance(datetime_index, str):
        # pd.to_datetime on a bare string returns a scalar Timestamp, which has
        # no sort_values(); wrap it so a DatetimeIndex comes back instead.
        datetime_index = [datetime_index]
    if isinstance(datetime_index, list):
        datetime_input = pd.to_datetime(datetime_index).sort_values()
    else:
        datetime_input = datetime_index.sort_values()
    # Expand date range to cover previous year for new moons
    expanded_dates = pd.date_range(
        datetime_input[0] - pd.Timedelta(days=365), datetime_input[-1], freq='D'
    )
    min_year = np.min(expanded_dates.year)
    # Get moon phases
    moon_df = moon_phase_df(expanded_dates, epoch=2444238.5)
    # Use new moon dates to define lunar months (Amanta system)
    lunar_months = lunar_from_lunar(moon_df['new_moon'])
    # Merge with expanded dates; the zero-filled "date" column is only a
    # placeholder that carries the full daily index into the concat
    expanded_dates = pd.concat(
        [pd.Series(0, index=expanded_dates, name="date"), lunar_months], axis=1
    )
    # Carry the most recent known year/month values forward onto every day
    expanded_dates['syear'] = expanded_dates['syear'].ffill()
    expanded_dates['lunar_month'] = expanded_dates['lunar_month'].ffill()
    # Calculate lunar day (tithi), approximated as a 1-based count of rows
    # within each (syear, lunar_month) group
    expanded_dates['lunar_day'] = (
        expanded_dates.groupby(['syear', 'lunar_month']).cumcount() + 1
    )
    # NOTE(review): presumably syear is a year offset relative to the first
    # expanded year -- confirm against lunar_from_lunar
    expanded_dates['lunar_year'] = expanded_dates['syear'] + min_year
    # Assign approximate Hindu month names
    hindu_month_names = {
        1: 'Chaitra',
        2: 'Vaishakha',
        3: 'Jyeshtha',
        4: 'Ashadha',
        5: 'Shravana',
        6: 'Bhadrapada',
        7: 'Ashwin',
        8: 'Kartika',
        9: 'Margashirsha',
        10: 'Pausha',
        11: 'Magha',
        12: 'Phalguna',
    }
    # Adjust lunar_month to fit within 12 months
    expanded_dates['hindu_month_number'] = (
        (expanded_dates['lunar_month'] - 1) % 12
    ) + 1
    expanded_dates['hindu_month_name'] = expanded_dates['hindu_month_number'].map(
        hindu_month_names
    )
    # Return the data for the input dates
    return expanded_dates.loc[
        datetime_input,
        ['lunar_year', 'hindu_month_number', 'hindu_month_name', 'lunar_day'],
    ].rename_axis(index='date')
19 changes: 18 additions & 1 deletion tests/test_calendar_holiday.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np
import pandas as pd
from autots import load_daily
from autots.tools.calendar import gregorian_to_chinese, gregorian_to_islamic, gregorian_to_hebrew
from autots.tools.calendar import gregorian_to_chinese, gregorian_to_islamic, gregorian_to_hebrew, gregorian_to_hindu
from autots.tools.lunar import moon_phase
from autots.tools.holiday import holiday_flag
from autots.tools.seasonal import date_part
Expand Down Expand Up @@ -70,6 +70,23 @@ def test_hebrew(self):
self.assertEqual(result4, [5761, 10, 5])
self.assertEqual(result5, [5800, 2, 2])

def test_hindu(self):
    """Check that gregorian_to_hindu runs and returns the expected structure.

    Only the shape of the result is asserted. The conversion is documented
    as not yet matching real Hindu calendar dates, so the exact-value
    expectations for Diwali ('Kartika', lunar day 30 -- Amavasya is
    typically the 30th day) stay disabled until the conversion is corrected.
    """
    expected_columns = [
        'lunar_year',
        'hindu_month_number',
        'hindu_month_name',
        'lunar_day',
    ]
    # Diwali was on November 4, 2021 and on October 31, 2024
    for diwali in ('2021-11-04', '2024-10-31'):
        with self.subTest(date=diwali):
            result = gregorian_to_hindu(pd.to_datetime([diwali]))
            # one input date should come back as exactly one row
            self.assertEqual(len(result), 1)
            self.assertEqual(list(result.columns), expected_columns)
            self.assertEqual(result.index.name, 'date')
            # Disabled until the conversion is accurate:
            # self.assertEqual(result.iloc[0]['hindu_month_name'], 'Kartika')
            # self.assertEqual(result.iloc[0]['lunar_day'], 30)


class TestHolidayFlag(unittest.TestCase):

Expand Down

0 comments on commit 29fe2e9

Please sign in to comment.