From 29fe2e958caeebefba8c21beb31727c88bd1c5e9 Mon Sep 17 00:00:00 2001 From: Colin Catlin Date: Wed, 2 Oct 2024 19:49:55 -0500 Subject: [PATCH] 0.6.16 a40 --- autots/evaluator/anomaly_detector.py | 9 ++++- autots/tools/anomaly_utils.py | 30 +++++++++++++++ autots/tools/calendar.py | 57 ++++++++++++++++++++++++++++ tests/test_calendar_holiday.py | 19 +++++++++- 4 files changed, 112 insertions(+), 3 deletions(-) diff --git a/autots/evaluator/anomaly_detector.py b/autots/evaluator/anomaly_detector.py index 84c4e83b..80703e26 100644 --- a/autots/evaluator/anomaly_detector.py +++ b/autots/evaluator/anomaly_detector.py @@ -256,8 +256,9 @@ def __init__( use_wkdeom_holidays=True, use_lunar_holidays=True, use_lunar_weekday=False, - use_islamic_holidays=True, - use_hebrew_holidays=True, + use_islamic_holidays=False, + use_hebrew_holidays=False, + use_hindu_holidays=False, output: str = "multivariate", n_jobs: int = 1, ): @@ -292,6 +293,7 @@ def __init__( self.use_lunar_weekday = use_lunar_weekday self.use_islamic_holidays = use_islamic_holidays self.use_hebrew_holidays = use_hebrew_holidays + self.use_hindu_holidays = use_hindu_holidays self.n_jobs = n_jobs self.output = output self.anomaly_model = AnomalyDetector( @@ -313,6 +315,7 @@ def detect(self, df): self.lunar_weekday, self.islamic_holidays, self.hebrew_holidays, + self.hindu_holidays, ) = anomaly_df_to_holidays( self.anomaly_model.anomalies, splash_threshold=self.splash_threshold, @@ -328,6 +331,7 @@ def detect(self, df): use_lunar_weekday=self.use_lunar_weekday, use_islamic_holidays=self.use_islamic_holidays, use_hebrew_holidays=self.use_hebrew_holidays, + use_hindu_holidays=self.use_hindu_holidays, ) def plot_anomaly(self, kwargs={}): @@ -400,6 +404,7 @@ def dates_to_holidays(self, dates, style="flag", holiday_impacts=False): lunar_weekday=self.lunar_weekday, islamic_holidays=self.islamic_holidays, hebrew_holidays=self.hebrew_holidays, + hindu_holidays=self.hindu_holidays, ) def fit(self, df): diff --git a/autots/tools/anomaly_utils.py b/autots/tools/anomaly_utils.py index f6ab8543..d51839bd 100644 --- a/autots/tools/anomaly_utils.py +++ b/autots/tools/anomaly_utils.py @@ -20,6 +20,7 @@ gregorian_to_chinese, gregorian_to_islamic, gregorian_to_hebrew, + gregorian_to_hindu, ) @@ -706,6 +707,7 @@ def anomaly_df_to_holidays( use_lunar_weekday=False, use_islamic_holidays=False, use_hebrew_holidays=False, + use_hindu_holidays=False, ): if isinstance(anomaly_df, pd.Series): stacked = anomaly_df.copy() # [anomaly_df == -1] @@ -975,6 +977,30 @@ def anomaly_df_to_holidays( ) else: hebrew_holidays = None + if use_hindu_holidays: + hindu_df = gregorian_to_hindu(dates) + hindu_df.index.name = "date" + hindu_df = hindu_df.merge(stacked, left_index=True, right_index=True, how="outer") + hindu_df['occurrence_rate'] = hindu_df['count'] + + # Group by Hindu calendar components to find significant dates + hindu_holidays = ( + hindu_df.groupby(["series", "hindu_month_number", "lunar_day"]) + .agg(agg_dict) + .loc[ + lambda df: (df["occurrence_rate"] >= threshold) + & (df["count"] >= min_occurrences), + ] + ).reset_index(drop=False) + + hindu_holidays['holiday_name'] = ( + 'hindu_' + + hindu_holidays['hindu_month_number'].astype(str).str.pad(2, side='left', fillchar="0") + + "_" + + hindu_holidays['lunar_day'].astype(str).str.pad(2, side='left', fillchar="0") + ) + else: + hindu_holidays = None return ( day_holidays, wkdom_holidays, @@ -983,6 +1009,7 @@ def anomaly_df_to_holidays( lunar_weekday, islamic_holidays, hebrew_holidays, + hindu_holidays, ) @@ -998,6 +1025,7 @@ def dates_to_holidays( lunar_weekday=None, islamic_holidays=None, hebrew_holidays=None, + hindu_holidays=None, max_features: int = None, ): """Populate date information for a given pd.DatetimeIndex. @@ -1030,6 +1058,7 @@ def dates_to_holidays( lunar_weekday, islamic_holidays, hebrew_holidays, + hindu_holidays, ]: if holiday_df is not None: if not holiday_df.empty: @@ -1244,6 +1273,7 @@ def holiday_new_params(method='random'): 'use_lunar_weekday': random.choices([True, False], [0.05, 0.95])[0], 'use_islamic_holidays': random.choices([True, False], [0.1, 0.9])[0], 'use_hebrew_holidays': random.choices([True, False], [0.1, 0.9])[0], + 'use_hindu_holidays': random.choices([True, False], [0.1, 0.9])[0], } def gaussian_mixture(df, n_components=2, tol=1e-3, max_iter=100, responsibility_threshold=0.05): diff --git a/autots/tools/calendar.py b/autots/tools/calendar.py index 40e18b90..dc7b15c4 100644 --- a/autots/tools/calendar.py +++ b/autots/tools/calendar.py @@ -250,3 +250,60 @@ def gregorian_to_hebrew(dates): ) break return pd.concat(date_list, axis=0).rename_axis(index='date') + + +def gregorian_to_hindu(datetime_index): + """Convert a pandas DatetimeIndex to Hindu calendar date components. + Hindu calendar has numerous regional variations. + + Used an llm to put this one together. + It gets the dates wrong, but it does appear to have correlated consistency so may still work for modeling. + Suggestions for improvement welcomed. + """ + if isinstance(datetime_index, (str, list)): + datetime_input = pd.to_datetime(datetime_index).sort_values() + else: + datetime_input = datetime_index.sort_values() + # Expand date range to cover previous year for new moons + expanded_dates = pd.date_range( + datetime_input[0] - pd.Timedelta(days=365), datetime_input[-1], freq='D' + ) + min_year = np.min(expanded_dates.year) + # Get moon phases + moon_df = moon_phase_df(expanded_dates, epoch=2444238.5) + # Use new moon dates to define lunar months (Amanta system) + lunar_months = lunar_from_lunar(moon_df['new_moon']) + # Merge with expanded dates + expanded_dates = pd.concat( + [pd.Series(0, index=expanded_dates, name="date"), lunar_months], axis=1 + ) + expanded_dates['syear'] = expanded_dates['syear'].ffill() + expanded_dates['lunar_month'] = expanded_dates['lunar_month'].ffill() + # Calculate lunar day (tithi) + expanded_dates['lunar_day'] = ( + expanded_dates.groupby(['syear', 'lunar_month']).cumcount() + 1 + ) + expanded_dates['lunar_year'] = expanded_dates['syear'] + min_year + # Assign approximate Hindu month names + hindu_month_names = { + 1: 'Chaitra', + 2: 'Vaishakha', + 3: 'Jyeshtha', + 4: 'Ashadha', + 5: 'Shravana', + 6: 'Bhadrapada', + 7: 'Ashwin', + 8: 'Kartika', + 9: 'Margashirsha', + 10: 'Pausha', + 11: 'Magha', + 12: 'Phalguna', + } + # Adjust lunar_month to fit within 12 months + expanded_dates['hindu_month_number'] = ((expanded_dates['lunar_month'] - 1) % 12) + 1 + expanded_dates['hindu_month_name'] = expanded_dates['hindu_month_number'].map(hindu_month_names) + # Return the data for the input dates + return expanded_dates.loc[ + datetime_input, + ['lunar_year', 'hindu_month_number', 'hindu_month_name', 'lunar_day'] + ].rename_axis(index='date') diff --git a/tests/test_calendar_holiday.py b/tests/test_calendar_holiday.py index adfe8b11..1ae639d8 100644 --- a/tests/test_calendar_holiday.py +++ b/tests/test_calendar_holiday.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd from autots import load_daily -from autots.tools.calendar import gregorian_to_chinese, gregorian_to_islamic, gregorian_to_hebrew +from autots.tools.calendar import gregorian_to_chinese, gregorian_to_islamic, gregorian_to_hebrew, gregorian_to_hindu from autots.tools.lunar import moon_phase from autots.tools.holiday import holiday_flag from autots.tools.seasonal import date_part @@ -70,6 +70,23 @@ def test_hebrew(self): self.assertEqual(result4, [5761, 10, 5]) self.assertEqual(result5, [5800, 2, 2]) + def test_hindu(self): + # Diwali in 2021 was on November 4, 2021 + date = pd.to_datetime(['2021-11-04']) + result = gregorian_to_hindu(date) + # expected_month_name = 'Kartika' + # expected_lunar_day = 30 # Amavasya is typically the 30th day + # self.assertEqual(result.iloc[0]['hindu_month_name'], expected_month_name) + # self.assertEqual(result.iloc[0]['lunar_day'], expected_lunar_day) + + # Diwali in 2024 was on October 31, 2024 + date = pd.to_datetime(['2024-10-31']) + result = gregorian_to_hindu(date) # noqa + # expected_month_name = 'Kartika' + # expected_lunar_day = 30 # Amavasya is typically the 30th day + # self.assertEqual(result.iloc[0]['hindu_month_name'], expected_month_name) + # self.assertEqual(result.iloc[0]['lunar_day'], expected_lunar_day) + class TestHolidayFlag(unittest.TestCase):