From 3669e28b0a3d51f729c16355a6696665047a0f9c Mon Sep 17 00:00:00 2001 From: Nemo Date: Tue, 28 Jan 2025 15:51:53 +0530 Subject: [PATCH] Add AMFI Fund Details --- .gitignore | 482 +++++++++++++++++++++++++++++++++++++++++++++++ .python-version | 1 + Makefile | 12 +- pyproject.toml | 10 + requirements.txt | 12 ++ src/amc.py | 94 +++++++++ 6 files changed, 608 insertions(+), 3 deletions(-) create mode 100644 .gitignore create mode 100644 .python-version create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 src/amc.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e96e03d --- /dev/null +++ b/.gitignore @@ -0,0 +1,482 @@ +*.xlsx +uv.lock +# These are Excel-XML files +details/734.xml +details/11659.xml +details/5285.xml +details/13851.xml +details/4450.xml +details/12123.xml +details/13182.xml +details/2787.xml +details/3853.xml +details/10883.xml +details/13674.xml +details/12233.xml +details/13767.xml +details/11842.xml +details/2540.xml +details/5923.xml +details/10466.xml +details/13078.xml +details/13509.xml +details/4480.xml +details/1527.xml +details/11561.xml +details/12327.xml +details/13634.xml +details/8830.xml +details/13976.xml +details/11764.xml +details/13115.xml +details/13769.xml +details/11788.xml +details/855.xml +details/11579.xml +details/13170.xml +details/1026.xml +details/1481.xml +details/11654.xml +details/11503.xml +details/13102.xml +details/11870.xml +details/11638.xml +details/11839.xml +details/11660.xml +details/13678.xml +details/5736.xml +details/12353.xml +details/13239.xml +details/13599.xml +details/12218.xml +details/9416.xml +details/11632.xml +details/10914.xml +details/6637.xml +details/13958.xml +details/11535.xml +details/11301.xml +details/12711.xml +details/12217.xml +details/11623.xml +details/13503.xml +details/594.xml +details/13651.xml +details/12262.xml +details/13824.xml +details/9243.xml +details/13068.xml +details/13164.xml +details/12234.xml 
+details/12751.xml +details/12699.xml +details/13938.xml +details/13171.xml +details/12112.xml +details/11781.xml +details/2688.xml +details/11569.xml +details/11803.xml +details/12306.xml +details/9464.xml +details/11823.xml +details/7179.xml +details/11472.xml +details/13070.xml +details/1022.xml +details/12367.xml +details/11815.xml +details/10933.xml +details/8318.xml +details/11847.xml +details/6636.xml +details/11896.xml +details/5009.xml +details/4701.xml +details/13165.xml +details/1024.xml +details/11670.xml +details/13559.xml +details/12081.xml +details/10797.xml +details/12305.xml +details/11733.xml +details/13583.xml +details/712.xml +details/1465.xml +details/1256.xml +details/136.xml +details/19.xml +details/11714.xml +details/11650.xml +details/826.xml +details/2922.xml +details/6642.xml +details/11898.xml +details/4356.xml +details/4479.xml +details/2082.xml +details/11568.xml +details/11804.xml +details/9087.xml +details/13577.xml +details/12341.xml +details/11876.xml +details/11581.xml +details/12713.xml +details/134.xml +details/13774.xml +details/4477.xml +details/4515.xml +details/13953.xml +details/132.xml +details/8475.xml +details/13181.xml +details/14083.xml +details/14018.xml +details/1475.xml +details/11192.xml +details/13500.xml +details/5400.xml +details/13094.xml +details/11874.xml +details/1474.xml +details/8803.xml +details/9468.xml +details/12245.xml +details/11969.xml +details/1278.xml +details/17.xml +details/1555.xml +details/13218.xml +details/11627.xml +details/11195.xml +details/12333.xml +details/18.xml +details/2081.xml +details/13585.xml +details/1025.xml +details/13551.xml +details/1437.xml +details/1018.xml +details/11218.xml +details/13192.xml +details/11497.xml +details/11297.xml +details/7304.xml +details/1020.xml +details/8541.xml +details/11712.xml +details/12180.xml +details/14007.xml +details/12056.xml +details/12708.xml +details/13819.xml +details/12371.xml +details/6638.xml +details/11849.xml +details/11895.xml 
+details/10989.xml +details/11501.xml +details/11525.xml +details/12136.xml +details/6648.xml +details/539.xml +details/6663.xml +details/12272.xml +details/11977.xml +details/11744.xml +details/11240.xml +details/12067.xml +details/13960.xml +details/13173.xml +details/527.xml +details/13865.xml +details/8993.xml +details/1571.xml +details/13822.xml +details/13930.xml +details/11710.xml +details/13110.xml +details/11723.xml +details/13156.xml +details/14021.xml +details/1268.xml +details/7846.xml +details/130.xml +details/13691.xml +details/954.xml +details/6687.xml +details/12698.xml +details/11567.xml +details/1479.xml +details/12372.xml +details/11946.xml +details/1021.xml +details/11897.xml +details/12697.xml +details/13631.xml +details/11833.xml +details/13125.xml +details/11524.xml +details/11899.xml +details/12369.xml +details/12700.xml +details/9531.xml +details/11704.xml +details/20.xml +details/13789.xml +details/461.xml +details/11655.xml +details/11901.xml +details/4267.xml +details/12061.xml +details/12155.xml +details/13232.xml +details/13191.xml +details/13100.xml +details/3875.xml +details/11798.xml +details/5385.xml +details/583.xml +details/14057.xml +details/12328.xml +details/10846.xml +details/13238.xml +details/13660.xml +details/12331.xml +details/13883.xml +details/13966.xml +details/1504.xml +details/11883.xml +details/14060.xml +details/13840.xml +details/11759.xml +details/4225.xml +details/2899.xml +details/11671.xml +details/13610.xml +details/11884.xml +details/11217.xml +details/511.xml +details/11450.xml +details/4556.xml +details/1114.xml +details/13975.xml +details/11625.xml +details/11735.xml +details/13504.xml +details/8729.xml +details/12253.xml +details/13598.xml +details/11907.xml +details/11566.xml +details/13104.xml +details/13096.xml +details/9003.xml +details/12727.xml +details/11852.xml +details/11893.xml +details/11328.xml +details/4565.xml +details/15.xml +details/13596.xml +details/6634.xml +details/11717.xml 
+details/1145.xml +details/11639.xml +details/796.xml +details/11984.xml +details/11496.xml +details/12738.xml +details/13124.xml +details/1700.xml +details/11779.xml +details/11322.xml +details/11681.xml +details/10454.xml +details/3358.xml +details/11812.xml +details/79.xml +details/11548.xml +details/11857.xml +details/11858.xml +details/13688.xml +details/12726.xml +details/13795.xml +details/12036.xml +details/4313.xml +details/14087.xml +details/6639.xml +details/11266.xml +details/11713.xml +details/12113.xml +details/11749.xml +details/10939.xml +details/12718.xml +details/11999.xml +details/4508.xml +details/1221.xml +details/12247.xml +details/4481.xml +details/4486.xml +details/4478.xml +details/10593.xml +details/1019.xml +details/11673.xml +details/11727.xml +details/12270.xml +details/133.xml +details/1473.xml +details/1027.xml +details/460.xml +details/13499.xml +details/12294.xml +details/13593.xml +details/769.xml +details/12308.xml +details/11935.xml +details/13101.xml +details/13105.xml +details/3774.xml +details/6688.xml +details/11676.xml +details/8769.xml +details/11892.xml +details/13951.xml +details/13790.xml +details/14075.xml +details/13531.xml +details/11640.xml +details/6123.xml +details/510.xml +details/11708.xml +details/13158.xml +details/13896.xml +details/13234.xml +details/11277.xml +details/11642.xml +details/9443.xml +details/14025.xml +details/526.xml +details/753.xml +details/1485.xml +details/13917.xml +details/11954.xml +details/12295.xml +details/12026.xml +details/8601.xml +details/11171.xml +details/5001.xml +details/12171.xml +details/2920.xml +details/11502.xml +details/11294.xml +details/7653.xml +details/4728.xml +details/12732.xml +details/5713.xml +details/13989.xml +details/710.xml +details/9104.xml +details/13501.xml +details/11183.xml +details/12340.xml +details/6900.xml +details/8332.xml +details/12731.xml +details/11375.xml +details/11474.xml +details/12140.xml +details/7124.xml +details/11498.xml 
+details/13528.xml +details/13174.xml +details/11785.xml +details/80.xml +details/11512.xml +details/12259.xml +details/22.xml +details/1023.xml +details/3735.xml +details/14082.xml +details/13128.xml +details/12101.xml +details/770.xml +details/11900.xml +details/11859.xml +details/447.xml +details/6641.xml +details/10956.xml +details/12320.xml +details/11641.xml +details/12183.xml +details/13614.xml +details/11989.xml +details/5559.xml +details/12332.xml +details/1640.xml +details/13077.xml +details/12703.xml +details/783.xml +details/2014.xml +details/11633.xml +details/8777.xml +details/1476.xml +details/12071.xml +details/129.xml +details/1068.xml +details/12161.xml +details/701.xml +details/11235.xml +details/11750.xml +details/11703.xml +details/11789.xml +details/11818.xml +details/131.xml +details/11800.xml +details/1480.xml +details/11971.xml +details/709.xml +details/8630.xml +details/4371.xml +details/12220.xml +details/11618.xml +details/12366.xml +details/6479.xml +details/13118.xml +details/11484.xml +details/3445.xml +details/538.xml +details/11624.xml +details/11415.xml +details/14046.xml +details/14017.xml +details/5691.xml +details/11669.xml +details/4353.xml +details/8338.xml +details/622.xml +details/10440.xml +details/11209.xml +details/11481.xml +details/11972.xml +details/11187.xml +details/11737.xml +details/11831.xml +details/12028.xml +details/11770.xml +details/11304.xml +details/11169.xml +details/11517.xml +details/11753.xml +details/12151.xml +details/11615.xml +details/458.xml +details/13219.xml +details/13114.xml +details/135.xml +details/11292.xml +details/4687.xml +details/13803.xml +details/11807.xml diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..e4fba21 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/Makefile b/Makefile index 3286e3f..41d812a 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,17 @@ -kuvera: +kuvera.csv: curl --retry 10 \ --connect-timeout 30 \ 
--retry-max-time 100 \ --silent https://api.kuvera.in/mf/api/v4/fund_schemes/list.json | \ jq -r '.[] | .[] | .[] | .[] | .c' | sort -u | xargs -n150 src/fetch.sh sort -uo _.csv _.csv - cat src/header.csv _.csv > data.csv + cat src/header.csv _.csv > kuvera.csv rm _.csv -all: kuvera \ No newline at end of file +all: kuvera.csv amfi_funds.csv + +amfi_funds.csv: + python src/amc.py + +requirements.txt: + uv pip compile pyproject.toml \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4f96622 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "kuvera-mutual-funds-lookup" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ + "lxml>=5.3.0", + "requests>=2.32.3", +] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..343cab4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,12 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile pyproject.toml +certifi==2024.12.14 + # via requests +charset-normalizer==3.4.1 + # via requests +idna==3.10 + # via requests +requests==2.32.3 + # via kuvera-mutual-funds-lookup (pyproject.toml) +urllib3==2.3.0 + # via requests
diff --git a/src/amc.py b/src/amc.py
new file mode 100644
index 0000000..ab33d4a
--- /dev/null
+++ b/src/amc.py
@@ -0,0 +1,115 @@
+import requests
+import csv
+import lxml.etree
+import json
+import os
+import re
+from typing import Iterator, List, Tuple
+
+# Seconds to wait on any single AMFI request. requests applies no timeout by
+# default, so a stalled connection would otherwise hang the whole run.
+REQUEST_TIMEOUT = 30
+
+# Matches one `option value="..."...>label` element without crossing a line
+# break; group 1 is the value attribute, group 2 the visible label.
+OPTION_PATTERN = re.compile(r'option value="([^"\n]*)"[^>\n]*>([^<\n]*)')
+
+
+def generate_amc_ids() -> Iterator[Tuple[str, str]]:
+    """Yield (amc_id, amc_name) pairs scraped from the AMFI website.
+
+    Pairs are yielded in the order the options appear in the page. Every
+    option with an all-digit value is reported, including several options
+    that share one line of HTML.
+
+    Raises:
+        requests.HTTPError: if the page request returns an error status.
+    """
+    url = "https://www.amfiindia.com/research-information/other-data/scheme-details"
+    response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+
+    for match in OPTION_PATTERN.finditer(response.text):
+        value, fund_name = match.groups()
+        # Options without a purely numeric value are skipped (presumably
+        # placeholder entries rather than real AMCs).
+        if value.isdigit():
+            yield (value, fund_name)
+
+
+def fetch_fund_ids(amc_id: str) -> List[Tuple[str, str]]:
+    """Fetch (fund_id, fund_name) pairs for a given AMC ID.
+
+    Only entries whose Value parses to a positive integer are kept. That
+    already drops a "-1" entry (which the previous code tried to special-case
+    as a dead AMC after filtering, where it could never be seen), so an AMC
+    with no positive entries simply yields an empty list.
+
+    Raises:
+        requests.HTTPError: if the request returns an error status.
+        ValueError: if an entry's Value string cannot be parsed as an integer.
+    """
+    url = 'https://www.amfiindia.com/modules/FetchSchemeFromMFID'
+    headers = {
+        "X-Requested-With": "XMLHttpRequest"
+    }
+    data = {
+        "ID": amc_id
+    }
+
+    response = requests.post(url, headers=headers, data=data, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+
+    return [
+        (fund['Value'], fund['Text'])
+        for fund in response.json()
+        if int(fund['Value']) > 0
+    ]
+
+
+def write_to_csv(data: List[Tuple[str, str, str]], filename: str = 'amfi_funds.csv'):
+    """Write (amc_id, fund_id, fund_name) rows to a CSV file after a header."""
+    # Explicit UTF-8 so names with non-ASCII characters do not depend on the
+    # platform's default encoding.
+    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
+        writer = csv.writer(csvfile)
+        writer.writerow(['AMC_ID', 'MF_ID', 'MF_NAME'])  # Write headers
+        writer.writerows(data)
+
+
+def download_fund_ssd(fund_id: str, fund_name: str, amc_name: str):
+    """Download a fund's SSD XML into details/ unless it is already there.
+
+    A 404 response is treated as "no document for this fund" and skipped
+    silently; any other error status raises requests.HTTPError.
+    """
+    file_name = f'details/{fund_id}.xml'
+    if os.path.exists(file_name):
+        return
+
+    url = f"https://portal.amfiindia.com/spages/SSD_{fund_id}.xml"
+    response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    if response.status_code == 404:
+        return
+    response.raise_for_status()
+
+    # The target directory may not exist on a fresh checkout.
+    os.makedirs(os.path.dirname(file_name), exist_ok=True)
+    with open(file_name, 'wb') as f:
+        f.write(response.content)
+    print(f"\"{amc_name}\", {fund_id}, \"{fund_name}\", DOWNLOADED")
+
+
+def main():
+    """Collect every AMC's funds, download missing SSD files, write the CSV."""
+    all_data = []
+    for amc_id, amc_name in generate_amc_ids():
+        for fund_id, fund_name in fetch_fund_ids(amc_id):
+            download_fund_ssd(fund_id, fund_name, amc_name)
+            all_data.append((amc_id, fund_id, fund_name))
+
+    write_to_csv(all_data)
+
+
+if __name__ == "__main__":
+    main()