Merge pull request #2 from LeadsOnTrees/add-return-report
Add return report
tadasgedgaudas authored Feb 9, 2025
2 parents e3b7e7c + 21c723a commit 98929bf
Showing 15 changed files with 385 additions and 42 deletions.
15 changes: 15 additions & 0 deletions README.md
@@ -55,6 +55,21 @@ uv run research https://example.com --max-results 30
However, the more pages you scrape, the more data the LLM has to work with, and the better the insights it can give you.


Installing:

```
pip install git+https://github.com/LeadsOnTrees/company-researcher-agent.git
```
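The scraper is built on Playwright (a declared dependency), so if its browser binaries are not already present on your machine you will likely also need to run `playwright install` once after installing the package.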

And then run:

```python
from company_researcher import CompanyResearcher

researcher = CompanyResearcher(url="https://example.com")
report = await researcher.research()
```
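Because `research()` is a coroutine (see the `main.py` changes below), the snippet above assumes an async context such as a REPL with top-level `await`. From a plain script, a minimal sketch would wrap it in `asyncio.run`; the `main` wrapper here is purely illustrative:

```python
# Minimal sketch for running the example from a regular script.
import asyncio

from company_researcher import CompanyResearcher


async def main() -> None:
    researcher = CompanyResearcher(url="https://example.com")
    report = await researcher.research()
    print(report.title)  # Report also carries company_description, careers_info, funding_data


asyncio.run(main())
```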

# What is LeadsOnTrees?

LeadsOnTrees is a platform that aggregates VC-funded startups and their founders.
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -1,14 +1,14 @@
[project]
name = "company-researcher"
version = "0.1.0"
version = "1.0.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
requires-python = ">=3.10"
dependencies = [
    "beautifulsoup4>=4.12.3",
    "instructor>=1.7.2",
    "jinja2>=3.1.5",
    "playwright>=1.49.1",
    "playwright>=1",
    "pydantic>=2.10.6",
    "pydantic-settings>=2.7.1",
    "tenacity>=9.0.0",
3 changes: 3 additions & 0 deletions src/company_researcher/__init__.py
@@ -0,0 +1,3 @@
from company_researcher.main import CompanyResearcher

__all__ = ["CompanyResearcher"]
2 changes: 1 addition & 1 deletion src/company_researcher/config.py
@@ -21,6 +21,6 @@ def validate_llm_api_keys(self):
    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"

        extra = "ignore"

config = Settings()
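The added `extra = "ignore"` matters because pydantic-settings loads every entry from the configured `.env` file; without it, entries that `Settings` does not declare can be rejected as extra inputs. A standalone sketch (the `DemoSettings` model and the `demo.env` contents below are hypothetical, not the project's actual settings):

```python
# Hypothetical demo, not the project's Settings: with extra = "ignore",
# undeclared dotenv entries are dropped instead of failing validation.
from pathlib import Path

from pydantic_settings import BaseSettings

Path("demo.env").write_text("API_KEY=abc\nUNRELATED_VAR=should-be-ignored\n")


class DemoSettings(BaseSettings):
    api_key: str = "unset"

    class Config:
        env_file = "demo.env"
        env_file_encoding = "utf-8"
        extra = "ignore"


print(DemoSettings().model_dump())  # {'api_key': 'abc'}
```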
33 changes: 22 additions & 11 deletions src/company_researcher/main.py
@@ -1,5 +1,5 @@
from collections import defaultdict
from company_researcher.models.models import Response
from company_researcher.models import Report, Response
from company_researcher.modules.apis.lot_funding import LeadsOnTreesClient
from company_researcher.modules.prompts.career_generator import CareersInfoGenerator
from company_researcher.modules.prompts.company_description import (
@@ -25,11 +25,13 @@ def __init__(
        max_depth: int = 5,
        max_concurrent: int = 8,
        max_results: int = 100,
        generate_pdf: bool = True,
    ) -> None:
        self.url = url
        self.max_depth = max_depth
        self.max_concurrent = max_concurrent
        self.max_results = max_results
        self.generate_pdf = generate_pdf

    async def deep_crawl(self, url: str) -> list[Response]:
        worker = ScraperWorker(
@@ -59,7 +61,7 @@ async def load_page_types_to_responses(
            page_types[page_type].append(response)
        return page_types

    async def research(self) -> None:
    async def research(self) -> Report:
        logger.info("Starting deep crawl for URL: %s", self.url)
        responses = await self.deep_crawl(self.url)
        logger.info("Found %d responses", len(responses))
@@ -97,14 +99,23 @@ async def research(self) -> None:
        company_description = await company_description_generator.generate()
        logger.info("Generated company description")

        # Generate PDF report.
        logger.info("Generating PDF report")
        pdf_generator = PDFReport(
            report_title, company_description, careers_info, funding_data
        report = Report(
            title=report_title,
            company_description=company_description,
            careers_info=careers_info,
            funding_data=funding_data,
        )
        pdf_path = await pdf_generator.generate()
        logger.info("PDF report generated: %s", pdf_path)

        # Open the generated PDF report.
        logger.info("Opening PDF report")
        pdf_generator.open()
        if self.generate_pdf:
            # Generate PDF report.
            logger.info("Generating PDF report")
            pdf_generator = PDFReport(report)

            pdf_path = await pdf_generator.generate()
            logger.info("PDF report generated: %s", pdf_path)

            # Open the generated PDF report.
            logger.info("Opening PDF report")
            pdf_generator.open()

        return report
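With `research()` now returning a `Report` and the new `generate_pdf` flag, callers can consume the structured data without producing a PDF at all. A hedged usage sketch (it assumes only the constructor signature and return type shown above; `model_dump_json` is standard pydantic v2, which the project already depends on):

```python
# Sketch: fetch the structured report without writing a PDF.
import asyncio

from company_researcher import CompanyResearcher


async def main() -> None:
    researcher = CompanyResearcher(url="https://example.com", generate_pdf=False)
    report = await researcher.research()
    # Report bundles title, company_description, careers_info, and funding_data.
    print(report.title)
    print(report.model_dump_json(indent=2))


asyncio.run(main())
```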
4 changes: 4 additions & 0 deletions src/company_researcher/models/__init__.py
@@ -0,0 +1,4 @@
from company_researcher.models.response import Response
from company_researcher.models.report import Report

__all__ = ["Response", "Report"]
9 changes: 0 additions & 9 deletions src/company_researcher/models/models.py
@@ -1,15 +1,6 @@
from pydantic import BaseModel


class Response(BaseModel):
    status_code: int
    html: str
    text: str
    urls: list[str]
    title: str | None
    url: str


class FundingData(BaseModel):
    company_name: str
    funding_amount: float
12 changes: 12 additions & 0 deletions src/company_researcher/models/report.py
@@ -0,0 +1,12 @@
from pydantic import BaseModel

from company_researcher.models.models import FundingData
from company_researcher.modules.prompts.career_generator import JobDescription
from company_researcher.modules.prompts.company_description import CompanyDescription


class Report(BaseModel):
    title: str
    company_description: CompanyDescription
    careers_info: list[JobDescription]
    funding_data: list[FundingData]
10 changes: 10 additions & 0 deletions src/company_researcher/models/response.py
@@ -0,0 +1,10 @@
from pydantic import BaseModel


class Response(BaseModel):
    status_code: int
    html: str
    text: str
    urls: list[str]
    title: str | None
    url: str
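Since `Response` is now re-exported from `company_researcher.models`, call sites and tests can build one directly from the new import path. A small sketch with placeholder values (the field names match the model above):

```python
# Placeholder values only; field names come from the Response model above.
from company_researcher.models import Response

resp = Response(
    status_code=200,
    html="<html><body><h1>Example</h1></body></html>",
    text="Example",
    urls=["https://example.com/careers"],
    title="Example",
    url="https://example.com",
)
print(resp.title, resp.status_code)
```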
2 changes: 1 addition & 1 deletion src/company_researcher/modules/prompts/career_generator.py
@@ -1,6 +1,6 @@
from pydantic import BaseModel, Field

from company_researcher.models.models import Response
from company_researcher.models import Response
from company_researcher.modules.ai.llm import AI


@@ -4,7 +4,7 @@
from typing import List
import textwrap

from company_researcher.models.models import Response
from company_researcher.models import Response
from company_researcher.modules.ai.llm import AI
from company_researcher.modules.prompts.career_generator import JobDescription

17 changes: 6 additions & 11 deletions src/company_researcher/reports/pdf_generator.py
@@ -5,29 +5,24 @@

from pathlib import Path

from company_researcher.models.models import FundingData
from company_researcher.modules.prompts.career_generator import JobDescription
from company_researcher.modules.prompts.company_description import CompanyDescription
from company_researcher.models import Report


class PDFReport:
    def __init__(
        self,
        title: str,
        company_description: CompanyDescription,
        careers: list[JobDescription],
        funding_data: list[FundingData],
        report: Report,
        filename=None,
    ):
        base_filename = (
            filename or f"company_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        )
        self.title = title
        self.title = report.title
        self.html_path = str(Path.cwd() / "reports" / f"{base_filename}.html")
        self.pdf_path = str(Path.cwd() / "reports" / f"{base_filename}.pdf")
        self.company_description = company_description
        self.careers = careers
        self.funding_data = funding_data
        self.company_description = report.company_description
        self.careers = report.careers_info
        self.funding_data = report.funding_data

    def _generate_html(self):
        """Generate HTML report using Jinja2 template"""
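`PDFReport` now takes the whole `Report` instead of four positional arguments. A sketch of the updated call pattern, mirroring what `main.py` does above (the `write_pdf` helper is illustrative, and `report` is assumed to be the object returned by `CompanyResearcher.research()`):

```python
# Illustrative helper; the call sequence matches main.py above.
from company_researcher.models import Report
from company_researcher.reports.pdf_generator import PDFReport


async def write_pdf(report: Report) -> str:
    pdf_generator = PDFReport(report)          # previously PDFReport(title, description, careers, funding)
    pdf_path = await pdf_generator.generate()  # renders the Jinja2 HTML template, then writes the PDF
    pdf_generator.open()                       # open the generated PDF locally
    return pdf_path
```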
2 changes: 1 addition & 1 deletion src/company_researcher/scrapers/scraper.py
@@ -3,7 +3,7 @@
from tenacity import retry, stop_after_attempt, wait_exponential
import asyncio

from company_researcher.models.models import Response
from company_researcher.models import Response
from company_researcher.scrapers.parser import Parser

logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion src/company_researcher/scrapers/worker.py
@@ -2,7 +2,7 @@
import logging
from typing import Set
from company_researcher.scrapers.scraper import Scraper, ScraperException
from company_researcher.models.models import Response
from company_researcher.models import Response


logger = logging.getLogger(__name__)