Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Jan 31, 2024
1 parent a5798bc commit 839aa6e
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 25 deletions.
44 changes: 20 additions & 24 deletions examples/confluence-reader/fetch_confluence_space.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright 2021-2024 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
import logging
import json
import logging
from datetime import datetime

from langchain_community.document_loaders import ConfluenceLoader
Expand All @@ -12,42 +12,42 @@

def update_saved_documents(file_path, new_docs):
    """Merge ``new_docs`` into the JSON document list stored at ``file_path``.

    The file is expected to contain a JSON list of documents, each carrying
    an ``"id"`` key. Documents in ``new_docs`` replace stored documents with
    the same ``"id"``; documents with unseen ids are appended. When nothing
    usable is stored yet (missing file, invalid JSON, empty content, or a
    top-level JSON value that is neither a list nor an object), the file is
    simply (re)written with ``new_docs`` as given.

    Args:
        file_path: Path of the JSON file to read and rewrite.
        new_docs: Iterable of JSON-serialisable dicts. Each needs an ``"id"``
            key whenever there are already stored documents to merge with.

    Raises:
        KeyError: If a stored document, or a new document being merged into
            existing ones, has no ``"id"`` key.
    """
    try:
        with open(file_path, encoding="utf-8") as file:
            stored = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        stored = None

    if isinstance(stored, list):
        # Index the stored list by id so that updates overwrite in place.
        merged = {doc["id"]: doc for doc in stored}
    elif isinstance(stored, dict):
        # A top-level JSON object is used as the id -> document mapping as is.
        merged = stored
    else:
        # null, strings, numbers, ...: valid JSON but not a document store.
        # Previously a truthy scalar crashed with TypeError further down.
        merged = {}

    if merged:
        for doc in new_docs:
            merged[doc["id"]] = doc
        result = list(merged.values())
    else:
        result = list(new_docs)

    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(result, file, indent=4)


def flag_deleted_pages(file_path, current_confluence_pages):
    """Mark stored documents whose page is absent from the current page set.

    Reads the JSON list at ``file_path``, sets ``"deleted": True`` on every
    stored document whose ``"id"`` is not among the ids of
    ``current_confluence_pages``, and writes the list back. If the file is
    missing, is not valid JSON, or does not hold a JSON list, a message is
    printed and the file is left untouched.

    Args:
        file_path: Path of the JSON file holding the stored documents.
        current_confluence_pages: Iterable of objects exposing a
            ``metadata`` mapping with an ``"id"`` entry.

    Returns:
        None.
    """
    try:
        with open(file_path, encoding="utf-8") as file:
            existing_docs = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        print("File not found or invalid format. Exiting.")
        return

    # Valid JSON that is not a list (object, null, scalar) cannot be iterated
    # as documents; treat it like any other invalid format instead of
    # failing with TypeError in the loop below.
    if not isinstance(existing_docs, list):
        print("File not found or invalid format. Exiting.")
        return

    # convert to a set of IDs for faster lookup
    current_page_ids = {page.metadata["id"] for page in current_confluence_pages}

    # flag deleted pages
    for doc in existing_docs:
        if doc["id"] not in current_page_ids:
            doc["deleted"] = True

    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(existing_docs, file, indent=4)


Expand All @@ -60,15 +60,15 @@ def __init__(self, confluence_url, token, space_key):

def fetch_updated_pages_in_confluence_space(self):
try:
with open('last_modification.txt', 'r') as file:
with open("last_modification.txt") as file:
last_date = file.read().strip()

cql_query = f"lastModified > '{last_date}' and type = page and space = {self.space_key}"

current_date_time = datetime.now()
formatted_current_date_time = current_date_time.strftime("%Y-%m-%d %H:%M")

with open('last_modification.txt', 'w') as file:
with open("last_modification.txt", "w") as file:
file.write(formatted_current_date_time)

documents = self.loader.load(cql=cql_query, limit=5, max_pages=5)
Expand All @@ -93,19 +93,13 @@ def fetch_all_pages_in_confluence_space(self):
def run(job_input: IJobInput):
log.info(f"Starting job step {__name__}")

confluence_url = job_input.get_property(
"confluence_url", ""
)
token = job_input.get_property(
"confluence_token", ""
)
space_key = job_input.get_property(
"confluence_space_key", ""
)
confluence_url = job_input.get_property("confluence_url", "")
token = job_input.get_property("confluence_token", "")
space_key = job_input.get_property("confluence_space_key", "")

confluence_reader = ConfluenceDataSource(confluence_url, token, space_key)

file_path = 'confluence_data.json'
file_path = "confluence_data.json"

# check for updates
docs = confluence_reader.fetch_updated_pages_in_confluence_space()
Expand All @@ -115,7 +109,9 @@ def run(job_input: IJobInput):
update_saved_documents(file_path, docs_metadata)

# check for deletions
flag_deleted_pages(file_path, confluence_reader.fetch_all_pages_in_confluence_space())
flag_deleted_pages(
file_path, confluence_reader.fetch_all_pages_in_confluence_space()
)


# TODO: pictures are ignored for the moment, but we need to handle them in general
2 changes: 1 addition & 1 deletion examples/confluence-reader/last_modification.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2000-01-01 00:00
2000-01-01 00:00

0 comments on commit 839aa6e

Please sign in to comment.