Skip to content

Commit 77113eb

Browse files
committed
update
1 parent 22ff8e9 commit 77113eb

File tree

10 files changed

+197
-21
lines changed

10 files changed

+197
-21
lines changed

Makefile

+32-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
ENV = dev
12

23
.PHONY: test
34
test:
45
pytest -s .
56

67
.PHONY: invoke_lambda
78
invoke_lambda:
8-
aws lambda invoke --function-name dev-youtube-comment-sentiment-analysis --cli-binary-format raw-in-base64-out --payload file://events/event.json output.json && cat output.json
9+
aws lambda invoke --function-name ${ENV}-youtube-comment-sentiment-analysis --cli-binary-format raw-in-base64-out --payload file://events/event.json output.json && cat output.json
910

1011
.ONESHELL:
1112
.PHONY: build-lambda
@@ -37,6 +38,33 @@ terraform-apply:
3738
terraform-destroy:
3839
terraform -chdir=terraform destroy
3940

40-
.PHONY: dev-workspace
41-
dev-workspace:
42-
terraform -chdir=terraform workspace select dev
41+
.PHONY: workspace
42+
workspace:
43+
terraform -chdir=terraform workspace select ${ENV}
44+
45+
.PHONY: delete-table-bucket
46+
delete-table-bucket:
47+
aws s3tables delete-table \
48+
--table-bucket-arn arn:aws:s3tables:us-east-1:639269844451:bucket/${ENV}-youtube-comment-metastore \
49+
--namespace aws_s3_metadata \
50+
--name ${ENV}_youtube_comments_monitoring \
51+
--region us-east-1
52+
53+
aws s3tables delete-table-bucket \
54+
--region us-east-1 \
55+
--table-bucket-arn arn:aws:s3tables:us-east-1:639269844451:bucket/${ENV}-youtube-comment-metastore
56+
57+
.PHONY: delete-metadata-table-config
58+
delete-metadata-table-config:
59+
aws s3api delete-bucket-metadata-table-configuration \
60+
--bucket ${ENV}-youtube-comment-storage \
61+
--region us-east-1
62+
63+
.PHONY: create-metadata-table-config
64+
create-metadata-table-config:
65+
aws s3api create-bucket-metadata-table-configuration \
66+
--bucket ${ENV}-youtube-comment-storage \
67+
--metadata-table-configuration \
68+
"S3TablesDestination={TableBucketArn=arn:aws:s3tables:us-east-1:639269844451:bucket/${ENV}-youtube-comment-metastore,TableName=${ENV}_youtube_comments_monitoring}" \
69+
--region us-east-1
70+

function/src/app.py

+27-13
Original file line numberDiff line numberDiff line change
@@ -57,31 +57,37 @@ def fetch_comments_page(
5757
response.raise_for_status()
5858
return response.json()
5959

60-
@staticmethod
61-
def format_comment(comment_data: dict) -> dict:
60+
@tracer.capture_method
61+
def format_comment(
62+
self,
63+
comment_data: dict,
64+
additional_data: dict = {},
65+
) -> dict:
6266
"""Format a single comment for storage."""
6367

64-
author_channel_id = (
65-
comment_data["snippet"].get("authorChannelId", {}).get("value", "none")
66-
)
67-
6868
snippet_snake_case = {}
6969
for key, value in comment_data["snippet"].items():
7070
new_key = inflection.underscore(key)
7171
snippet_snake_case[new_key] = value
7272

73+
author_channel_id = (
74+
comment_data["snippet"].get("authorChannelId", {}).get("value", "none")
75+
)
76+
7377
data = {
7478
"id": comment_data["id"],
7579
**snippet_snake_case,
76-
"author_channel_id": author_channel_id,
80+
**(additional_data or {}),
7781
"parent_id": comment_data.get("parentId", "none"),
78-
"fetched_at": datetime.now().isoformat(),
82+
"author_channel_id": author_channel_id,
7983
}
8084

8185
return data
8286

8387
@tracer.capture_method
84-
def retrieve_comments_from_youtube(self, video_id: str, api_key: str) -> list[dict]:
88+
def retrieve_comments_from_youtube(
89+
self, video_id: str, api_key: str, additional_data: dict = None
90+
) -> list[dict]:
8591
"""Retrieve all comments for a given video."""
8692

8793
comments = []
@@ -95,16 +101,16 @@ def retrieve_comments_from_youtube(self, video_id: str, api_key: str) -> list[di
95101
for item in response_data["items"]:
96102
# Process top-level comment
97103
top_level_comment = self.format_comment(
98-
item["snippet"]["topLevelComment"]
104+
item["snippet"]["topLevelComment"],
105+
additional_data,
99106
)
100107

101108
comments.append(top_level_comment)
102109

103110
# Process replies
104111
if "replies" in item:
105112
for reply in item["replies"]["comments"]:
106-
reply_comment = self.format_comment(reply)
107-
113+
reply_comment = self.format_comment(reply, additional_data)
108114
comments.append(reply_comment)
109115

110116
next_page_token = response_data.get("nextPageToken")
@@ -191,6 +197,7 @@ class Action(str, Enum):
191197
class Event(BaseModel):
192198
video_id: str
193199
action: Action
200+
execution_id: str
194201

195202

196203
handler = YouTubeCommentsHandler()
@@ -205,6 +212,7 @@ def lambda_handler(event: Event, context: LambdaContext) -> dict:
205212

206213
video_id = event.video_id
207214
action = event.action
215+
execution_id = event.execution_id
208216

209217
match action:
210218
case Action.INSERT:
@@ -213,7 +221,13 @@ def lambda_handler(event: Event, context: LambdaContext) -> dict:
213221
api_key = parameters.get_secret(YOUTUBE_API_KEY_SECRET_NAME)
214222

215223
# Retrieve and process comments
216-
comments = handler.retrieve_comments_from_youtube(video_id, api_key)
224+
additional_data = {
225+
"execution_id": execution_id,
226+
"fetched_at": datetime.now().isoformat(),
227+
}
228+
comments = handler.retrieve_comments_from_youtube(
229+
video_id, api_key, additional_data=additional_data
230+
)
217231

218232
# Batch detect sentiment
219233
comments_with_sentiment = handler.batch_detect_sentiment(comments)

terraform/main.tf

+7-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ locals {
55
module "lambda" {
66
source = "./modules/lambda"
77
env = local.env
8-
function_name = "${local.env}-youtube-comment-management"
8+
function_name = "${local.env}-youtube-comment-processor"
99
bucket_name = module.s3.bucket_name
1010
function_dir = "${abspath(path.root)}/../function"
1111
service_name = var.service_name
@@ -25,3 +25,9 @@ module "glue" {
2525
table_name = "${local.env}_youtube_comments_analytics"
2626
bucket_name = module.s3.bucket_name
2727
}
28+
29+
module "sfn" {
30+
source = "./modules/sfn"
31+
state_machine_name = "${local.env}-youtube-comment-processor"
32+
lambda_arn = module.lambda.function_arn
33+
}

terraform/modules/glue/main.tf

+5
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ resource "aws_glue_catalog_table" "table" {
8989
type = "timestamp"
9090
}
9191

92+
columns {
93+
name = "execution_id"
94+
type = "string"
95+
}
96+
9297
columns {
9398
name = "sentiment"
9499
type = "string"

terraform/modules/lambda/main.tf

+4-3
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ resource "aws_lambda_function" "function" {
6262
}
6363

6464
resource "aws_iam_role" "lambda_role" {
65-
name = "${var.function_name}-role"
65+
name = "${var.function_name}-lambda-execution-role"
6666
assume_role_policy = jsonencode({
6767
Version = "2012-10-17"
6868
Statement = [
@@ -77,7 +77,7 @@ resource "aws_iam_role" "lambda_role" {
7777

7878

7979
resource "aws_iam_policy" "lambda_policy" {
80-
name = "${var.function_name}-policy"
80+
name = "${var.function_name}-lambda-execution-policy"
8181
description = "Policy for Lambda function ${var.function_name}"
8282
policy = jsonencode({
8383
Version = "2012-10-17"
@@ -86,7 +86,8 @@ resource "aws_iam_policy" "lambda_policy" {
8686
Effect = "Allow"
8787
Action = [
8888
"s3:GetObject",
89-
"s3:PutObject"
89+
"s3:PutObject",
90+
"s3:DeleteObject"
9091
]
9192
Resource = [
9293
"arn:aws:s3:::${var.bucket_name}",
File renamed without changes.

terraform/modules/sfn/main.tf

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
2+
resource "aws_sfn_state_machine" "state_machine" {
3+
name = var.state_machine_name
4+
role_arn = aws_iam_role.state_machine_role.arn
5+
6+
definition = templatefile("${path.module}/state_machine.json", {
7+
lambda_arn = var.lambda_arn
8+
})
9+
}
10+
11+
12+
resource "aws_iam_role" "state_machine_role" {
13+
name = "${var.state_machine_name}-sfn-execution-role"
14+
assume_role_policy = jsonencode({
15+
Version = "2012-10-17"
16+
Statement = [
17+
{
18+
Action = "sts:AssumeRole",
19+
Effect = "Allow",
20+
Principal = {
21+
Service = "states.amazonaws.com"
22+
}
23+
}
24+
]
25+
})
26+
}
27+
28+
resource "aws_iam_policy" "state_machine_policy" {
29+
name = "${var.state_machine_name}-sfn-execution-policy"
30+
description = "Policy for Step Functions state machine ${var.state_machine_name}"
31+
policy = jsonencode({
32+
Version = "2012-10-17",
33+
Statement = [
34+
{
35+
Effect = "Allow",
36+
Action = "lambda:InvokeFunction",
37+
Resource = "${var.lambda_arn}:*"
38+
},
39+
{
40+
Effect = "Allow",
41+
Action = "states:StartExecution"
42+
Resource = aws_sfn_state_machine.state_machine.arn
43+
}
44+
]
45+
})
46+
}
47+
48+
resource "aws_iam_role_policy_attachment" "state_machine_policy_attachment" {
49+
role = aws_iam_role.state_machine_role.name
50+
policy_arn = aws_iam_policy.state_machine_policy.arn
51+
}

terraform/modules/sfn/outputs.tf

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
output "sfn_state_machine_name" {
3+
value = aws_sfn_state_machine.state_machine.name
4+
}
5+
6+
output "sfn_state_machine_arn" {
7+
value = aws_sfn_state_machine.state_machine.arn
8+
}
+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
{
2+
"Comment": "Fans out over the input video IDs with a distributed Map state and invokes the comment-processing Lambda function for each one",
3+
"StartAt": "Map Videoid",
4+
"States": {
5+
"Map Videoid": {
6+
"Type": "Map",
7+
"ItemProcessor": {
8+
"ProcessorConfig": {
9+
"Mode": "DISTRIBUTED",
10+
"ExecutionType": "STANDARD"
11+
},
12+
"StartAt": "Process Comments",
13+
"States": {
14+
"Process Comments": {
15+
"Type": "Task",
16+
"Resource": "arn:aws:states:::lambda:invoke",
17+
"OutputPath": "$.Payload",
18+
"Parameters": {
19+
"FunctionName": "${lambda_arn}:$LATEST",
20+
"Payload": {
21+
"video_id.$": "$$.Execution.Input.video_id",
22+
"action.$": "$$.Execution.Input.action",
23+
"execution_id.$": "$$.Execution.Input.execution_id"
24+
}
25+
},
26+
"Retry": [
27+
{
28+
"ErrorEquals": [
29+
"Lambda.ServiceException",
30+
"Lambda.AWSLambdaException",
31+
"Lambda.SdkClientException",
32+
"Lambda.TooManyRequestsException"
33+
],
34+
"IntervalSeconds": 1,
35+
"MaxAttempts": 3,
36+
"BackoffRate": 2,
37+
"JitterStrategy": "FULL"
38+
}
39+
],
40+
"End": true
41+
}
42+
}
43+
},
44+
"End": true,
45+
"Label": "MapVideoid",
46+
"MaxConcurrency": 100,
47+
"ItemsPath": "$.video_id",
48+
"ItemSelector": {
49+
"action.$": "$$.Execution.Input.action",
50+
"video_id.$": "$$.Map.Item.Value",
51+
"execution_id.$": "$$.Execution.Name"
52+
}
53+
}
54+
}
55+
}

terraform/modules/sfn/variables.tf

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
variable "state_machine_name" {
2+
type = string
3+
}
4+
5+
variable "lambda_arn" {
6+
type = string
7+
}
8+

0 commit comments

Comments
 (0)