Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hackweek: Added Makefile entry to fetch schema; eliminate type errors #1198

Merged
merged 9 commits into from
Aug 4, 2020
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.PHONY: develop setup-git test install-python-dependencies

develop: install-python-dependencies setup-git
develop: install-python-dependencies setup-git fetchschemas

setup-git:
pip install 'pre-commit==2.4.0'
Expand All @@ -11,3 +11,8 @@ test:

install-python-dependencies:
pip install -e .

fetchschemas:
mkdir -p schema
curl https://getsentry.github.io/relay/event-schema/event.schema.json -o schema/event.schema.json
.PHONY: fetchschemas
30 changes: 20 additions & 10 deletions snuba/datasets/events_processor_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,17 @@

from jsonschema_typed import JSONSchema

EventData = JSONSchema["/Users/untitaker/projects/snuba/event.schema.json"]
EventData = JSONSchema["schema/event.schema.json"]


class Event(TypedDict):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this class is a duplicate of InsertEvent below. We should have only one.

data: EventData
search_message: Any
message: Any
event_id: Any
datetime: str
group_id: str
primary_hash: str


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -128,7 +131,7 @@ def extract_sdk(
output["sdk_integrations"] = sdk_integrations

def process_message(
self, message, metadata: Optional[KafkaMessageMetadata] = None
self, message: Event, metadata: Optional[KafkaMessageMetadata] = None
) -> Optional[ProcessedMessage]:
"""\
Process a raw message into a tuple of (action_type, processed_message):
Expand All @@ -137,11 +140,13 @@ def process_message(
Returns `None` if the event is too old to be written.
"""
action_type = None
processed: Optional[MutableMapping[str, Any]] = None

if isinstance(message, dict):
# deprecated unwrapped event message == insert
action_type = ProcessorAction.INSERT
try:

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intentional?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope.

processed = self.process_insert(message, metadata)
except EventTooOld:
return None
Expand Down Expand Up @@ -206,11 +211,12 @@ def process_message(

def process_insert(
self, event: Event, metadata: Optional[KafkaMessageMetadata] = None
) -> Optional[Mapping[str, Any]]:
) -> Optional[MutableMapping[str, Any]]:
if not self._should_process(event):
return None

processed = {"deleted": 0}
processed: MutableMapping[str, Any] = {"deleted": 0}

extract_project_id(processed, event)
self._extract_event_id(processed, event)
processed["retention_days"] = enforce_retention(
Expand All @@ -228,7 +234,7 @@ def process_insert(
self.extract_common(processed, event, metadata)
self.extract_custom(processed, event, metadata)

sdk = data.get("sdk", None) or {}
sdk = data.get("sdk", None) or {} # type: ignore
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What was the issue here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"sdk" does not exist on the data object.

self.extract_sdk(processed, sdk)

tags = _as_dict_safe(data.get("tags", None))
Expand All @@ -248,9 +254,13 @@ def process_insert(
)

exception = (
data.get("exception", data.get("sentry.interfaces.Exception", None)) or {}
data.get(
"exception",
data.get("sentry.interfaces.Exception", None), # type: ignore
)
or {}
)
stacks = exception.get("values", None) or []
stacks: Sequence[Any] = exception.get("values", None) or []
self.extract_stacktraces(processed, stacks)

if metadata is not None:
Expand All @@ -273,15 +283,15 @@ def extract_common(

# Properties we get from the "data" dict, which is the actual event body.

received = _collapse_uint32(int(data["received"]))
received = _collapse_uint32(data["received"])
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The extra `int` here is probably a bug: `int(None)` fails, whereas `_collapse_uint32(None)` returns `None`, and the subsequent code checks for `None`.

output["received"] = (
datetime.utcfromtimestamp(received) if received is not None else None
)
output["culprit"] = _unicodify(data.get("culprit", None))
output["type"] = _unicodify(data.get("type", None))
output["version"] = _unicodify(data.get("version", None))
output["title"] = _unicodify(data.get("title", None))
output["location"] = _unicodify(data.get("location", None))
output["title"] = _unicodify(data.get("title", None)) # type: ignore
output["location"] = _unicodify(data.get("location", None)) # type: ignore

module_names = []
module_versions = []
Expand Down