Skip to content

Commit

Permalink
create strx section and transcoder
Browse files Browse the repository at this point in the history
  • Loading branch information
sethmachine committed Jan 16, 2025
1 parent 695b2b5 commit 9929307
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 2 deletions.
Empty file.
56 changes: 56 additions & 0 deletions src/richchk/model/chk/strx/decoded_strx_section.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
""" "STRx" - String Data.
Required for all versions and all game types (either this section or STR must be present).
Validation: Must be at least 1 byte (assumed, not confirmed).
Note, the STR section can also exist and optionally replace the STRx section.
Note, the encoding of text in the STRx section is unspecified. Commonly it is UTF-8 but
anything is possible.
The STRx section is a simple 32-bit extension of the STR section (its count and offsets
are u32 values, as listed below), introduced in StarCraft: Remastered.
This section contains all the strings in the map.
u32: Number of strings in the section (Default: 1024)
u32[Number of strings]: 1 integer for each string specifying the offset (the spot where
the string starts in the section from the start of it).
Strings: After the offsets, this is where every string in the map goes, one after
another. Each one is terminated by a null character.
"""

import dataclasses

from ...chk_section_name import ChkSectionName
from ..decoded_chk_section import DecodedChkSection


@dataclasses.dataclass(frozen=True)
class DecodedStrxSection(DecodedChkSection):
    """Decoded form of the CHK "STRx" string-data section.

    The fields follow the order of the binary layout: the string count, the
    table of per-string offsets, and finally the strings themselves.  Field
    order is significant because it defines the generated ``__init__``.
    """

    # Number of strings declared by the section, stored as a u32
    # (the format's default is 1024).
    _number_of_strings: int
    # One u32 per declared string: where that string starts, measured from
    # the start of the section.
    _string_offsets: list[int]
    # The string table that follows the offsets.  On disk every string ends
    # with a null character; the values held here are the decoded strings.
    _strings: list[str]

    @classmethod
    def section_name(cls) -> ChkSectionName:
        """Return the CHK section name this model represents (``STRX``)."""
        return ChkSectionName.STRX

    @property
    def number_of_strings(self) -> int:
        """Declared number of strings in the section."""
        return self._number_of_strings

    @property
    def strings_offsets(self) -> list[int]:
        """Offsets of each string, relative to the start of the section."""
        return self._string_offsets

    @property
    def strings(self) -> list[str]:
        """The strings stored in the section, in the order they appear."""
        return self._strings
1 change: 1 addition & 0 deletions src/richchk/model/chk_section_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class ChkSectionName(Enum):
IOWN = ("IOWN",)
OWNR = ("OWNR",)
STR = ("STR ",)
STRX = ("STRx",)
UNIS = ("UNIS",)
UNIX = ("UNIx",)
MRGN = ("MRGN",)
Expand Down
60 changes: 60 additions & 0 deletions src/richchk/transcoder/chk/transcoders/chk_strx_transcoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Decode and encode the STRx section which contains all strings in the CHK file."""

import struct
from io import BytesIO

from ....model.chk.strx.decoded_strx_section import DecodedStrxSection
from ....transcoder.chk.chk_section_transcoder import ChkSectionTranscoder
from ....transcoder.chk.chk_section_transcoder_factory import _RegistrableTranscoder
from ....transcoder.chk.strings_common import (
_NULL_TERMINATE_CHAR_FOR_STRING,
_STRING_ENCODING,
)


class ChkStrxTranscoder(
    ChkSectionTranscoder[DecodedStrxSection],
    _RegistrableTranscoder,
    chk_section_name=DecodedStrxSection.section_name(),
):
    """Transcode the CHK "STRx" section to and from ``DecodedStrxSection``.

    Binary layout handled here: a u32 string count, that many u32 string
    offsets, then the string table in which every string is followed by the
    null terminator.  Integers use an explicit little-endian, fixed-size
    format (``<I``) so the result does not depend on the host platform.
    """

    def decode(self, chk_section_binary_data: bytes) -> DecodedStrxSection:
        """Parse raw STRx section bytes into a ``DecodedStrxSection``.

        :param chk_section_binary_data: the section payload (count, offsets,
            string table).
        :return: the decoded section; strings are stored without their
            null terminators.
        :raises struct.error: if the data is too short for the declared
            count/offsets, or the string table does not end with a
            terminator.
        """
        terminator: bytes = _NULL_TERMINATE_CHAR_FOR_STRING.encode(_STRING_ENCODING)
        (num_strings,) = struct.unpack_from("<I", chk_section_binary_data, 0)
        string_offsets: list[int] = list(
            struct.unpack_from(f"<{num_strings}I", chk_section_binary_data, 4)
        )
        strings: list[str] = []
        # there can be more offsets than actual string data,
        # means some offsets reference the same string!
        # So walk the string table itself rather than following the offsets.
        cursor: int = 4 + 4 * num_strings
        section_end: int = len(chk_section_binary_data)
        while cursor < section_end:
            terminator_at = chk_section_binary_data.find(terminator, cursor)
            if terminator_at < 0:
                # Keep the exception type a truncated read used to produce,
                # but say what is actually wrong.
                raise struct.error(
                    "STRx string data ends without a null terminator"
                )
            # Decode the whole string at once: decoding byte-by-byte breaks
            # on characters that span several bytes in the encoding.
            strings.append(
                chk_section_binary_data[cursor:terminator_at].decode(
                    _STRING_ENCODING
                )
            )
            cursor = terminator_at + len(terminator)
        return DecodedStrxSection(
            _number_of_strings=num_strings,
            _string_offsets=string_offsets,
            _strings=strings,
        )

    def _encode(self, decoded_chk_section: DecodedStrxSection) -> bytes:
        """Serialize a ``DecodedStrxSection`` back into STRx section bytes.

        :param decoded_chk_section: the decoded section to write out.
        :return: count, offsets and null-terminated strings as bytes.
        :raises IndexError: if fewer offsets are stored than
            ``number_of_strings`` declares.
        """
        count: int = decoded_chk_section.number_of_strings
        offsets: list[int] = decoded_chk_section.strings_offsets
        terminator: bytes = _NULL_TERMINATE_CHAR_FOR_STRING.encode(_STRING_ENCODING)
        chunks: list[bytes] = [struct.pack("<I", count)]
        chunks.extend(struct.pack("<I", offsets[i]) for i in range(count))
        for text in decoded_chk_section.strings:
            # Append the full encoded bytes; sizing a struct "Ns" field by
            # the character count would truncate multi-byte characters.
            chunks.append(text.encode(_STRING_ENCODING))
            chunks.append(terminator)
        return b"".join(chunks)
9 changes: 7 additions & 2 deletions test/io/chk/chk_io_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)

T = TypeVar("T", bound=DecodedChkSection, covariant=True)
_EXCLUDED_CHK_SECTIONS = [ChkSectionName.STRX]


@pytest.fixture(scope="function")
Expand Down Expand Up @@ -69,10 +70,14 @@ def _assert_decoded_chk_has_expected_decoded_sections(chk: DecodedChk):
expected_decoded_section_names: set[ChkSectionName] = {
x for x in ChkSectionName
}.intersection(
{x for x in ChkSectionTranscoderFactory.get_all_registered_chk_section_names()}
{
x
for x in ChkSectionTranscoderFactory.get_all_registered_chk_section_names()
if x not in _EXCLUDED_CHK_SECTIONS
}
)
for expected_section_name in expected_decoded_section_names:
assert expected_section_name.value in section_by_name
assert expected_section_name.value in section_by_name.keys()


def _get_actual_section_name_for_chk_section(
Expand Down

0 comments on commit 9929307

Please sign in to comment.