diff --git a/src/richchk/model/chk/strx/__init__.py b/src/richchk/model/chk/strx/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/richchk/model/chk/strx/decoded_strx_section.py b/src/richchk/model/chk/strx/decoded_strx_section.py
new file mode 100644
index 0000000..33aa8c8
--- /dev/null
+++ b/src/richchk/model/chk/strx/decoded_strx_section.py
@@ -0,0 +1,62 @@
+""" "STRx" - String Data.
+
+Required for all versions and all game types (or STR). Validation: Must be at least 1
+byte (assumed, not confirmed).
+
+Note, the STR section can also exist and optionally replace the STRx section.
+
+Note, the encoding of text in the STRx section is unspecified. Commonly it is UTF-8 but
+anything is possible.
+
+The STRx section is a simple 32-bit extension to the STR section introduced in
+StarCraft Remastered.
+
+This section contains all the strings in the map.
+
+u32: Number of strings in the section (Default: 1024)
+
+u32[Number of strings]: 1 integer for each string specifying the offset (the spot where
+the string starts in the section from the start of it).
+
+Strings: After the offsets, this is where every string in the map goes, one after
+another. Each one is terminated by a null character.
+"""
+
+import dataclasses
+
+from ...chk_section_name import ChkSectionName
+from ..decoded_chk_section import DecodedChkSection
+
+
+@dataclasses.dataclass(frozen=True)
+class DecodedStrxSection(DecodedChkSection):
+    """Decoded STRx section: the string count, offset table, and string data."""
+
+    # u32: Number of strings in the section (Default: 1024)
+    _number_of_strings: int
+    # u32[Number of strings]: 1 integer for each string specifying the offset
+    # (the spot where the string starts in the section from the start of it).
+    _string_offsets: list[int]
+    # Strings: After the offsets, this is where every string in the map goes,
+    # one after another. Each one is terminated by a null character.
+    _strings: list[str]
+
+    @classmethod
+    def section_name(cls) -> ChkSectionName:
+        """Return the CHK section name this decoded model represents."""
+        return ChkSectionName.STRX
+
+    @property
+    def number_of_strings(self) -> int:
+        """Return the string count declared in the section header."""
+        return self._number_of_strings
+
+    @property
+    def strings_offsets(self) -> list[int]:
+        """Return each string's offset, measured from the start of the section."""
+        return self._string_offsets
+
+    @property
+    def strings(self) -> list[str]:
+        """Return the strings in stored order, without their terminators."""
+        return self._strings
diff --git a/src/richchk/model/chk_section_name.py b/src/richchk/model/chk_section_name.py
index 855a861..957898c 100644
--- a/src/richchk/model/chk_section_name.py
+++ b/src/richchk/model/chk_section_name.py
@@ -20,6 +20,7 @@ class ChkSectionName(Enum):
     IOWN = ("IOWN",)
     OWNR = ("OWNR",)
     STR = ("STR ",)
+    STRX = ("STRx",)
     UNIS = ("UNIS",)
     UNIX = ("UNIx",)
     MRGN = ("MRGN",)
diff --git a/src/richchk/transcoder/chk/transcoders/chk_strx_transcoder.py b/src/richchk/transcoder/chk/transcoders/chk_strx_transcoder.py
new file mode 100644
index 0000000..c0a340b
--- /dev/null
+++ b/src/richchk/transcoder/chk/transcoders/chk_strx_transcoder.py
@@ -0,0 +1,81 @@
+"""Decode and encode the STRx section which contains all strings in the CHK file."""
+
+import struct
+from io import BytesIO
+
+from ....model.chk.strx.decoded_strx_section import DecodedStrxSection
+from ....transcoder.chk.chk_section_transcoder import ChkSectionTranscoder
+from ....transcoder.chk.chk_section_transcoder_factory import _RegistrableTranscoder
+from ....transcoder.chk.strings_common import (
+    _NULL_TERMINATE_CHAR_FOR_STRING,
+    _STRING_ENCODING,
+)
+
+
+class ChkStrxTranscoder(
+    ChkSectionTranscoder[DecodedStrxSection],
+    _RegistrableTranscoder,
+    chk_section_name=DecodedStrxSection.section_name(),
+):
+    """Transcode the STRx section between raw bytes and DecodedStrxSection."""
+
+    def decode(self, chk_section_binary_data: bytes) -> DecodedStrxSection:
+        """Parse a raw STRx payload into its string count, offsets and strings.
+
+        A final string that is missing its terminator is still decoded.
+
+        :param chk_section_binary_data: raw bytes of the STRx section payload
+        :return: the decoded section; strings are stored without terminators
+        :raises struct.error: if the data is too short for the string count or
+            for the offset table that count declares
+        """
+        bytes_stream: BytesIO = BytesIO(chk_section_binary_data)
+        # "<I" pins little-endian, 4-byte integers instead of relying on the
+        # host's native byte order and size.
+        num_strings: int = struct.unpack("<I", bytes_stream.read(4))[0]
+        string_offsets: list[int] = list(
+            struct.unpack(
+                "<{}I".format(num_strings), bytes_stream.read(4 * num_strings)
+            )
+        )
+        # there can be more offsets than actual string data,
+        # means some offsets reference the same string!
+        terminator: bytes = bytes(_NULL_TERMINATE_CHAR_FOR_STRING, _STRING_ENCODING)
+        # Split on the terminator and decode each string as a whole; decoding one
+        # byte at a time would fail on any multi-byte character.
+        raw_strings: list[bytes] = bytes_stream.read().split(terminator)
+        # A terminated final string leaves an empty chunk after the last split:
+        # drop it. A non-empty last chunk is an unterminated final string: keep it.
+        if not raw_strings[-1]:
+            raw_strings.pop()
+        strings: list[str] = [raw.decode(_STRING_ENCODING) for raw in raw_strings]
+        return DecodedStrxSection(
+            _number_of_strings=num_strings,
+            _string_offsets=string_offsets,
+            _strings=strings,
+        )
+
+    def _encode(self, decoded_chk_section: DecodedStrxSection) -> bytes:
+        """Serialize a decoded STRx section back into its raw payload.
+
+        :param decoded_chk_section: the decoded STRx section to serialize
+        :return: the string count, the offset table, then each string followed
+            by a terminator
+        """
+        terminator: bytes = bytes(_NULL_TERMINATE_CHAR_FOR_STRING, _STRING_ENCODING)
+        offsets: list[int] = decoded_chk_section.strings_offsets
+        chunks: list[bytes] = [
+            struct.pack("<I", decoded_chk_section.number_of_strings)
+        ]
+        # Index up to the declared count so the offset table always agrees with
+        # the count written just before it.
+        chunks.extend(
+            struct.pack("<I", offsets[i])
+            for i in range(decoded_chk_section.number_of_strings)
+        )
+        for string_ in decoded_chk_section.strings:
+            # Append the encoded bytes whole: a struct field sized by len(str)
+            # counts characters, not bytes, and truncates multi-byte text.
+            chunks.append(bytes(string_, _STRING_ENCODING))
+            chunks.append(terminator)
+        return b"".join(chunks)
diff --git a/test/io/chk/chk_io_test.py b/test/io/chk/chk_io_test.py
index ae622f3..28b9132 100644
--- a/test/io/chk/chk_io_test.py
+++ b/test/io/chk/chk_io_test.py
@@ -15,6 +15,7 @@
 )
 
 T = TypeVar("T", bound=DecodedChkSection, covariant=True)
+_EXCLUDED_CHK_SECTIONS = [ChkSectionName.STRX]
 
 
 @pytest.fixture(scope="function")
@@ -69,10 +70,14 @@ def _assert_decoded_chk_has_expected_decoded_sections(chk: DecodedChk):
     expected_decoded_section_names: set[ChkSectionName] = {
         x for x in ChkSectionName
     }.intersection(
-        {x for x in ChkSectionTranscoderFactory.get_all_registered_chk_section_names()}
+        {
+            x
+            for x in ChkSectionTranscoderFactory.get_all_registered_chk_section_names()
+            if x not in _EXCLUDED_CHK_SECTIONS
+        }
     )
     for expected_section_name in expected_decoded_section_names:
-        assert expected_section_name.value in section_by_name
+        assert expected_section_name.value in section_by_name.keys()
 
 
 def _get_actual_section_name_for_chk_section(