create strx section and transcoder

sethmachine · Jan 16, 2025 · 9929307 · 9929307
1 parent 695b2b5
commit 9929307
Show file tree

Hide file tree

Showing 5 changed files with 124 additions and 2 deletions.
diff --git a/src/richchk/model/chk/strx/__init__.py b/src/richchk/model/chk/strx/__init__.py
diff --git a/src/richchk/model/chk/strx/decoded_strx_section.py b/src/richchk/model/chk/strx/decoded_strx_section.py
@@ -0,0 +1,56 @@
+""" "STRx" - String Data.
+
+Required for all versions and all game types, unless a STR section is used instead.
+
+Validation: Must be at least 1 byte (assumed, not confirmed).
+
+Note, the STR section can also exist and optionally replace the STRx section.
+
+Note, the encoding of text in the STRx section is unspecified. Commonly it is UTF-8 but
+anything is possible.
+
+The STRx section is a simple extension of the STR section, introduced in StarCraft
+Remastered, that stores the string count and the string offsets as 32-bit integers.
+
+This section contains all the strings in the map.
+
+u32: Number of strings in the section (Default: 1024)
+
+u32[Number of strings]: 1 integer for each string specifying the offset (the spot where
+the string starts in the section from the start of it).
+
+Strings: After the offsets, this is where every string in the map goes, one after
+another. Each one is terminated by a null character.
+"""
+
+import dataclasses
+
+from ...chk_section_name import ChkSectionName
+from ..decoded_chk_section import DecodedChkSection
+
+
+@dataclasses.dataclass(frozen=True)
+class DecodedStrxSection(DecodedChkSection):
+    # u32: Number of strings in the section (Default: 1024)
+    _number_of_strings: int
+    # u32[Number of strings]: 1 integer for each string specifying the offset
+    # (the spot where the string starts in the section from the start of it).
+    _string_offsets: list[int]
+    # Strings: After the offsets, this is where every string in the map goes,
+    # one after another. Each one is terminated by a null character.
+    _strings: list[str]
+
+    @classmethod
+    def section_name(cls) -> ChkSectionName:
+        return ChkSectionName.STRX
+
+    @property
+    def number_of_strings(self) -> int:
+        return self._number_of_strings
+
+    @property
+    def strings_offsets(self) -> list[int]:
+        return self._string_offsets
+
+    @property
+    def strings(self) -> list[str]:
+        return self._strings
diff --git a/src/richchk/model/chk_section_name.py b/src/richchk/model/chk_section_name.py
@@ -20,6 +20,7 @@ class ChkSectionName(Enum):
     IOWN = ("IOWN",)
     OWNR = ("OWNR",)
     STR = ("STR ",)
+    STRX = ("STRx",)
     UNIS = ("UNIS",)
     UNIX = ("UNIx",)
     MRGN = ("MRGN",)

diff --git a/src/richchk/transcoder/chk/transcoders/chk_strx_transcoder.py b/src/richchk/transcoder/chk/transcoders/chk_strx_transcoder.py
@@ -0,0 +1,60 @@
+"""Decode and encode the STRx section which contains all strings in the CHK file."""
+
+import struct
+from io import BytesIO
+
+from ....model.chk.strx.decoded_strx_section import DecodedStrxSection
+from ....transcoder.chk.chk_section_transcoder import ChkSectionTranscoder
+from ....transcoder.chk.chk_section_transcoder_factory import _RegistrableTranscoder
+from ....transcoder.chk.strings_common import (
+    _NULL_TERMINATE_CHAR_FOR_STRING,
+    _STRING_ENCODING,
+)
+
+
+class ChkStrxTranscoder(
+    ChkSectionTranscoder[DecodedStrxSection],
+    _RegistrableTranscoder,
+    chk_section_name=DecodedStrxSection.section_name(),
+):
+    def decode(self, chk_section_binary_data: bytes) -> DecodedStrxSection:
+        bytes_stream: BytesIO = BytesIO(chk_section_binary_data)
+        num_strings: int = struct.unpack("I", bytes_stream.read(4))[0]
+        string_offsets: list[int] = []
+        for _ in range(num_strings):
+            string_offsets.append(struct.unpack("I", bytes_stream.read(4))[0])
+        strings: list[str] = []
+        # there can be more offsets than actual string data,
+        # means some offsets reference the same string!
+        while bytes_stream.tell() != len(chk_section_binary_data):
+            char: str = struct.unpack("c", bytes_stream.read(1))[0].decode(
+                _STRING_ENCODING
+            )
+            chars: list[str] = []
+            # until null character, read one char at a time,
+            # strings won't store the null terminators
+            while char != _NULL_TERMINATE_CHAR_FOR_STRING:
+                chars.append(char)
+                char = struct.unpack("c", bytes_stream.read(1))[0].decode(
+                    _STRING_ENCODING
+                )
+            strings.append("".join(chars))
+        return DecodedStrxSection(
+            _number_of_strings=num_strings,
+            _string_offsets=string_offsets,
+            _strings=strings,
+        )
+
+    def _encode(self, decoded_chk_section: DecodedStrxSection) -> bytes:
+        data: bytes = b""
+        data += struct.pack("I", decoded_chk_section.number_of_strings)
+        for i in range(decoded_chk_section.number_of_strings):
+            data += struct.pack("I", decoded_chk_section.strings_offsets[i])
+        for string_ in decoded_chk_section.strings:
+            data += struct.pack(
+                "{}s".format(len(string_)), bytes(string_, _STRING_ENCODING)
+            )
+            data += struct.pack(
+                "1s", bytes(_NULL_TERMINATE_CHAR_FOR_STRING, _STRING_ENCODING)
+            )
+        return data
diff --git a/test/io/chk/chk_io_test.py b/test/io/chk/chk_io_test.py
@@ -15,6 +15,7 @@
 )
 
 T = TypeVar("T", bound=DecodedChkSection, covariant=True)
+# Sections that have a registered transcoder but are left out of the set of
+# sections each decoded test CHK is asserted to contain.
+# NOTE(review): presumably the test CHK files store their strings in STR rather
+# than STRx -- confirm.
+_EXCLUDED_CHK_SECTIONS = [ChkSectionName.STRX]
 
 
 @pytest.fixture(scope="function")
@@ -69,10 +70,14 @@ def _assert_decoded_chk_has_expected_decoded_sections(chk: DecodedChk):
     expected_decoded_section_names: set[ChkSectionName] = {
         x for x in ChkSectionName
     }.intersection(
-        {x for x in ChkSectionTranscoderFactory.get_all_registered_chk_section_names()}
+        {
+            x
+            for x in ChkSectionTranscoderFactory.get_all_registered_chk_section_names()
+            if x not in _EXCLUDED_CHK_SECTIONS
+        }
     )
     for expected_section_name in expected_decoded_section_names:
-        assert expected_section_name.value in section_by_name
+        assert expected_section_name.value in section_by_name.keys()
 
 
 def _get_actual_section_name_for_chk_section(