-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathyaml_parser.gd
456 lines (439 loc) · 18.3 KB
/
yaml_parser.gd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
# This file is part of Unidot Importer. See LICENSE.txt for full MIT license.
# Copyright (c) 2021-present Lyuma <[email protected]> and contributors
# SPDX-License-Identifier: MIT
@tool
extends RefCounted
############ FIXME: This should be Array(fileId, guid, utype)
#### WE CANNOT STORE Resource AS INNER CLASS!!
#class UnidotRef extends Resource:
# var fileID: int = 0
# var guid: String = ""
# var utype: int = 0
# func to_string() -> String:
# return _to_string()
# func _to_string() -> String:
# var ret: String = "[REF {fileID: " + str(fileID)
# if not guid.is_empty():
# ret += ", guid: " + str(guid)
# ret += "}]"
# return ret
# Set of YAML key names whose scalar values parse_value() must not convert to
# int or float: the numeric branches there are guarded by
# `not STRING_KEYS.has(keyname)`. Only key membership is queried; the `1`
# values are placeholders.
const STRING_KEYS: Dictionary = {
"value": 1,
"m_Name": 1,
"m_TagString": 1,
"name": 1,
"first": 1,
"propertyPath": 1,
"path": 1,
"attribute": 1,
"m_ShaderKeywords": 1,
"typelessdata": 1, # Mesh m_VertexData; Texture image data
"m_IndexBuffer": 1,
"Hash": 1,
# Avatar:
"m_BoneName": 1,
"m_SkeletonNameIDArray": 1,
"m_HumanSkeletonIndexArray": 1,
"m_HumanSkeletonReverseIndexArray": 1,
"m_HumanBoneIndex": 1,
"m_HandBoneIndex": 1,
"m_ID": 1,
}
# --- Parser state, carried between successive parse_line() calls ---
# Only read when building a log message in parse_line(); not assigned in this file.
var debug_guid: String = ""
var debug_path: String = ""
# Indentation (in characters) of the container currently being filled.
var indentation_level: int = 0
# Object currently being populated; created by the callable passed to
# parse_line() when a top-level "TypeName:" line is seen.
var current_obj: Object = null
# Text before the ":" on the top-level line that started current_obj.
var current_obj_type: String = ""
# Parsed from the "--- !u!<utype> &<fileID>" separator line.
var current_obj_utype: int = 0
var current_obj_fileID: int = 0
# True when the separator line ends with " stripped".
var current_obj_stripped: bool = false
# Accumulated text of an inline "{...}" value that wraps across lines.
var brace_line: String = ""
# Indentation of the line on which the current wrapped value started.
var continuation_line_indentation_level: int = 0
# Accumulated text of a wrapped double-quoted / single-quoted value.
var double_quote_line: String = ""
var single_quote_line: String = ""
# Flags telling which kind of wrapped value (if any) is currently open.
var has_brace_line: bool = false
var has_double_quote_line: bool = false
var has_single_quote_line: bool = false
# Key matched on the most recent line that contained a "key:" prefix.
var prev_key_simple: String = ""
# Most recent key whose line had no value after the colon; the next nested
# container is stored under it. Reset to "" while unwinding indentation.
var prev_key: String = ""
# Like prev_key but never set to "first", "second" or "data"; passed to
# parse_value() as parent_key.
var prev_complex_key: String = ""
# Stack of containers (Dictionary or Array) from the object root down to the
# one currently being filled, and the indentation level saved for each push.
var current_obj_tree: Array = []
var current_indent_tree: Array = []
# Not referenced anywhere in the visible part of this file.
var meta_guid: String = ""
# Both patterns are compiled in _init().
var search_obj_key_regex: RegEx = RegEx.new()
var arr_obj_key_regex: RegEx = RegEx.new()
# Number of lines passed to parse_line() so far (used in log messages).
var line_number: int = 0
## Sets up a fresh parser: no object in progress and both key-matching
## regular expressions compiled and ready for use.
func _init():
	# Unanchored "key:" matcher, used to walk the members of an inline
	# `{a: 1, b: 2}` struct from an arbitrary offset.
	var inline_key_pattern: RegEx = RegEx.new()
	inline_key_pattern.compile("\\s*([^\"\'{}:]*[^\"\'{}:\\s])\\s*:\\s*")
	# Anchored matcher for a line that starts with an optional "-" list
	# marker followed by "key:".
	var line_key_pattern: RegEx = RegEx.new()
	line_key_pattern.compile("^-?\\s?([^\"\'{}:]*[^\"\'{}:\\s])\\s*:\\s*")
	current_obj = null
	arr_obj_key_regex = line_key_pattern
	search_obj_key_regex = inline_key_pattern
## Returns the name of the first top-level key that has an indented body,
## i.e. the text of the line ending in ":" that immediately precedes the first
## line starting with a space. Returns "" when the text has no indented line
## or no ":" before it.
static func parse_main_object_type(yaml: String) -> String:
	var indent_pos: int = yaml.find("\n ")
	if indent_pos != -1:
		# Search backwards from the first indented line for the colon that
		# terminates the header line.
		var colon_pos: int = yaml.rfind(":", indent_pos)
		if colon_pos != -1:
			# rfind() yields -1 when the header is the very first line, which
			# conveniently turns into a start offset of 0.
			var line_start: int = yaml.rfind("\n", colon_pos) + 1
			var type_name: String = yaml.substr(line_start, colon_pos - line_start)
			return type_name.strip_edges()
	return ""
## Scans raw YAML text for inline object references of the form
## `{fileID: N, guid: G, ...}` and returns a Dictionary mapping each guid
## string to the fileID found in the same braces (0 when none is found).
##
## A `guid:` occurrence whose line ends before any "," or "}" is not an
## inline reference (for example the top-level `guid:` entry of a .meta file)
## and is skipped.
##
## If `asset_meta` is truthy, the result is also stored in
## `asset_meta.dependency_guids`.
static func parse_dependency_guids(yaml: String, asset_meta: Object) -> Dictionary:
	var idx: int = 0
	var dependencies: Dictionary = {}
	while true:
		idx = yaml.find("guid:", idx)
		if idx == -1:
			break
		var comma := yaml.find(",", idx)
		var lbrace := yaml.rfind("{", idx)
		var brace := yaml.find("}", idx)
		# Fixed: this used to search for "}" again, which made the
		# "value ends at end of line" check below impossible to trigger.
		var newline := yaml.find("\n", idx)
		# The guid value ends at whichever of "," or "}" comes first.
		if brace != -1 and (comma == -1 or brace < comma):
			comma = brace
		if comma == -1:
			# Neither "," nor "}" exists after this point, so no further
			# inline reference can follow. Stop instead of recording "".
			break
		if newline != -1 and newline < comma:
			# Bare `guid: value` line, not an inline reference.
			idx = newline
			continue
		var guid_str := yaml.substr(idx + 5, comma - idx - 5).strip_edges()
		# End of the struct that contains this guid; falls back to the guid
		# terminator when the closing brace is missing.
		var struct_end: int = brace if brace != -1 else comma
		var fileid: int = 0
		# find() returns -1 for a negative start, so a missing "{" yields -1.
		var fileid_idx := yaml.find("fileID:", lbrace)
		if fileid_idx != -1 and fileid_idx < struct_end:
			var fileid_comma := yaml.find(",", fileid_idx)
			if fileid_comma == -1 or fileid_comma > struct_end:
				fileid_comma = struct_end
			fileid = yaml.substr(fileid_idx + 7, fileid_comma - fileid_idx - 7).strip_edges().to_int()
		dependencies[guid_str] = fileid
		idx = comma
	if asset_meta:
		asset_meta.dependency_guids = dependencies
	return dependencies
## Converts the text of a single YAML value into a Godot value.
##
## `line` is the value text with its key and indentation already removed.
## `keyname` is the key it belongs to ("" for plain list items).
## `parent_key` is the enclosing key, used to force string handling for
## name tables.
##
## Returns, in order of precedence:
## - int or float for short numeric text, unless the key is listed in
##   STRING_KEYS or string handling is forced;
## - an empty Array for "[]" and for values starting with "{}";
## - for an inline struct `{...}`: Quaternion (has "w"), Rect2 (has "height"),
##   Color (has "r"), Vector3 (has "z" or "width"), Vector2 (has "y"), or a
##   4-element Array `[null, fileID, guid, type]` for object references;
##   null when the struct is unterminated or none of the above applies;
## - the decoded string for a double-quoted value (null if JSON parsing fails);
## - the unquoted, unescaped string for a single-quoted value;
## - otherwise `line` unchanged.
func parse_value(line: String, keyname: String, parent_key: String) -> Variant:
	# Deliberately kept no-op. The original author reported that
	# line.begins_with("{") misbehaved without it on input such as
	#   - _Outline_Color: {r: 1, g: 1, b: 1, a: 1}
	str(str(line.substr(0, 1)).begins_with(str(line.substr(0, 1))))
	# These values are returned as text; the caller decodes them as desired.
	var force_string = keyname == "_typelessdata" or keyname == "m_IndexBuffer" or keyname == "Hash" or parent_key == "fileIDToRecycleName" or parent_key == "internalIDToNameTable"
	if not force_string and not STRING_KEYS.has(keyname) and len(line) < 24 and line.is_valid_int():
		return line.to_int()
	if not force_string and not STRING_KEYS.has(keyname) and len(line) < 32 and line.is_valid_float():
		return line.to_float()
	if not force_string and line == "[]":
		return [].duplicate()
	if not force_string and line.begins_with("{}"):
		# either {}
		# or:
		# - a: b
		# - c: d
		# We treat dictionaries as arrays where each item is a single key dictionary
		# This is technically wrong, so we might want to fix some day.
		# fileIdToRecycleMap is the only known usage of this anyway
		return [].duplicate()
	if not force_string and line.begins_with("{"):
		if not line.ends_with("}"):
			push_error("Invalid object value " + line.substr(0, 64))
			return null
		# Every candidate result is filled in while scanning; the flags record
		# which keys were seen and therefore which candidate is returned.
		var value_color: Color = Color()
		var value_quat: Quaternion = Quaternion()
		var value_vec3: Vector3 = Vector3()
		var value_vec2: Vector2 = Vector2()
		var is_vec2: bool = false
		var is_color: bool = false
		var is_vec3: bool = false
		var is_rect: bool = false
		var is_quat: bool = false
		var value_ref: Array = [] # [null, fileID, guid, type]
		var offset = 1 # skip the opening "{"
		while true:
			var match_obj = search_obj_key_regex.search(line, offset)
			if match_obj == null:
				push_error("Unable to match regex on inline object @" + str(line_number) + ": " + line.substr(0, 128))
				# Fixed: stop scanning instead of dereferencing the null match.
				break
			offset = match_obj.get_end()
			var comma = line.find(",", offset)
			var value: String = ""
			if comma == -1:
				# Last member: take everything up to the closing "}".
				value = line.substr(offset, len(line) - offset - 1)
			else:
				value = line.substr(offset, comma - offset)
				offset = comma + 1
			var key = match_obj.get_string(1)
			match key:
				"x":
					value_quat.x = value.to_float()
					value_vec3.x = value.to_float()
					value_vec2.x = value.to_float()
				"y":
					value_quat.y = value.to_float()
					value_vec3.y = value.to_float()
					value_vec2.y = value.to_float()
					is_vec2 = true
				"z", "width":
					# Rect width is stored in the z slot and read back below.
					value_quat.z = value.to_float()
					value_vec3.z = value.to_float()
					is_vec3 = true
				"height":
					value_quat.w = value.to_float()
					is_rect = true
				"w":
					value_quat.w = value.to_float()
					is_quat = true
				"r":
					value_color.r = value.to_float()
					is_color = true
				"g":
					value_color.g = value.to_float()
				"b":
					value_color.b = value.to_float()
				"a":
					value_color.a = value.to_float()
				"instanceID", "fileID": # {instanceID: 0} instead of fileID??
					if value_ref.is_empty():
						value_ref.resize(4)
					value_ref[1] = value.to_int()
				"guid":
					if value_ref.is_empty():
						value_ref.resize(4)
					value_ref[2] = value
				"type":
					if value_ref.is_empty():
						value_ref.resize(4)
					value_ref[3] = value.to_int()
				_:
					push_error("Unsupported serializable struct type " + key + ": " + line.substr(0, 128))
			if comma == -1:
				break
		if is_quat:
			return value_quat
		elif is_rect:
			return Rect2(value_quat.x, value_quat.y, value_quat.z, value_quat.w)
		elif is_color:
			return value_color
		elif is_vec3:
			return value_vec3
		elif is_vec2:
			return value_vec2
		elif not value_ref.is_empty():
			return value_ref
		else:
			return null
	elif line.begins_with('"'):
		# Double-quoted scalars use JSON-compatible escapes.
		var j = JSON.new()
		j.parse(line)
		return j.get_data()
	elif line.begins_with("'"):
		# Fixed: drop BOTH surrounding quotes (the closing one used to be
		# kept) and turn the YAML escape '' into a single quote (it used to
		# be deleted).
		var inner: String = line.substr(1)
		if inner.ends_with("'"):
			inner = inner.substr(0, len(inner) - 1)
		return inner.replace("''", "'")
	else:
		return line
## Debug-print hook that is intentionally a no-op; `s` is ignored.
func xprint(s: String):
	return
## Feeds one line of a YAML document (or .meta file when `is_meta` is true)
## into the parser and updates the parser state accordingly.
##
## `meta` receives log messages (log_debug / log_fail) and dependency guid
## entries; for .meta files its `guid` is assigned from the "guid:" line.
## `xinstantiate_unidot_object` is called as
## (meta, fileID, utype, type_name) to create the object for each top-level
## "TypeName:" line.
##
## Returns the previously built object when this line is a "--- " separator
## or an empty line (with no pending single-quoted text) and an object is in
## progress; otherwise returns null.
# NOTE(review): the indentation of this function was lost in this copy of the
# file, so block nesting cannot be read from the columns below.
func parse_line(line: Variant, meta: Object, is_meta: bool, xinstantiate_unidot_object: Callable) -> Resource: # object_adapter.UnidotObject
var instantiate_unidot_object = xinstantiate_unidot_object
line_number = line_number + 1
if line_number % 10000 == 0:
# NOTE(review): `meta` is dereferenced for log_debug while the message itself guards against meta == null.
meta.log_debug(current_obj_fileID, "guid " + str(meta.guid if meta != null else "null") + " line " + str(line_number))
str(str(typeof(line)) + "/" + str(line))
line = line.replace("\r", "")
# NOTE(review): every "\r" was already removed by the replace() above, so this loop looks redundant.
while line.ends_with("\r"):
line = line.substr(0, len(line) - 1)
# The last line of a multiline single-quoted string is not indented if that line is empty
var end_single_multiline: bool = has_single_quote_line and line == "'"
# line_plain is the line without its leading indentation; the difference in
# length is used further down as this line's indentation level.
var line_plain: String = line.dedent()
var obj_key_match: RegExMatch = arr_obj_key_regex.search(line_plain)
# Offset in line_plain where the value starts: 2 (after "- ") unless a key matched.
var value_start: int = 2
var this_key: String = ""
# Remember the key of the previous keyed line before it is overwritten below.
var this_prev_key_simple: String = prev_key_simple
if obj_key_match != null:
value_start = 0 + obj_key_match.get_end()
this_key = obj_key_match.get_string(1)
prev_key_simple = this_key
var missing_brace: bool = false
var missing_single_quote: bool = false
var missing_double_quote: bool = false
var ending_double_quotes: bool = line_plain.ends_with('"')
# Number of consecutive single quotes at the end of the line; an even count
# means they are all '' escapes rather than a closing quote.
var ending_single_quotes: int = 1 if line_plain.ends_with("'") else 0
if line_plain.ends_with("''"):
ending_single_quotes = (len(line_plain) - len(line_plain.rstrip("'")))
if ending_double_quotes:
# Each backslash directly before the final '"' toggles whether that quote is escaped.
var idx: int = len(line_plain) - 2
while line_plain.substr(idx, 1) == "\\":
idx -= 1
ending_double_quotes = not ending_double_quotes
#meta.log_debug(current_obj_fileID, "st=" + str(value_start) + " < " + str(len(line_plain)) + ":" + line_plain)
#meta.log_debug(current_obj_fileID, JSON.print(line))
# Detect a value that opens a brace / quote on this line without closing it.
if value_start < len(line_plain) and not has_brace_line and not has_single_quote_line and not has_double_quote_line:
missing_brace = line_plain.substr(value_start, 1) == "{" and not line_plain.ends_with("}")
missing_double_quote = line_plain.substr(value_start, 1) == '"' and not line_plain.ends_with('"')
missing_single_quote = (line_plain.substr(value_start, 1) == "'" and ending_single_quotes % 2 == (1 if ending_single_quotes + value_start == len(line_plain) else 0))
var new_indentation_level = len(line) - len(line_plain)
var object_to_return: Object = null
# A document separator or a blank line hands the object built so far back to the caller.
if line.begins_with("--- ") or (line == "" and single_quote_line == ""):
if current_obj != null:
#meta.log_debug(current_obj_fileID, "line " + str(line) + ": Returning object of type " + str(current_obj.type))
object_to_return = current_obj
# meta.log_debug(current_obj_fileID, "returning " + str(current_obj) + " at line " + str(line_number)+":" + str(single_quote_line))
indentation_level = 0
# Separator format: "--- !u!<utype> &<fileID>" optionally followed by " stripped".
if line.begins_with("--- "):
current_obj = null
var parts = line.split(" ")
if !parts[1].begins_with("!u!"):
# NOTE(review): substr(128) yields the text AFTER character 128; substr(0, 128) was probably intended.
meta.log_fail(current_obj_fileID, "Separator line not starting with --- !u!: " + line.substr(128))
current_obj_utype = parts[1].substr(3).to_int()
current_obj_fileID = parts[2].substr(1).to_int()
current_obj_stripped = line.ends_with(" stripped")
elif line == "%YAML 1.1":
pass
elif line.begins_with("%TAG !u! tag:"):
pass
elif is_meta and line.begins_with("fileFormatVersion:"):
# usually 2?
pass
elif is_meta and line.begins_with("folderAsset:"):
# For directories; is set to "yes" if this is a folder .meta file
pass
elif is_meta and line.begins_with("timeCreated:"):
# For directories; Unix time, in seconds
pass
elif is_meta and line.begins_with("licenseType:"):
# For directories; Always says "Free"
pass
elif is_meta and (line.begins_with("labels:") or line.begins_with("- ")):
# labels and a list of strings that comes after it. ignore.
pass
elif is_meta and line.begins_with("guid:"):
meta.guid = line.split(":")[1].strip_edges()
elif line_plain == "data:":
pass
# Unindented "TypeName:" line: start a new object through the supplied callable.
elif new_indentation_level == 0 and line.ends_with(":"):
if current_obj != null:
meta.log_fail(current_obj_fileID, "Creating toplevel object without header")
current_obj_type = line.split(":")[0]
current_obj = instantiate_unidot_object.call(meta, current_obj_fileID, current_obj_utype, current_obj_type)
if current_obj_stripped:
current_obj.is_stripped = true
elif line == "" and has_single_quote_line:
single_quote_line += "\n"
#meta.log_debug(current_obj_fileID, "Missing single start " + str(single_quote_line))
elif new_indentation_level == 0 and not end_single_multiline:
# NOTE(review): substr(128) yields the text AFTER character 128; substr(0, 128) was probably intended.
meta.log_fail(current_obj_fileID, "Invalid toplevel line @" + str(line_number) + ": " + line.replace("\r", "").substr(128))
# Start of a wrapped value: stash the line and remember where it began.
elif missing_single_quote:
single_quote_line = line_plain
has_single_quote_line = true
continuation_line_indentation_level = new_indentation_level
#meta.log_debug(current_obj_fileID, "Missing single start " + str(single_quote_line))
elif missing_double_quote:
double_quote_line = line_plain
has_double_quote_line = true
continuation_line_indentation_level = new_indentation_level
#meta.log_debug(current_obj_fileID, "Missing double start")
elif missing_brace:
brace_line = line_plain
has_brace_line = true
continuation_line_indentation_level = new_indentation_level
#meta.log_debug(current_obj_fileID, "Missing brace start")
# Middle of a wrapped value: keep accumulating until the closing token is seen.
elif has_single_quote_line and new_indentation_level > continuation_line_indentation_level and (ending_single_quotes % 2) == 0:
single_quote_line += line_plain
#meta.log_debug(current_obj_fileID, "Missing single mid: " + brace_line)
elif has_double_quote_line and new_indentation_level > continuation_line_indentation_level and not ending_double_quotes:
double_quote_line += " " + line_plain
#meta.log_debug(current_obj_fileID, "Missing double mid: " + brace_line)
elif has_brace_line and new_indentation_level > continuation_line_indentation_level and not line_plain.ends_with("}"):
brace_line += " " + line_plain
meta.log_fail(current_obj_fileID, "Missing brace mid: " + brace_line) # Never seen structs big enough to wrap twice.
# Unquoted string value wrapped onto a more-indented line: append it to the
# string already stored under the previous key (dictionary case) ...
elif not current_obj_tree.is_empty() and obj_key_match == null and not has_brace_line and not has_single_quote_line and not has_double_quote_line and typeof(current_obj_tree.back()) == TYPE_DICTIONARY and typeof(current_obj_tree.back().get(this_prev_key_simple, null)) == TYPE_STRING and new_indentation_level > indentation_level:
#print("Found an indented line " + str(line_plain))
current_obj_tree.back()[this_prev_key_simple] += " " + line_plain
# ... or to the last string element (array case).
elif not current_obj_tree.is_empty() and not line_plain.begins_with("-") and not has_brace_line and not has_single_quote_line and not has_double_quote_line and typeof(current_obj_tree.back()) == TYPE_ARRAY and typeof(current_obj_tree.back()[-1]) == TYPE_STRING and new_indentation_level > indentation_level:
#print("Found an indented arr line " + str(line_plain))
current_obj_tree.back()[-1] += " " + line_plain
else:
# Ordinary content line, possibly the final line of a wrapped value.
if new_indentation_level > continuation_line_indentation_level or end_single_multiline:
var endcontinuation: bool = false
if has_brace_line and line_plain.ends_with("}"):
line_plain = brace_line + " " + line_plain
brace_line = ""
has_brace_line = false
endcontinuation = true
#meta.log_debug(current_obj_fileID, "Missing brace end: " + line_plain)
if has_single_quote_line and (ending_single_quotes % 2) != 0:
line_plain = single_quote_line + line_plain
single_quote_line = ""
has_single_quote_line = false
endcontinuation = true
#meta.log_debug(current_obj_fileID, "Missing single end")
if has_double_quote_line and ending_double_quotes:
line_plain = double_quote_line + " " + line_plain
double_quote_line = ""
has_double_quote_line = false
endcontinuation = true
#meta.log_debug(current_obj_fileID, "Missing double end")
if endcontinuation:
# Treat the re-joined text as if it sat on the line where the value
# started, and redo the key match on the joined text.
new_indentation_level = continuation_line_indentation_level
obj_key_match = arr_obj_key_regex.search(line_plain)
if obj_key_match != null:
value_start = 0 + obj_key_match.get_end()
this_key = obj_key_match.get_string(1)
# Deeper indentation (or a "- " item at the same level while the current
# container is not an array) opens a new container stored under prev_key.
if new_indentation_level > indentation_level or (new_indentation_level == indentation_level and line_plain.begins_with("- ") and typeof(current_obj_tree.back()) != TYPE_ARRAY):
if line_plain.begins_with("- "):
current_indent_tree.push_back(indentation_level)
var new_arr: Array = [].duplicate()
current_obj_tree.back()[prev_key] = new_arr
current_obj_tree.push_back(new_arr)
indentation_level = new_indentation_level
else:
var new_obj: Dictionary = {}.duplicate()
current_indent_tree.push_back(indentation_level)
# At the object root the container is the object's own `keys` dictionary.
if indentation_level == 0:
new_obj = current_obj.keys
else:
current_obj_tree.back()[prev_key] = new_obj
current_obj_tree.push_back(new_obj)
indentation_level = new_indentation_level
else:
# Shallower indentation: pop containers until the levels match again.
while new_indentation_level < indentation_level:
indentation_level = current_indent_tree[-1]
current_indent_tree.pop_back()
current_obj_tree.pop_back()
prev_key = ""
if typeof(current_obj_tree.back()) == TYPE_ARRAY and not line_plain.begins_with("- "):
current_indent_tree.pop_back()
current_obj_tree.pop_back()
# "- key: value" starts a new dictionary element inside the current array;
# its members are indented two characters past the "-".
if line_plain.begins_with("- ") and obj_key_match != null:
current_indent_tree.push_back(indentation_level)
indentation_level = new_indentation_level + 2
var new_obj = {}.duplicate()
if typeof(current_obj_tree.back()) != TYPE_ARRAY:
meta.log_fail(current_obj_fileID, "Invalid obj tree type @" + str(debug_guid) + ":" + str(debug_path) + ":" + str(line_number))
current_obj_tree.back().push_back(new_obj)
current_obj_tree.push_back(new_obj)
if obj_key_match != null:
# Key with nothing after the colon: remember it for the nested container that follows.
if obj_key_match.get_end() == len(line_plain):
prev_key = this_key
if this_key != "first" and this_key != "second" and this_key != "data":
prev_complex_key = this_key
else:
var parsed_val = parse_value(line_plain.substr(obj_key_match.get_end()), this_key, prev_complex_key)
# An Array of 3+ items with a null first slot and a String third slot is
# treated as an object reference: slot 1 is recorded under the guid in slot 2.
if typeof(parsed_val) == TYPE_ARRAY and len(parsed_val) >= 3 and parsed_val[0] == null and typeof(parsed_val[2]) == TYPE_STRING:
match this_key:
# m_SourcePrefab (new), m_ParentPrefab (legacy) used in prefabs and scenes.
# prefab (trees) and prototype (details) are used in Terrain
# other Object->Prefab/GameObject/Transform references need to be added here:
"m_SourcePrefab", "m_ParentPrefab", "prefab", "prototype":
# meta.log_debug(current_obj_fileID, " Possible Ref " + str(this_key))
meta.prefab_dependency_guids[parsed_val[2]] = parsed_val[1]
if is_meta:
meta.meta_dependency_guids[parsed_val[2]] = parsed_val[1]
meta.dependency_guids[parsed_val[2]] = parsed_val[1]
current_obj_tree.back()[this_key] = parsed_val
# Plain list item ("- value"): parse it and append to the current array.
elif line_plain.begins_with("- "):
var parsed_val = parse_value(line_plain.substr(2), "", prev_complex_key)
if typeof(parsed_val) == TYPE_ARRAY and len(parsed_val) >= 3 and parsed_val[0] == null and typeof(parsed_val[2]) == TYPE_STRING:
if is_meta:
meta.meta_dependency_guids[parsed_val[2]] = parsed_val[1]
meta.dependency_guids[parsed_val[2]] = parsed_val[1]
current_obj_tree.back().push_back(parsed_val)
return object_to_return