---
# housing_metadata.yaml
# Training and inference settings specified in this section are set for
# all tables.
global:
  train_settings:
    epochs: 2  # Number of epochs
    drop_null: true  # Drop rows with NULL values
    row_limit: 1800  # Limit of rows for the training process
    print_report: true  # Turn report generation on or off
  infer_settings:
    size: 100  # Size of the generated data
    run_parallel: false
    random_seed: 10  # Ensures reproducible table generation
    print_report: true  # Turn report generation on or off
# Use the table name as the key of this section.
housing_properties:
  # Settings for the training process.
  train_settings:
    # Path to the original data. Required parameter.
    # Supported formats: local CSV and Avro files.
    source: "./examples/example-data/housing_properties.csv"
    epochs: 8  # Number of epochs
    drop_null: true  # Drop rows with NULL values
    row_limit: 1800  # Limit of rows for the training process
    print_report: true  # Turn report generation on or off
  # Settings for the infer process.
  infer_settings:
    # Path where the generated data will be stored. Required parameter.
    # Supported formats: local CSV and Avro files.
    destination: "./examples/generated-data/housing_properties_generated.csv"
    size: 90  # Size of the generated data
    # Turn parallel processing on or off.
    # NOTE(review): the original comment said "parallel training process"
    # although this key sits under infer_settings; confirm which process
    # it controls.
    run_parallel: false
    random_seed: 10  # Ensures reproducible table generation
    print_report: true  # Turn report generation on or off
  # Keys of the table.
  keys:
    households_pk:  # Name of the key
      type: "PK"  # Type of the key
      columns:  # Columns related to the key
        - households
# Table whose `households` column is a foreign key into housing_properties.
housing_conditions:
  train_settings:
    source: "./examples/example-data/housing_conditions.csv"
    epochs: 5
    drop_null: true
    row_limit: 1800
    print_report: true
    # NOTE(review): column_types is placed under train_settings because it
    # appears between the training keys and infer_settings; confirm against
    # the consumer's schema.
    column_types:
      # Force the listed columns to have categorical type
      # (use dictionary of values).
      categorical:
        - housing_median_age
  infer_settings:
    destination: "./examples/generated-data/housing_conditions_generated.csv"
    size: 90
    run_parallel: false
    random_seed: 10
    print_report: true
  keys:
    households_fk:
      type: "FK"
      columns:
        - households
      # Table and columns that this foreign key points to.
      references:
        table: housing_properties
        columns:
          - households