Skip to content

Commit

Permalink
diff: preliminary diff_timeseries_v2()
Browse files Browse the repository at this point in the history
  • Loading branch information
yunjunz committed Sep 20, 2024
1 parent a264f96 commit ed1797f
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 30 deletions.
4 changes: 4 additions & 0 deletions src/mintpy/cli/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
diff.py timeseries_ERA5_ramp_demErr.h5 ../GIANT/Stack/LS-PARAMS.h5 -o mintpy_giant.h5
diff.py reconUnwrapIfgram.h5 ./inputs/ifgramStack.h5 -o diffUnwrapIfgram.h5
# different resolutions
diff.py timeseries.h5 inputs/SET.h5 -o timeseries_SET.h5
diff.py timeseries_SET.h5 ion.h5 -o timeseries_SET_ion.h5
# different file types
diff.py filt_20220905_20230220.unw ./inputs/ERA5.h5 -o filt_20220905_20230220_ERA5.unw
diff.py timeseries.h5 ./inputs/ITRF14.h5 -o timeseries_ITRF14.h5
Expand Down
117 changes: 87 additions & 30 deletions src/mintpy/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,70 @@ def check_reference(atr1, atr2):
return ref_date, ref_y, ref_x


def resample_file(file, target_length, target_width):
"""Resample the entire file to match the target length and width."""
data = readfile.read(file)[0] # Read the whole file
return resize(data, (data.shape[0], target_length, target_width), order=1, preserve_range=True)
def diff_timeseries_v2(file1, file2, out_file, force_diff=False):
"""Calculate the difference between two time-series files,
assuming the two files have the same spatial coverage.
"""

# basic info
atr1 = readfile.read_attribute(file1)
atr2 = readfile.read_attribute(file2)
k1 = atr1['FILE_TYPE']
k2 = atr2['FILE_TYPE']
length1, width1 = int(atr1['LENGTH']), int(atr1['WIDTH'])
length2, width2 = int(atr2['LENGTH']), int(atr2['WIDTH'])
date_list1 = timeseries(file1).get_date_list()
date_list2 = timeseries(file2).get_date_list()

# check file size
different_size = False
if length1 != length2 or width1 != width2:
different_size = True
kwargs = dict(
output_shape=(length1, width1),
order=1,
mode='constant',
anti_aliasing=True,
preserve_range=True,
)
print('WARNING: file 1/2 have different sizes:')
print(f' file 1: ({atr1["LENGTH"]}, {atr1["WIDTH"]})')
print(f' file 2: ({atr2["LENGTH"]}, {atr2["WIDTH"]})')
if different_size and not force_diff:
raise Exception('To enforce the differencing anyway, use --force option.')

# check reference date / point
ref_date, ref_y, ref_x = check_reference(atr1, atr2)
if ref_date:
ref_data = readfile.read(file2, datasetName=ref_date, resize2shape=(length1, width1))[0]
if different_size:
ref_data = resize(ref_data, **kwargs)

# check dates shared by two timeseries files
date_list_shared = [i for i in date_list1 if i in date_list2]
if date_list_shared != date_list1:
print(f'WARNING: {file2} does not contain all dates in {file1}')
if force_diff:
date_list_ex = list(set(date_list1) - set(date_list_shared))
print('Continue and enforce the differencing for their shared dates only.')
print(f'\twith following dates are ignored for differencing:\n{date_list_ex}')
else:
raise Exception('To enforce the differencing anyway, use --force option.')

# get reference matrix
if ref_date:
ref_val = readfile.read(file2, datasetName=ref_date, resize2shape=(length1, width1))[0]
else:
ref_val = None

#for date_str in date_list1:
# if date_str in date_list2:
# # read
# # substract
# else:
# # do nothing
# #write


def diff_timeseries(file1, file2, out_file, force_diff=False, max_num_pixel=2e8):
"""Calculate the difference between two time-series files.
Expand All @@ -84,6 +144,16 @@ def diff_timeseries(file1, file2, out_file, force_diff=False, max_num_pixel=2e8)
date_list2 = giantTimeseries(file2).get_date_list()
unit_fac = 0.001

# check file size and resolution
different_size = False
if any(int(atr1[x]) != int(atr2[x]) for x in ['LENGTH', 'WIDTH']):
different_size = True
print('WARNING: file 1/2 have different sizes:')
print(f' file 1: ({atr1["LENGTH"]}, {atr1["WIDTH"]})')
print(f' file 2: ({atr2["LENGTH"]}, {atr2["WIDTH"]})')
if different_size and not force_diff:
raise Exception('Could NOT run differencing on files with different sizes! Use --force option to overwrite.')

# check reference point
ref_date, ref_y, ref_x = check_reference(atr1, atr2)

Expand All @@ -102,31 +172,18 @@ def diff_timeseries(file1, file2, out_file, force_diff=False, max_num_pixel=2e8)

if ref_y and ref_x:
ref_box = (ref_x, ref_y, ref_x + 1, ref_y + 1)
ref_val = readfile.read(file1, datasetName=date_list_shared, box=ref_box)[0] * unit_fac
ref_val = readfile.read(file2, datasetName=date_list_shared, box=ref_box)[0] * unit_fac
else:
ref_val = None

# instantiate the output file
writefile.layout_hdf5(out_file, ref_file=file1)

# Resample file2 if necessary
length1, width1 = int(atr1['LENGTH']), int(atr1['WIDTH'])
length2, width2 = int(atr2['LENGTH']), int(atr2['WIDTH'])

if not force_diff and (length1 != length2 or width1 != width2):
raise Exception('Length and Width of the files do not match. Use --force option to proceed.')

# Resample the whole file2 to match the dimensions of file1
if force_diff and (length1 != length2 or width1 != width2):
print(f'Resampling {file2} to match {file1} dimensions.')
data2_resampled = resample_file(file2, length1, width1) * unit_fac
else:
data2_resampled = readfile.read(file2)[0] * unit_fac

# Split both files into corresponding blocks
num_box = int(np.ceil(len(date_list1) * length1 * width1 / max_num_pixel))
# block-by-block IO
length, width = int(atr1['LENGTH']), int(atr1['WIDTH'])
num_box = int(np.ceil(len(date_list1) * length * width / max_num_pixel))
box_list, num_box = cluster.split_box2sub_boxes(
box=(0, 0, width1, length1),
box=(0, 0, width, length),
num_split=num_box,
dimension='y',
print_msg=True,
Expand All @@ -139,8 +196,7 @@ def diff_timeseries(file1, file2, out_file, force_diff=False, max_num_pixel=2e8)

# read data2 (consider different reference_date/pixel)
print(f'read from file: {file2}')
data2 = data2_resampled[:, box[1]:box[3], box[0]:box[2]]* unit_fac

data2 = readfile.read(file2, datasetName=date_list_shared, box=box)[0] * unit_fac

if ref_val is not None:
print(f'* referencing data from {os.path.basename(file2)} to y/x: {ref_y}/{ref_x}')
Expand All @@ -153,20 +209,21 @@ def diff_timeseries(file1, file2, out_file, force_diff=False, max_num_pixel=2e8)

# read data1
print(f'read from file: {file1}')
data1 = readfile.read(file1, box=box)[0]
data = readfile.read(file1, box=box)[0]

# apply differencing
mask = data1 == 0.
data1[date_flag_shared] -= data2
data1[mask] = 0. # Do not change zero phase value
mask = data == 0.
data[date_flag_shared] -= data2
data[mask] = 0. # Do not change zero phase value
del data2

# write the block
block = [0, data1.shape[0], box[1], box[3], box[0], box[2]]
writefile.write_hdf5_block(out_file, data=data1, datasetName=k1, block=block)
block = [0, data.shape[0], box[1], box[3], box[0], box[2]]
writefile.write_hdf5_block(out_file, data=data, datasetName=k1, block=block)

return out_file


def diff_timeseries_and_velocity(file1, file2, out_file, max_num_pixel=2e8):
"""Calculate the difference between a time-series file and a velocity file.
Expand Down

0 comments on commit ed1797f

Please sign in to comment.