Skip to content

Commit

Permalink
Improve segment name check in metadata push (#9359)
Browse files Browse the repository at this point in the history
* Improve segment name check in metadata push

* address comments

* fix broken tests
  • Loading branch information
zhtaoxiang authored Sep 10, 2022
1 parent 454a1d8 commit ff98c5a
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.apache.pinot.common.utils.TarGzCompressionUtils;
import org.apache.pinot.common.utils.http.HttpClient;
import org.apache.pinot.segment.spi.V1Constants;
import org.apache.pinot.segment.spi.creator.name.SegmentNameUtils;
import org.apache.pinot.spi.auth.AuthProvider;
import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.filesystem.PinotFS;
Expand Down Expand Up @@ -237,8 +238,10 @@ public static void sendSegmentUriAndMetadata(SegmentGenerationJobSpec spec, Pino
for (String segmentUriPath : segmentUriToTarPathMap.keySet()) {
String tarFilePath = segmentUriToTarPathMap.get(segmentUriPath);
String fileName = new File(tarFilePath).getName();
Preconditions.checkArgument(fileName.endsWith(Constants.TAR_GZ_FILE_EXT));
String segmentName = fileName.substring(0, fileName.length() - Constants.TAR_GZ_FILE_EXT.length());
// segments stored in Pinot deep store do not have .tar.gz extension
String segmentName = fileName.endsWith(Constants.TAR_GZ_FILE_EXT)
? fileName.substring(0, fileName.length() - Constants.TAR_GZ_FILE_EXT.length()) : fileName;
SegmentNameUtils.validatePartialOrFullSegmentName(segmentName);
File segmentMetadataFile = generateSegmentMetadataFile(fileSystem, URI.create(tarFilePath));
AuthProvider authProvider = AuthProviderUtils.makeAuthProvider(spec.getAuthToken());
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,7 @@ public class FixedSegmentNameGenerator implements SegmentNameGenerator {

public FixedSegmentNameGenerator(String segmentName) {
Preconditions.checkArgument(segmentName != null, "Missing segmentName for FixedSegmentNameGenerator");
Preconditions
.checkArgument(isValidSegmentName(segmentName), "Invalid segmentName: %s for FixedSegmentNameGenerator",
segmentName);
SegmentNameUtils.validatePartialOrFullSegmentName(segmentName);
_segmentName = segmentName;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@ public NormalizedDateSegmentNameGenerator(String tableName, @Nullable String seg
_segmentNamePrefix = segmentNamePrefix != null ? segmentNamePrefix.trim() : tableName;
Preconditions
.checkArgument(_segmentNamePrefix != null, "Missing segmentNamePrefix for NormalizedDateSegmentNameGenerator");
Preconditions.checkArgument(isValidSegmentName(_segmentNamePrefix),
"Invalid segmentNamePrefix: %s for NormalizedDateSegmentNameGenerator", _segmentNamePrefix);
SegmentNameUtils.validatePartialOrFullSegmentName(_segmentNamePrefix);
_excludeSequenceId = excludeSequenceId;
_appendPushType = "APPEND".equalsIgnoreCase(pushType);
_segmentNamePostfix = segmentNamePostfix != null ? segmentNamePostfix.trim() : null;
_appendUUIDToSegmentName = appendUUIDToSegmentName;
Preconditions.checkArgument(_segmentNamePostfix == null || isValidSegmentName(_segmentNamePostfix),
"Invalid segmentNamePostfix: %s for NormalizedDateSegmentNameGenerator", _segmentNamePostfix);
if (_segmentNamePostfix != null) {
SegmentNameUtils.validatePartialOrFullSegmentName(_segmentNamePostfix);
}

// Include time info for APPEND push type
if (_appendPushType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,15 @@

import com.google.common.base.Joiner;
import java.io.Serializable;
import java.util.regex.Pattern;
import javax.annotation.Nullable;


/**
* Interface for segment name generator based on the segment sequence id and time range.
*/
public interface SegmentNameGenerator extends Serializable {
Pattern INVALID_SEGMENT_NAME_REGEX = Pattern.compile(".*[\\\\/:\\*?\"<>|].*");
Joiner JOINER = Joiner.on('_').skipNulls();

/**
* A handy util to validate if segment name is valid.
*
* @param segmentName provide segment name
* @return true if segmentName is valid.
*/
default boolean isValidSegmentName(String segmentName) {
return !INVALID_SEGMENT_NAME_REGEX.matcher(segmentName).matches();
}

/**
* Generates the segment name.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.segment.spi.creator.name;

import java.util.regex.Pattern;


/**
* Utils for segment names.
*/
public class SegmentNameUtils {
private static final Pattern INVALID_SEGMENT_NAME_REGEX = Pattern.compile(".*[\\\\/:\\*?\"<>|].*");

private SegmentNameUtils() {
}

/**
* A handy util to validate if segment name is valid.
*
* @param partialOrFullSegmentName provide partial or full segment name
*/
public static void validatePartialOrFullSegmentName(String partialOrFullSegmentName) {
if (INVALID_SEGMENT_NAME_REGEX.matcher(partialOrFullSegmentName).matches()) {
throw new IllegalArgumentException("Invalid partial or full segment name: " + partialOrFullSegmentName);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,21 +48,23 @@ public SimpleSegmentNameGenerator(String segmentNamePrefix, @Nullable String seg
public SimpleSegmentNameGenerator(String segmentNamePrefix, @Nullable String segmentNamePostfix,
boolean appendUUIDToSegmentName) {
Preconditions.checkArgument(segmentNamePrefix != null, "Missing segmentNamePrefix for SimpleSegmentNameGenerator");
Preconditions.checkArgument(isValidSegmentName(segmentNamePrefix),
"Invalid segmentNamePrefix: %s for SimpleSegmentNameGenerator", segmentNamePrefix);
Preconditions.checkArgument(segmentNamePostfix == null || isValidSegmentName(segmentNamePostfix),
"Invalid segmentNamePostfix: %s for SimpleSegmentNameGenerator", segmentNamePostfix);
SegmentNameUtils.validatePartialOrFullSegmentName(segmentNamePrefix);
if (segmentNamePostfix != null) {
SegmentNameUtils.validatePartialOrFullSegmentName(segmentNamePostfix);
}
_segmentNamePrefix = segmentNamePrefix;
_segmentNamePostfix = segmentNamePostfix;
_appendUUIDToSegmentName = appendUUIDToSegmentName;
}

@Override
public String generateSegmentName(int sequenceId, @Nullable Object minTimeValue, @Nullable Object maxTimeValue) {
Preconditions.checkArgument(minTimeValue == null || isValidSegmentName(minTimeValue.toString()),
"Invalid minTimeValue: %s for SimpleSegmentNameGenerator", minTimeValue);
Preconditions.checkArgument(maxTimeValue == null || isValidSegmentName(maxTimeValue.toString()),
"Invalid maxTimeValue: %s for SimpleSegmentNameGenerator", maxTimeValue);
if (minTimeValue != null) {
SegmentNameUtils.validatePartialOrFullSegmentName(minTimeValue.toString());
}
if (maxTimeValue != null) {
SegmentNameUtils.validatePartialOrFullSegmentName(maxTimeValue.toString());
}

return JOINER.join(_segmentNamePrefix, minTimeValue, maxTimeValue, _segmentNamePostfix,
sequenceId >= 0 ? sequenceId : null, _appendUUIDToSegmentName ? UUID.randomUUID() : null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public void testWithMalFormedSegmentName() {
Assert.fail();
} catch (IllegalArgumentException e) {
// Expected
assertEquals(e.getMessage(), "Invalid segmentName: seg*01 for FixedSegmentNameGenerator");
assertEquals(e.getMessage(), "Invalid partial or full segment name: seg*01");
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public void testWithMalFormedTableNameSegmentNamePostfixTimeValue() {
Assert.fail();
} catch (IllegalArgumentException e) {
// Expected
assertEquals(e.getMessage(), "Invalid maxTimeValue: 12|34 for SimpleSegmentNameGenerator");
assertEquals(e.getMessage(), "Invalid partial or full segment name: 12|34");
}
}
}

0 comments on commit ff98c5a

Please sign in to comment.