diff --git a/.gitignore b/.gitignore index a3c393f..9aad68c 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ out/ /ligradle .DS_Store /doc +/target +target/ diff --git a/.travis.yml b/.travis.yml index 420571e..b215a48 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,11 @@ language: java -jdk: - - oraclejdk8 - - oraclejdk7 -script: - - gradle build +sudo: false +dist: trusty + +matrix: + include: + - jdk: openjdk13 after_success: - - gradle jacocoTestReport coveralls + - bash <(curl -s https://codecov.io/bash) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5df71df..8ab630a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,16 @@ Change Log ========== +Version 2.0.0 +-------------------------- +- Bumped minimal java version to java 13 +- StoreReader and StoreWriter implement AutoCloseable +- New config builder +- Reader and writer use generics +- Reader is now fully thread-safe +- Bloom filters could be used for better performance + + Version 1.2.0 *(June 26th 2016)* -------------------------- diff --git a/README.md b/README.md index 4792f15..3865462 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ PalDB ========== -[![Build Status](https://travis-ci.org/linkedin/PalDB.svg?branch=master)](https://travis-ci.org/linkedin/PalDB) -[![Coverage Status](https://coveralls.io/repos/linkedin/PalDB/badge.svg?branch=master&service=github)](https://coveralls.io/github/linkedin/PalDB?branch=master) +[![Build Status](https://travis-ci.org/soundvibe/PalDB.svg)](https://travis-ci.org/soundvibe/PalDB) +[![codecov](https://codecov.io/gh/soundvibe/PalDB/branch/java11/graph/badge.svg)](https://codecov.io/gh/soundvibe/PalDB) PalDB is an embeddable write-once key-value store written in Java. @@ -42,29 +42,26 @@ API documentation can be found [here](http://linkedin.github.com/PalDB/doc/javad How to write a store ```java -StoreWriter writer = PalDB.createWriter(new File("store.paldb")); -writer.put("foo", "bar"); -writer.put(1213, new int[] {1, 2, 3}); -writer.close(); +try (var writer = PalDB.createWriter(new File("store.paldb"))) { + writer.put(1213, "foo"); +} ``` How to read a store ```java -StoreReader reader = PalDB.createReader(new File("store.paldb")); -String val1 = reader.get("foo"); -int[] val2 = reader.get(1213); -reader.close(); +try (var reader = PalDB.createReader(new File("store.paldb"))) { + String val = reader.get(1213); +} ``` How to iterate on a store ```java -StoreReader reader = PalDB.createReader(new File("store.paldb")); -Iterable> iterable = reader.iterable(); -for (Map.Entry entry : iterable) { - String key = entry.getKey(); - String value = entry.getValue(); +try (var reader = PalDB.createReader(new File("store.paldb"))) { + for (var entry : reader) { + Integer key = entry.getKey(); + String value = entry.getValue(); + } } -reader.close(); ``` For Scala examples, see [here](https://gist.github.com/mbastian/9b9b49a4b96333da33ec) and [here](https://gist.github.com/mbastian/440a706f5e863bb65622). @@ -75,14 +72,14 @@ Use it PalDB is available on Maven Central, hence just add the following dependency: ``` - com.linkedin.paldb + net.soundvibe paldb - 1.2.0 + 2.0.0 ``` Scala SBT ``` -libraryDependencies += "com.linkedin.paldb" % "paldb" % "1.2.0" +libraryDependencies += "net.soundvibe" % "paldb" % "2.0.0" ``` @@ -104,26 +101,26 @@ No, like a hashtable PalDB stores have no order. Build ----- -PalDB requires Java 6+ and gradle. The target Java version is 6. +PalDB requires Java 13+ and maven. The target Java version is 13. 
```bash -gradle build +mvn build ``` Performance tests are run separately from the build ```bash -gradle perfTest +mvn clean test -Dtag=performance ``` Test ---- -We use the TestNG framework for our unit tests. You can run them via the `gradle clean test` command. +We use the JUnit framework for our unit tests. You can run them via the `mvn clean test` command. Coverage -------- -Coverage is run using JaCoCo. You can run a report via `gradle jacocoTestReport`. The report will be generated in `paldb/build/reports/jacoco/test/html/`. +Coverage is run using JaCoCo. You can run a report via `mvn jacoco:report`. The report will be generated in `paldb/build/reports/jacoco/test/html/`. Advanced configuration ---------------------- @@ -132,27 +129,30 @@ Write parameters: + `load.factor`, index load factor (double) [default: 0.75] + `compression.enabled`, enable compression (boolean) [default: false] ++ `bloom.filter.enabled`, enable bloom filter (boolean) [default: false] ++ `bloom.filter.error.factor`, bloom filter error rate (double) [default: 0.01] Read parameters: + `mmap.data.enabled`, enable memory mapping for data (boolean) [default: true] + `mmap.segment.size`, memory map segment size (bytes) [default: 1GB] -+ `cache.enabled`, LRU cache enabled (boolean) [default: false] -+ `cache.bytes`, cache limit (bytes) [default: Xmx - 100MB] -+ `cache.initial.capacity`, cache initial capacity (int) [default: 1000] -+ `cache.load.factor`, cache load factor (double) [default: 0.75] -Configuration values are passed at init time. Example: +Configuration values are passed at init time. Example using fluent builder: ```java -Configuration config = PalDB.newConfiguration(); -config.set(Configuration.CACHE_ENABLED, "true"); -StoreReader reader = PalDB.createReader(new File("store.paldb"), config); +var config = PalDBConfigBuilder.create() + .withMemoryMapSegmentSize(512 * 1024 * 1024) + .withMemoryMapDataEnabled(false) + .withIndexLoadFactor(0.75) + .withEnableCompression(true) + .withEnableBloomFilter(true) + .build(); +StoreReader reader = PalDB.createReader(new File("store.paldb"), config); ``` A few tips on how configuration can affect performance: + Disabling memory mapping will significantly reduce performance as disk seeks will be performed instead. -+ Enabling the cache makes sense when the value size is large and there's a significant cost in deserialization. Otherwise, the cache adds an overhead. The cache is also useful when memory mapping is disabled. ++ Enabling the bloom filter makes sense when you expect to miss finding some values. It will greatly increase read performance in this case. + Compression can be enabled when the store size is a concern and the values are large (e.g. a sparse matrix). By default, PalDB already uses a compact serialization. Snappy is used for compression. Custom serializer @@ -175,20 +175,20 @@ public class PointSerializer implements Serializer { output.writeInt(point.x); output.writeInt(point.y); } - + @Override - public int getWeight(Point instance) { - return 8; + public Class serializedClass() { + return Point.class; } } ``` -The `write` method serializes the instance to the `DataOutput`. The `read` method deserializes from `DataInput` and creates new object instances. The `getWeight` method returns the estimated memory used by an instance in bytes. The latter is used by the cache to evaluate the amount of memory it's currently using. +The `write` method serializes the instance to the `DataOutput`. 
The `read` method deserializes from `DataInput` and creates new object instances. Serializer implementation should be registered using the `Configuration`: ```java -Configuration configuration = PalDB.newConfiguration(); +var configuration = PalDB.newConfiguration(); configuration.registerSerializer(new PointSerializer()); ``` @@ -205,7 +205,7 @@ Limitations ----------- + PalDB is optimal in replacing the usage of large in-memory data storage but still use memory (off-heap, yet much less) to do its job. Disabling memory mapping and relying on seeks is possible but is not what PalDB has been optimized for. + The size of the index is limited to 2GB. There's no limitation in the data size however. -+ PalDB is not thread-safe at the moment so synchronization should be done externally if multi-threaded. ++ PalDB reader is thread-safe but writer is not thread-safe at the moment so synchronization should be done externally if multi-threaded. Contributions ----------- @@ -215,4 +215,5 @@ Any helpful feedback is more than welcome. This includes feature requests, bug r Copyright & License ------------------- +PalDB © 2019 Linas Naginionis. Licensed under the terms of the Apache License, Version 2.0. PalDB © 2015 LinkedIn Corp. Licensed under the terms of the Apache License, Version 2.0. diff --git a/build.gradle b/build.gradle deleted file mode 100644 index 05e851e..0000000 --- a/build.gradle +++ /dev/null @@ -1,19 +0,0 @@ -buildscript { - repositories { - mavenCentral() - } - dependencies { - classpath "org.kt3k.gradle.plugin:coveralls-gradle-plugin:2.4.0" - } -} - -allprojects { - repositories { - mavenCentral() - } -} - -allprojects { - apply plugin: 'eclipse' - apply plugin: 'idea' -} diff --git a/gradle.properties b/gradle.properties deleted file mode 100644 index a8c1fd0..0000000 --- a/gradle.properties +++ /dev/null @@ -1,8 +0,0 @@ -org.gradle.daemon=true -org.gradle.configureondemand=true -ide.recursive=true -org.gradle.parallel=false - -VERSION_NAME=1.2.0 -GROUP=com.linkedin.paldb -ARCHIVE_NAME=paldb diff --git a/paldb/overview.html b/overview.html similarity index 81% rename from paldb/overview.html rename to overview.html index 057ef2d..97e43be 100644 --- a/paldb/overview.html +++ b/overview.html @@ -2,7 +2,7 @@

PalDB is an embeddable persistent write-once key-value store.

-

Code samples

+

Code samples

How to write a store:
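(For reference, the README updated earlier in this diff rewrites this write sample in the 2.0 try-with-resources style. A minimal standalone sketch of that style is shown below — the `com.linkedin.paldb.api.PalDB` import is assumed from the 1.x sources elsewhere in this diff, and the wrapper class, `main` method and `throws Exception` clause are added here only so the snippet compiles; they are not part of the original sample.)

```java
import com.linkedin.paldb.api.PalDB;

import java.io.File;

public class WriteSample {
    public static void main(String[] args) throws Exception {
        // In 2.0 StoreWriter implements AutoCloseable, so the store is
        // flushed and closed automatically when the try block exits.
        try (var writer = PalDB.createWriter(new File("store.paldb"))) {
            writer.put(1213, "foo"); // key 1213 -> value "foo"
        }
    }
}
```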
@@ -23,20 +23,17 @@ 

Code samples

How to iterate on a store:
 StoreReader reader = PalDB.createReader(new File("store.paldb"));
-Iterable<Map.Entry<String, String>> iterable = reader.iterable();
-for (Map.Entry<String, String> entry : iterable) {
+Iterable<Map.Entry<String, String>> iterable = reader.iterable();
+for (Map.Entry<String, String> entry : iterable) {
   String key = entry.getKey();
   String value = entry.getValue();
 }
 reader.close();
 
-

API Changes

-

  • (April 2015) Initial API
-

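(The iteration sample above still relies on the 1.x `iterable()` and `close()` calls. The updated README in this diff instead iterates the reader directly inside try-with-resources; a hedged sketch of that 2.0-style equivalent follows. The import and wrapper class are assumptions added for completeness, and the casts appear only because this sketch does not reproduce the reader's generic signature — the README example itself omits them.)

```java
import com.linkedin.paldb.api.PalDB;

import java.io.File;

public class IterateSample {
    public static void main(String[] args) throws Exception {
        // The 2.0 reader is AutoCloseable and iterable, yielding
        // Map.Entry-style entries as shown in the README example.
        try (var reader = PalDB.createReader(new File("store.paldb"))) {
            for (var entry : reader) {
                Integer key = (Integer) entry.getKey();   // key type from the write sample
                String value = (String) entry.getValue(); // value type from the write sample
                System.out.println(key + " -> " + value);
            }
        }
    }
}
```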
diff --git a/paldb/build.gradle b/paldb/build.gradle deleted file mode 100644 index ecacd6e..0000000 --- a/paldb/build.gradle +++ /dev/null @@ -1,126 +0,0 @@ -apply plugin: 'java' -apply plugin: 'jacoco' -apply plugin: 'com.github.kt3k.coveralls' -apply plugin: 'signing' -apply plugin: 'maven' - -sourceCompatibility = 1.6 -group = GROUP -version = VERSION_NAME - -test { - useTestNG() -} - -sourceSets { - - perfTest { - java.srcDir file('src/perfTest/java') - resources.srcDir file('src/perfTest/resources') - compileClasspath += sourceSets.main.output + test.output - runtimeClasspath += sourceSets.main.output + test.output - } - -} - -jar { - from sourceSets.main.allSource -} - -task perfTest(type: Test) { - testClassesDir = sourceSets.perfTest.output.classesDir - classpath = sourceSets.perfTest.runtimeClasspath -} - -perfTest { - useTestNG() - - maxHeapSize = "2g" -} - -dependencies { - compile 'org.xerial.snappy:snappy-java:1.0.5' - - testCompile 'org.testng:testng:6.8.8' - testCompile 'commons-lang:commons-lang:2.6' - - perfTestCompile configurations.testCompile - - // Only used for benchmark - perfTestCompile 'org.fusesource.leveldbjni:leveldbjni-all:1.8' - perfTestCompile 'org.rocksdb:rocksdbjni:4.0' -} - -javadoc { - options.overview = "overview.html" -} - -jacocoTestReport { - reports { - html.enabled = true - xml.enabled = true - } -} - -if (project.hasProperty('release')) { - - signing { - sign configurations.archives - } - - task javadocJar(type: Jar) { - classifier = 'javadoc' - from javadoc - } - - task sourcesJar(type: Jar) { - classifier = 'sources' - from sourceSets.main.allSource - } - - artifacts { - archives javadocJar, sourcesJar - } - - uploadArchives { - repositories { - mavenDeployer { - // POM signature - beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) } - // Target repository - repository(url: "https://oss.sonatype.org/service/local/staging/deploy/maven2/") { - authentication(userName: ossrhUser, password: ossrhPassword) - } - pom.project { - group 'com.linkedin.paldb' - name 'paldb' - description 'Embeddable persistent write-once key-value store' - packaging 'jar' - url 'https://github.com/linkedin/PalDB' - - scm { - connection 'scm:git:https://github.com/linkedin/PalDB.git' - developerConnection 'scm:git@github.com:linkedin/PalDB.git' - url 'https://github.com/linkedin/PalDB' - } - - licenses { - license { - name 'The Apache Software License, Version 2.0' - url 'http://www.apache.org/licenses/LICENSE-2.0.txt' - distribution 'repo' - } - } - - developers { - developer { - id 'mbastian' - name 'Mathieu Bastian' - email 'mbastian@apache.org' - } - } - } - } - } - } -} \ No newline at end of file diff --git a/paldb/src/main/java/com/linkedin/paldb/api/NotFoundException.java b/paldb/src/main/java/com/linkedin/paldb/api/NotFoundException.java deleted file mode 100644 index 9983c28..0000000 --- a/paldb/src/main/java/com/linkedin/paldb/api/NotFoundException.java +++ /dev/null @@ -1,28 +0,0 @@ -/* -* Copyright 2015 LinkedIn Corp. All rights reserved. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-*/ - -package com.linkedin.paldb.api; - -/** - * Exception returned when an key can't be found. - * - * @see StoreReader - */ -@SuppressWarnings("serial") -public class NotFoundException extends Exception { - - public NotFoundException(Object key) { - super("The key '" + key.toString() + "' couldn't be found"); - } -} diff --git a/paldb/src/main/java/com/linkedin/paldb/api/StoreReader.java b/paldb/src/main/java/com/linkedin/paldb/api/StoreReader.java deleted file mode 100644 index 89052eb..0000000 --- a/paldb/src/main/java/com/linkedin/paldb/api/StoreReader.java +++ /dev/null @@ -1,454 +0,0 @@ -/* -* Copyright 2015 LinkedIn Corp. All rights reserved. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -*/ - -package com.linkedin.paldb.api; - -import java.io.File; -import java.util.Map; - - -/** - * Main interface to read data from a PalDB store. - *

- * PalDB.createReader() method and then call the - * get() method to fetch. Call the - * close() to liberate resources when done. - */ -public interface StoreReader { - - /** - * Closes the store reader and free resources. - *

- * A closed reader can't be reopened. - */ - public void close(); - - /** - * Returns the reader's configuration. - * - * @return the store configuration - */ - public Configuration getConfiguration(); - - /** - * Returns the store file. - * - * @return file - */ - public File getFile(); - - /** - * Returns the number of keys in the store. - * - * @return key count - */ - public long size(); - - /** - * Gets the value for key or null if not found. - * - * @param key key to fetch - * @param return type - * @return value or null if not found - */ - public K get(Object key); - - /** - * Gets the value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @param return type - * @return value of defaultValue if not found - */ - public K get(Object key, K defaultValue); - - /** - * Gets the int value for key. - * - * @param key key to fetch - * @return int value - * @throws NotFoundException if not found - */ - public int getInt(Object key) - throws NotFoundException; - - /** - * Gets the int value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return int value or defaultValue if not found - */ - public int getInt(Object key, int defaultValue); - - /** - * Gets the long value for key. - * - * @param key key to fetch - * @return long value - * @throws NotFoundException if not found - */ - public long getLong(Object key) - throws NotFoundException; - - /** - * Gets the long value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return long value or defaultValue if not found - */ - public long getLong(Object key, long defaultValue); - - /** - * Gets the boolean value for key. - * - * @param key key to fetch - * @return boolean value - * @throws NotFoundException if not found - */ - public boolean getBoolean(Object key) - throws NotFoundException; - - /** - * Gets the boolean value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return boolean value or defaultValue if not found - */ - public boolean getBoolean(Object key, boolean defaultValue); - - /** - * Gets the float value for key. - * - * @param key key to fetch - * @return float value - * @throws NotFoundException if not found - */ - public float getFloat(Object key) - throws NotFoundException; - - /** - * Gets the float value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return float value or defaultValue if not found - */ - public float getFloat(Object key, float defaultValue); - - /** - * Gets the double value for key. - * - * @param key key to fetch - * @return double value - * @throws NotFoundException if not found - */ - public double getDouble(Object key) - throws NotFoundException; - - /** - * Gets the double value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return double value or defaultValue if not found - */ - public double getDouble(Object key, double defaultValue); - - /** - * Gets the short value for key. - * - * @param key key to fetch - * @return short value - * @throws NotFoundException if not found - */ - public short getShort(Object key) - throws NotFoundException; - - /** - * Gets the short value for key or defaultValue if not found. 
- * - * @param key key to fetch - * @param defaultValue default value - * @return short value or defaultValue if not found - */ - public short getShort(Object key, short defaultValue); - - /** - * Gets the byte value for key. - * - * @param key key to fetch - * @return byte value - * @throws NotFoundException if not found - */ - public byte getByte(Object key) - throws NotFoundException; - - /** - * Gets the byte value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return byte value or defaultValue if not found - */ - public byte getByte(Object key, byte defaultValue); - - /** - * Gets the string value for key or null if not found. - * - * @param key key to fetch - * @return string value - */ - public String getString(Object key); - - /** - * Gets the string value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return string value or defaultValue if not found - */ - public String getString(Object key, String defaultValue); - - /** - * Gets the char value for key. - * - * @param key key to fetch - * @return char value - * @throws NotFoundException if not found - */ - public char getChar(Object key) - throws NotFoundException; - - /** - * Gets the char value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return char value or defaultValue if not found - */ - public char getChar(Object key, char defaultValue); - - /** - * Gets the object array value for key or null if not found. - * - * @param key key to fetch - * @param return type - * @return object array value or null if not found - */ - public K[] getArray(Object key); - - /** - * Gets the object array value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @param return type - * @return object array value or defaultValue if not found - */ - public K[] getArray(Object key, K[] defaultValue); - - /** - * Gets the int array value for key. - * - * @param key key to fetch - * @return int array value - * @throws NotFoundException if not found - */ - public int[] getIntArray(Object key) - throws NotFoundException; - - /** - * Gets the int array value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return int array value or defaultValue if not found - */ - public int[] getIntArray(Object key, int[] defaultValue); - - /** - * Gets the long array value for key. - * - * @param key key to fetch - * @return long array value - * @throws NotFoundException if not found - */ - public long[] getLongArray(Object key) - throws NotFoundException; - - /** - * Gets the long array value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return long array value or defaultValue if not found - */ - public long[] getLongArray(Object key, long[] defaultValue); - - /** - * Gets the boolean array value for key. - * - * @param key key to fetch - * @return boolean array value - * @throws NotFoundException if not found - */ - public boolean[] getBooleanArray(Object key) - throws NotFoundException; - - /** - * Gets the boolean array value for key or defaultValue if not found. 
- * - * @param key key to fetch - * @param defaultValue default value - * @return boolean array value or defaultValue if not found - */ - public boolean[] getBooleanArray(Object key, boolean[] defaultValue); - - /** - * Gets the float array value for key. - * - * @param key key to fetch - * @return float array value - * @throws NotFoundException if not found - */ - public float[] getFloatArray(Object key) - throws NotFoundException; - - /** - * Gets the float array value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return float array value or defaultValue if not found - */ - public float[] getFloatArray(Object key, float[] defaultValue); - - /** - * Gets the double array value for key. - * - * @param key key to fetch - * @return double array value - * @throws NotFoundException if not found - */ - public double[] getDoubleArray(Object key) - throws NotFoundException; - - /** - * Gets the double array value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return double array value or defaultValue if not found - */ - public double[] getDoubleArray(Object key, double[] defaultValue); - - /** - * Gets the short array value for key. - * - * @param key key to fetch - * @return short array value - * @throws NotFoundException if not found - */ - public short[] getShortArray(Object key) - throws NotFoundException; - - /** - * Gets the short array value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return short array value or defaultValue if not found - */ - public short[] getShortArray(Object key, short[] defaultValue); - - /** - * Gets the byte array value for key. - * - * @param key key to fetch - * @return byte array value - * @throws NotFoundException if not found - */ - public byte[] getByteArray(Object key) - throws NotFoundException; - - /** - * Gets the byte array value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return byte array value or defaultValue if not found - */ - public byte[] getByteArray(Object key, byte[] defaultValue); - - /** - * Gets the char array value for key. - * - * @param key key to fetch - * @return char array value - * @throws NotFoundException if not found - */ - public char[] getCharArray(Object key) - throws NotFoundException; - - /** - * Gets the char array value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return char array value or defaultValue if not found - */ - public char[] getCharArray(Object key, char[] defaultValue); - - /** - * Gets the string array value for key or null if not found. - * - * @param key key to fetch - * @return string array value or null if not found - * @throws NotFoundException if not found - */ - public String[] getStringArray(Object key) - throws NotFoundException; - - /** - * Gets the string array value for key or defaultValue if not found. - * - * @param key key to fetch - * @param defaultValue default value - * @return string array value or defaultValue if not found - */ - public String[] getStringArray(Object key, String[] defaultValue); - - /** - * Gets the store iterable. - *

- * Note that entry objects are reused. - * - * @param key type - * @param value type - * @return iterable over store - */ - public Iterable> iterable(); - - /** - * Gets the store keys iterable. - * - * @param key type - * @return iterable over keys - */ - public Iterable keys(); -} diff --git a/paldb/src/main/java/com/linkedin/paldb/impl/ReaderImpl.java b/paldb/src/main/java/com/linkedin/paldb/impl/ReaderImpl.java deleted file mode 100644 index ded364a..0000000 --- a/paldb/src/main/java/com/linkedin/paldb/impl/ReaderImpl.java +++ /dev/null @@ -1,426 +0,0 @@ -/* -* Copyright 2015 LinkedIn Corp. All rights reserved. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -*/ - -package com.linkedin.paldb.impl; - -import com.linkedin.paldb.api.Configuration; -import com.linkedin.paldb.api.NotFoundException; -import com.linkedin.paldb.api.StoreReader; -import com.linkedin.paldb.utils.DataInputOutput; -import java.io.File; -import java.io.IOException; -import java.util.Map; -import java.util.logging.Level; -import java.util.logging.Logger; - - -/** - * Store reader implementation. - */ -public final class ReaderImpl implements StoreReader { - - // Logger - private final static Logger LOGGER = Logger.getLogger(ReaderImpl.class.getName()); - // Configuration - private final Configuration config; - // Buffer - private final DataInputOutput dataInputOutput = new DataInputOutput(); - // Storage - private final StorageReader storage; - // Serialization - private final StorageSerialization serialization; - // Cache - private final StorageCache cache; - // File - private final File file; - // Opened? - private boolean opened; - - /** - * Private constructor. 
- * - * @param config configuration - * @param file store file - */ - ReaderImpl(Configuration config, File file) { - this.config = config; - this.file = file; - - // Open storage - try { - LOGGER.log(Level.INFO, "Opening reader storage"); - serialization = new StorageSerialization(config); - storage = new StorageReader(config, file); - } catch (IOException ex) { - throw new RuntimeException(ex); - } - opened = true; - - // Cache - cache = StorageCache.initCache(config); - } - - @Override - public void close() { - checkOpen(); - try { - LOGGER.log(Level.INFO, "Closing reader storage"); - storage.close(); - opened = false; - } catch (IOException ex) { - throw new RuntimeException(ex); - } - } - - @Override - public long size() { - checkOpen(); - return storage.getKeyCount(); - } - - @Override - public Configuration getConfiguration() { - return config; - } - - @Override - public File getFile() { - return file; - } - - @Override - public K get(Object key) { - return (K) get(key, null); - } - - @Override - public K get(Object key, K defaultValue) { - checkOpen(); - if (key == null) { - throw new NullPointerException("The key can't be null"); - } - K value = cache.get(key); - if (value == null) { - try { - byte[] valueBytes = storage.get(serialization.serializeKey(key)); - if (valueBytes != null) { - - Object v = serialization.deserialize(dataInputOutput.reset(valueBytes)); - cache.put(key, v); - return (K) v; - } else { - return defaultValue; - } - } catch (Exception ex) { - throw new RuntimeException(ex); - } - } else if (value == StorageCache.NULL_VALUE) { - return null; - } - return value; - } - - @Override - public int getInt(Object key, int defaultValue) { - return get(key, defaultValue); - } - - @Override - public int getInt(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return ((Integer) val).intValue(); - } - - @Override - public long getLong(Object key, long defaultValue) { - return get(key, defaultValue); - } - - @Override - public long getLong(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return ((Long) val).longValue(); - } - - @Override - public boolean getBoolean(Object key, boolean defaultValue) { - return get(key, defaultValue); - } - - @Override - public boolean getBoolean(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return ((Boolean) val).booleanValue(); - } - - @Override - public float getFloat(Object key, float defaultValue) { - return get(key, defaultValue); - } - - @Override - public float getFloat(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return ((Float) val).floatValue(); - } - - @Override - public double getDouble(Object key, double defaultValue) { - return get(key, defaultValue); - } - - @Override - public double getDouble(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return ((Double) val).intValue(); - } - - @Override - public short getShort(Object key, short defaultValue) { - return get(key, defaultValue); - } - - @Override - public short getShort(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return ((Short) val).shortValue(); - } - - @Override - public byte getByte(Object 
key, byte defaultValue) { - return get(key, defaultValue); - } - - @Override - public byte getByte(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return ((Byte) val).byteValue(); - } - - @Override - public String getString(Object key, String defaultValue) { - return get(key, defaultValue); - } - - @Override - public String getString(Object key) { - return (String) get(key, null); - } - - @Override - public char getChar(Object key, char defaultValue) { - return get(key, defaultValue); - } - - @Override - public char getChar(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return ((Character) val).charValue(); - } - - @Override - public K[] getArray(Object key) { - return (K[]) get(key, null); - } - - @Override - public K[] getArray(Object key, K[] defaultValue) { - return (K[]) get(key, defaultValue); - } - - @Override - public int[] getIntArray(Object key, int[] defaultValue) { - return get(key, defaultValue); - } - - @Override - public int[] getIntArray(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return (int[]) val; - } - - @Override - public long[] getLongArray(Object key, long[] defaultValue) { - return get(key, defaultValue); - } - - @Override - public long[] getLongArray(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return (long[]) val; - } - - @Override - public boolean[] getBooleanArray(Object key, boolean[] defaultValue) { - return get(key, defaultValue); - } - - @Override - public boolean[] getBooleanArray(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return (boolean[]) val; - } - - @Override - public float[] getFloatArray(Object key, float[] defaultValue) { - return get(key, defaultValue); - } - - @Override - public float[] getFloatArray(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return (float[]) val; - } - - @Override - public double[] getDoubleArray(Object key, double[] defaultValue) { - return get(key, defaultValue); - } - - @Override - public double[] getDoubleArray(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return (double[]) val; - } - - @Override - public short[] getShortArray(Object key, short[] defaultValue) { - return get(key, defaultValue); - } - - @Override - public short[] getShortArray(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return (short[]) val; - } - - @Override - public byte[] getByteArray(Object key, byte[] defaultValue) { - return get(key, defaultValue); - } - - @Override - public byte[] getByteArray(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return (byte[]) val; - } - - @Override - public String[] getStringArray(Object key, String[] defaultValue) { - return get(key, defaultValue); - } - - @Override - public String[] getStringArray(Object key) { - return (String[]) get(key, null); - } - - @Override - public char[] getCharArray(Object key, char[] defaultValue) { - return get(key, defaultValue); - } - - @Override - 
public char[] getCharArray(Object key) - throws NotFoundException { - Object val = get(key); - if (val == null) { - throw new NotFoundException(key); - } - return (char[]) val; - } - - @Override - public Iterable> iterable() { - checkOpen(); - return new ReaderIterable(storage, serialization); - } - - @Override - public Iterable keys() { - checkOpen(); - return new ReaderKeyIterable(storage, serialization); - } - - // UTILITIES - - /** - * Checks if the store is open and throws an exception otherwise. - */ - private void checkOpen() { - if (!opened) { - throw new IllegalStateException("The store is closed"); - } - } -} diff --git a/paldb/src/main/java/com/linkedin/paldb/impl/Serializers.java b/paldb/src/main/java/com/linkedin/paldb/impl/Serializers.java deleted file mode 100644 index 4601a7a..0000000 --- a/paldb/src/main/java/com/linkedin/paldb/impl/Serializers.java +++ /dev/null @@ -1,315 +0,0 @@ -/* -* Copyright 2015 LinkedIn Corp. All rights reserved. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -*/ - -package com.linkedin.paldb.impl; - -import com.linkedin.paldb.api.Serializer; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.io.Serializable; -import java.lang.reflect.Array; -import java.lang.reflect.GenericArrayType; -import java.lang.reflect.ParameterizedType; -import java.lang.reflect.Type; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.logging.Level; -import java.util.logging.Logger; - - -/** - * Manages the custom serializers. - */ -public final class Serializers implements Serializable { - - // Logger - private final static Logger LOGGER = Logger.getLogger(Serializers.class.getName()); - private AtomicInteger COUNTER; - private Map serializers; - private Serializer[] serializersArray; - - /** - * Default constructor. - */ - public Serializers() { - COUNTER = new AtomicInteger(); - serializers = new HashMap(); - serializersArray = new Serializer[0]; - } - - /** - * Registers the serializer. - * - * @param serializer serializer - */ - public synchronized void registerSerializer(Serializer serializer) { - Class objClass = getSerializerType(serializer); - if (!serializers.containsKey(objClass)) { - int index = COUNTER.getAndIncrement(); - serializers.put(objClass, new SerializerWrapper(index, serializer)); - if (serializersArray.length <= index) { - serializersArray = Arrays.copyOf(serializersArray, index + 1); - } - serializersArray[index] = serializer; - - LOGGER.info(String - .format("Registered new serializer '%s' %n for '%s' at index %d", serializer.getClass().getName(), - objClass.getName(), index)); - } - } - - /** - * Get the serializer instance associated with cls or null if not found. 
- * - * @param cls object class - * @return serializer instance or null if not found - */ - public Serializer getSerializer(Class cls) { - SerializerWrapper w = getSerializerWrapper(cls); - if (w != null) { - return w.serializer; - } - return null; - } - - /** - * Serializes this instance into an output stream. - * - * @param out data output - * @throws IOException if an io error occurs - */ - private void writeObject(ObjectOutputStream out) - throws IOException { - serialize(out, this); - } - - /** - * Serializes this class into a data output. - * - * @param out data output - * @param serializers serializers instance - * @throws IOException if an io error occurs - */ - static void serialize(DataOutput out, Serializers serializers) - throws IOException { - StringBuilder msg = new StringBuilder(String.format("Serialize %d serializer classes:", serializers.serializers.values().size())); - int size = serializers.serializers.values().size(); - - out.writeInt(size); - if (size > 0) { - for (SerializerWrapper sw : serializers.serializers.values()) { - int index = sw.index; - String name = sw.serializer.getClass().getName(); - - out.writeInt(index); - out.writeUTF(name); - - msg.append(String.format("%n (%d) %s", index, name)); - } - LOGGER.info(msg.toString()); - } - } - - /** - * Deserializes this instance from an input stream. - * - * @param in data input - * @throws IOException if an io error occurs - * @throws ClassNotFoundException if a class error occurs - */ - private void readObject(ObjectInputStream in) - throws IOException, ClassNotFoundException { - // Init - COUNTER = new AtomicInteger(); - serializers = new HashMap(); - serializersArray = new Serializer[0]; - - deserialize(in, this); - } - - /** - * Deserializes this class from a data input. - * - * @param in data input - * @param serializers serializers instance - * @throws IOException if an io error occurs - * @throws ClassNotFoundException if a class error occurs - */ - static void deserialize(DataInput in, Serializers serializers) - throws IOException, ClassNotFoundException { - int size = in.readInt(); - if (size > 0) { - StringBuilder msg = new StringBuilder(String.format("Deserialize %d serializer classes:", size)); - - if (serializers.serializersArray.length < size) { - serializers.serializersArray = Arrays.copyOf(serializers.serializersArray, size); - } - - int max = 0; - for (int i = 0; i < size; i++) { - int index = in.readInt(); - max = Math.max(max, index); - String serializerClassName = in.readUTF(); - try { - Class serializerClass = (Class) Class.forName(serializerClassName); - Serializer serializerInstance = serializerClass.newInstance(); - serializers.serializers - .put(getSerializerType(serializerInstance), new SerializerWrapper(index, serializerInstance)); - serializers.serializersArray[index] = serializerInstance; - - msg.append(String.format("%n (%d) %s", index, serializerClassName)); - } catch (Exception ex) { - LOGGER.log(Level.WARNING, (String.format("Can't find the serializer '%s'", serializerClassName)), ex); - } - } - serializers.COUNTER.set(max + 1); - - LOGGER.info(msg.toString()); - } - } - - /** - * Clear all serializers. - */ - void clear() { - LOGGER.info("Clear all serializers"); - - serializers.clear(); - serializersArray = new Serializer[0]; - COUNTER.set(0); - } - - /** - * Returns the serializer index associated with cls. 
- * - * @param cls object class - * @return serializer index - */ - int getIndex(Class cls) { - return getSerializerWrapper(cls).index; - } - - /** - * Returns the serializer and its index associated with cls. - * - * @param cls object clas - * @return serializer wrapper object - */ - private SerializerWrapper getSerializerWrapper(Class cls) { - SerializerWrapper w = serializers.get(cls); - if (w != null) { - return w; - } else { - // Try with interfaces implemented - for (Class c : cls.getInterfaces()) { - w = serializers.get(c); - if (w != null) { - return w; - } - } - } - return null; - } - - /** - * Returns the serializer given the index. - * - * @param index serializer index - * @return serializer - */ - Serializer getSerializer(int index) { - if (index >= serializersArray.length) { - throw new IllegalArgumentException(String.format("The serializer can't be found at index %d", index)); - } - return serializersArray[index]; - } - - /** - * Inner wrapper class that keeps the index attached to a serializer. - */ - private static class SerializerWrapper implements Serializable { - private int index; - private Serializer serializer; - - /** - * Used by deserialization. - */ - public SerializerWrapper() { - } - - public SerializerWrapper(int index, Serializer serializer) { - this.index = index; - this.serializer = serializer; - } - } - - /** - * Returns the serializer's generic type. - * - * @param instance serializer instance - * @return the class the serializer can serialize - */ - private static Class getSerializerType(Object instance) { - Type type = instance.getClass().getGenericInterfaces()[0]; - if (type instanceof ParameterizedType) { - Class cls = null; - Type clsType = ((ParameterizedType) type).getActualTypeArguments()[0]; - - if (clsType instanceof GenericArrayType) { - // Workaround for Java 6 (JDK bug 7151486) - cls = Array.newInstance((Class) ((GenericArrayType) clsType).getGenericComponentType(), 0).getClass(); - } else { - cls = (Class) clsType; - } - - if (Object.class.equals(cls)) { - throw new RuntimeException("The serializer type can't be object"); - } - return cls; - } else { - throw new RuntimeException(String - .format("The serializer class %s is not generic or has an unknown type", instance.getClass().getName())); - } - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - Serializers that = (Serializers) o; - - if (serializers.size() != that.serializers.size()) { - return false; - } - for (Map.Entry entry : serializers.entrySet()) { - SerializerWrapper sw = that.serializers.get(entry.getKey()); - if (sw == null || !sw.serializer.getClass().equals(entry.getValue().serializer.getClass())) { - return false; - } - } - - return true; - } -} diff --git a/paldb/src/main/java/com/linkedin/paldb/impl/StorageCache.java b/paldb/src/main/java/com/linkedin/paldb/impl/StorageCache.java deleted file mode 100644 index d40ba39..0000000 --- a/paldb/src/main/java/com/linkedin/paldb/impl/StorageCache.java +++ /dev/null @@ -1,267 +0,0 @@ -/* -* Copyright 2015 LinkedIn Corp. All rights reserved. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -*/ - -package com.linkedin.paldb.impl; - -import com.linkedin.paldb.api.Configuration; -import com.linkedin.paldb.api.Serializer; -import java.text.DecimalFormat; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.logging.Level; -import java.util.logging.Logger; - - -/** - * LRU cache configured on the desired size in memory. - *

- *
This cache can be configured with the following properties:
- *
cache.enabled - LRU cache enabled
- *
cache.bytes - cache limit (bytes)
- *
cache.initial.capacity - cache initial capacity
- *
cache.load.factor - cache load factor
- *
- *

- * The cache estimates the size of the objects it contains so it consumes no more than the configured - * memory limit. - */ -public class StorageCache { - // Static null object to recognizes null from missing values - protected static final Object NULL_VALUE = new Object(); - - //Logger - private final static Logger LOGGER = Logger.getLogger(StorageCache.class.getName()); - - /** - * Factory to create and initialize the cache. - * - * @param configuration configuration - * @return new cache - */ - static StorageCache initCache(Configuration configuration) { - if (configuration.getBoolean(Configuration.CACHE_ENABLED) && configuration.getLong(Configuration.CACHE_BYTES) > 0) { - return new StorageCache(configuration); - } else { - return new DisabledCache(); - } - } - - /* - * Memory usage - * 632 bytes for the LinkedHashMap - * 24 bytes theoretical overhead per entry but more like 45 in practice - */ - final static int OVERHEAD = 50; - private final LinkedHashMap cache; - private final Configuration configuration; - private long maxWeight; - private long currentWeight; - - /** - * Cache constructor. - * - * @param config configuration - */ - private StorageCache(Configuration config) { - cache = new LinkedHashMap(config.getInt(Configuration.CACHE_INITIAL_CAPACITY), - config.getFloat(Configuration.CACHE_LOAD_FACTOR), true) { - @Override - protected boolean removeEldestEntry(Map.Entry eldest) { - boolean res = currentWeight > maxWeight; - if (res) { - Object key = eldest.getKey(); - Object value = eldest.getValue(); - currentWeight -= getWeight(key) + getWeight(value) + OVERHEAD; - } - return res; - } - }; - maxWeight = config.getLong(Configuration.CACHE_BYTES); - LOGGER.log(Level.INFO, "Cache initialized with maximum {0} Mb usage", - new DecimalFormat("#,##0.00").format(maxWeight / (1024.0 * 1024.0))); - configuration = config; - } - - /** - * Private constructor used by the DisabledCache inner class. - */ - private StorageCache() { - cache = null; - configuration = null; - } - - /** - * Gets the value in the cache for key or null if not found. - *

- * If the value associated with key exists but is null, returns - * StorageCache.NULL_VALUE. - * - * @param key key to get value for - * @param return type - * @return value, null or StorageCache.NULL_VALUE - */ - public K get(Object key) { - return (K) cache.get(key); - } - - /** - * Returns true if the cache contains key. - * - * @param key key to test presence for - * @return true if found, false otherwise - */ - public boolean contains(Object key) { - return cache.containsKey(key); - } - - /** - * Puts the key/value pair into the cache. - * - * @param key key - * @param value value - */ - public void put(Object key, Object value) { - int weight = getWeight(key) + getWeight(value) + OVERHEAD; - currentWeight += weight; - if (cache.put(key, value == null ? NULL_VALUE : value) != null) { - currentWeight -= weight; - } - } - - /** - * Gets the weight for value. - * - * @param value value to get weight for - * @return weight - */ - private int getWeight(Object value) { - if (value == null) { - return 0; - } - if (value.getClass().isArray()) { - Class cc = value.getClass().getComponentType(); - if (cc.isPrimitive()) { - if (cc.equals(int.class)) { - return ((int[]) value).length * 4; - } else if (cc.equals(long.class)) { - return ((long[]) value).length * 8; - } else if (cc.equals(double.class)) { - return ((double[]) value).length * 8; - } else if (cc.equals(float.class)) { - return ((float[]) value).length * 4; - } else if (cc.equals(boolean.class)) { - return ((boolean[]) value).length * 1; - } else if (cc.equals(byte.class)) { - return ((byte[]) value).length * 1; - } else if (cc.equals(short.class)) { - return ((short[]) value).length * 2; - } else if (cc.equals(char.class)) { - return ((char[]) value).length * 2; - } - } else if (cc.equals(String.class)) { - String[] v = (String[]) value; - int res = 0; - for (int i = 0; i < v.length; i++) { - res += v[i].length() * 2 + 40; - } - return res; - } else if (cc.equals(int[].class)) { - int[][] v = (int[][]) value; - int res = 0; - for (int i = 0; i < v.length; i++) { - res += v[i].length * 4; - } - return res; - } else if (cc.equals(long[].class)) { - long[][] v = (long[][]) value; - int res = 0; - for (int i = 0; i < v.length; i++) { - res += v[i].length * 8; - } - return res; - } else { - Object[] v = (Object[]) value; - int res = 0; - for (int i = 0; i < v.length; i++) { - res += getWeight(v[i]); - } - return res; - } - } else if (value instanceof String) { - return ((String) value).length() * 2 + 40; - } else { - Serializer serializer = configuration.getSerializer(value.getClass()); - if (serializer != null) { - return serializer.getWeight(value); - } - } - return 16; - } - - /** - * Sets the max weight in the cache. - * - * @param maxWeight max weight - */ - public void setMaxWeight(long maxWeight) { - this.maxWeight = maxWeight; - } - - /** - * Gets the cache size. - * - * @return cache size - */ - public int size() { - return cache.size(); - } - - /** - * Gets the cache current weight. - * - * @return weight - */ - public long getWeight() { - return currentWeight; - } - - /** - * Special inner class that overrides all cache's features when the cache is disabled. 
- */ - private static class DisabledCache extends StorageCache { - - DisabledCache() { - LOGGER.log(Level.INFO, "Cache disabled"); - } - - @Override - public Object get(Object key) { - return null; - } - - @Override - public boolean contains(Object key) { - return false; - } - - @Override - public void put(Object key, Object value) { - } - - @Override - public int size() { - return 0; - } - } -} diff --git a/paldb/src/main/java/com/linkedin/paldb/utils/HashUtils.java b/paldb/src/main/java/com/linkedin/paldb/utils/HashUtils.java deleted file mode 100644 index bcc707b..0000000 --- a/paldb/src/main/java/com/linkedin/paldb/utils/HashUtils.java +++ /dev/null @@ -1,159 +0,0 @@ -/* -* Copyright 2015 LinkedIn Corp. All rights reserved. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -*/ - -package com.linkedin.paldb.utils; - -import java.util.zip.Checksum; - - -/** - * Hashing utility. - */ -public class HashUtils { - - // Hash implementation - private final Murmur3A hash = new Murmur3A(42); - - /** - * Returns the positive hash for the given bytes. - * - * @param bytes bytes to hash - * @return hash - */ - public int hash(byte[] bytes) { - hash.reset(); - hash.update(bytes); - return hash.getIntValue() & 0x7fffffff; - } - - /** - * Hash implementation, inspired from java-common. - * - * Originally developed for greenrobot by Markus Junginger. - */ - private static class Murmur3A implements Checksum { - - private static final int C1 = 0xcc9e2d51; - private static final int C2 = 0x1b873593; - - private final int seed; - - private int h1; - private int length; - - private int partialK1; - private int partialK1Pos; - - public Murmur3A(int seed) { - this.seed = seed; - h1 = seed; - } - - @Override - public void update(int b) { - switch (partialK1Pos) { - case 0: - partialK1 = 0xff & b; - partialK1Pos = 1; - break; - case 1: - partialK1 |= (0xff & b) << 8; - partialK1Pos = 2; - break; - case 2: - partialK1 |= (0xff & b) << 16; - partialK1Pos = 3; - break; - case 3: - partialK1 |= (0xff & b) << 24; - applyK1(partialK1); - partialK1Pos = 0; - break; - } - length++; - } - - @Override - public void update(byte[] b, int off, int len) { - while (partialK1Pos != 0 && len > 0) { - update(b[off]); - off++; - len--; - } - - int remainder = len & 3; - int stop = off + len - remainder; - for (int i = off; i < stop; i += 4) { - int k1 = getIntLE(b, i); - applyK1(k1); - } - length += stop - off; - - for (int i = 0; i < remainder; i++) { - update(b[stop + i]); - } - } - - public void update(byte[] b) { - update(b, 0, b.length); - } - - private void applyK1(int k1) { - k1 *= C1; - k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15); - k1 *= C2; - - h1 ^= k1; - h1 = (h1 << 13) | (h1 >>> 19); // ROTL32(h1,13); - h1 = h1 * 5 + 0xe6546b64; - } - - @Override - public long getValue() { - return 0xFFFFFFFFL & getIntValue(); - } - - public int getIntValue() { - int finished = h1; - if (partialK1Pos > 0) { - int k1 = partialK1 * C1; - k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15); - k1 *= C2; - finished ^= k1; - } - finished ^= length; - - // fmix - finished ^= finished >>> 16; - finished *= 0x85ebca6b; - finished ^= finished >>> 13; - 
finished *= 0xc2b2ae35; - finished ^= finished >>> 16; - - return finished; - } - - @Override - public void reset() { - h1 = seed; - length = 0; - partialK1Pos = 0; - } - - private int getIntLE(byte[] bytes, int index) { - return (bytes[index] & 0xff) | ((bytes[index + 1] & 0xff) << 8) | - ((bytes[index + 2] & 0xff) << 16) | (bytes[index + 3] << 24); - } - } -} diff --git a/paldb/src/perfTest/java/com/linkedin/paldb/TestReadThroughput.java b/paldb/src/perfTest/java/com/linkedin/paldb/TestReadThroughput.java deleted file mode 100644 index 9549f7e..0000000 --- a/paldb/src/perfTest/java/com/linkedin/paldb/TestReadThroughput.java +++ /dev/null @@ -1,162 +0,0 @@ -/* -* Copyright 2015 LinkedIn Corp. All rights reserved. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -*/ - -package com.linkedin.paldb; - -import com.linkedin.paldb.api.Configuration; -import com.linkedin.paldb.api.PalDB; -import com.linkedin.paldb.api.StoreReader; -import com.linkedin.paldb.api.StoreWriter; -import com.linkedin.paldb.impl.GenerateTestData; -import com.linkedin.paldb.utils.DirectoryUtils; -import com.linkedin.paldb.utils.NanoBench; -import java.io.File; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; -import org.apache.commons.lang.RandomStringUtils; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - - -public class TestReadThroughput { - - private File TEST_FOLDER = new File("testreadthroughput"); - private final int READS = 500000; - - @BeforeMethod - public void setUp() { - DirectoryUtils.deleteDirectory(TEST_FOLDER); - TEST_FOLDER.mkdir(); - } - - @AfterMethod - public void cleanUp() { - DirectoryUtils.deleteDirectory(TEST_FOLDER); - } - - @Test - public void testReadThroughput() { - - List measures = new ArrayList(); - int max = 10000000; - for (int i = 100; i <= max; i *= 10) { - Measure m = measure(i, 0, 0, false); - - measures.add(m); - } - - report("READ THROUGHPUT (Set int -> boolean)", measures); - } - - @Test - public void testReadThroughputWithCache() { - - List measures = new ArrayList(); - int max = 10000000; - for (int i = 100; i <= max; i *= 10) { - Measure m = measure(i, 0, 0.2, false); - - measures.add(m); - } - - report("READ THROUGHPUT WITH CACHE (Set int -> boolean)", measures); - } - - // UTILITY - - private Measure measure(int keysCount, int valueLength, double cacheSizeRatio, final boolean frequentReads) { - - // Generate keys - long seed = 4242; - final Integer[] keys = GenerateTestData.generateRandomIntKeys(keysCount, Integer.MAX_VALUE, seed); - - // Write store - File storeFile = new File(TEST_FOLDER, "paldb" + keysCount + "-" + valueLength + ".store"); - StoreWriter writer = PalDB.createWriter(storeFile, new Configuration()); - for (Integer key : keys) { - if (valueLength == 0) { - writer.put(key.toString(), Boolean.TRUE); - } else { - writer.put(key.toString(), RandomStringUtils.randomAlphabetic(valueLength)); - } - } - writer.close(); - - // Get reader - long cacheSize = 0; - Configuration config = PalDB.newConfiguration(); - if (cacheSizeRatio > 0) { - cacheSize = (long) 
(storeFile.length() * cacheSizeRatio); - config.set(Configuration.CACHE_ENABLED, "true"); - config.set(Configuration.CACHE_BYTES, String.valueOf(cacheSize)); - } else { - config.set(Configuration.CACHE_ENABLED, "false"); - } - final StoreReader reader = PalDB.createReader(storeFile, config); - - // Measure - NanoBench nanoBench = NanoBench.create(); - nanoBench.cpuOnly().warmUps(5).measurements(20).measure("Measure %d reads for %d keys with cache", new Runnable() { - @Override - public void run() { - Random r = new Random(); - int length = keys.length; - for (int i = 0; i < READS; i++) { - int index; - if (i % 2 == 0 && frequentReads) { - index = r.nextInt(length / 10); - } else { - index = r.nextInt(length); - } - Integer key = keys[index]; - reader.get(key.toString()); - } - } - }); - - // Close - reader.close(); - - // Return measure - double rps = READS * nanoBench.getTps(); - return new Measure(storeFile.length(), rps, valueLength, cacheSize, keys.length); - } - - private void report(String title, List measures) { - System.out.println(title + "\n\n"); - System.out.println("FILE LENGTH;KEYS;RPS"); - for (Measure m : measures) { - System.out.println(m.fileSize + ";" + m.keys + ";" + m.rps); - } - } - - // Measurement class - private static class Measure { - private long fileSize; - private double rps; - private int valueLength; - private long cacheSize; - private int keys; - - private Measure(long fileSize, double rps, int valueLength, long cacheSize, int keys) { - this.fileSize = fileSize; - this.rps = rps; - this.valueLength = valueLength; - this.cacheSize = cacheSize; - this.keys = keys; - } - } -} diff --git a/paldb/src/test/java/com/linkedin/paldb/impl/TestSerializers.java b/paldb/src/test/java/com/linkedin/paldb/impl/TestSerializers.java deleted file mode 100644 index daab9b9..0000000 --- a/paldb/src/test/java/com/linkedin/paldb/impl/TestSerializers.java +++ /dev/null @@ -1,226 +0,0 @@ -/* -* Copyright 2015 LinkedIn Corp. All rights reserved. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-*/ - -package com.linkedin.paldb.impl; - -import com.linkedin.paldb.api.Serializer; -import com.linkedin.paldb.utils.DataInputOutput; - -import java.awt.*; -import java.io.DataInput; -import java.io.DataOutput; - -import org.testng.Assert; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - - -public class TestSerializers { - - private Serializers _serializers; - - @BeforeMethod - public void setUp() { - _serializers = new Serializers(); - } - - @Test - public void testRegister() { - ColorSerializer i = new ColorSerializer(); - _serializers.registerSerializer(i); - Assert.assertSame(_serializers.getSerializer(Color.class), i); - Assert.assertEquals(_serializers.getIndex(Color.class), 0); - } - - @Test - public void testRegisterTwice() { - ColorSerializer i1 = new ColorSerializer(); - ColorSerializer i2 = new ColorSerializer(); - _serializers.registerSerializer(i1); - _serializers.registerSerializer(i2); - Assert.assertSame(_serializers.getSerializer(Color.class), i1); - } - - @Test - public void testRegisterTwo() { - ColorSerializer i = new ColorSerializer(); - PointSerializer f = new PointSerializer(); - _serializers.registerSerializer(i); - _serializers.registerSerializer(f); - Assert.assertSame(_serializers.getSerializer(Color.class), i); - Assert.assertEquals(_serializers.getIndex(Color.class), 0); - Assert.assertSame(_serializers.getSerializer(Point.class), f); - Assert.assertEquals(_serializers.getIndex(Point.class), 1); - } - - @Test - public void testGetSerializer() { - ColorSerializer i = new ColorSerializer(); - _serializers.registerSerializer(i); - Assert.assertNull(_serializers.getSerializer(Point.class)); - Assert.assertNotNull(_serializers.getSerializer(Color.class)); - } - - @Test - public void testGetIndex() { - ColorSerializer i = new ColorSerializer(); - _serializers.registerSerializer(i); - Assert.assertEquals(_serializers.getIndex(Color.class), 0); - } - - @Test - public void testGetByIndex() { - ColorSerializer i = new ColorSerializer(); - _serializers.registerSerializer(i); - Assert.assertSame(_serializers.getSerializer(0), i); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testGetByIndexMissing() { - _serializers.getSerializer(0); - } - - @Test(expectedExceptions = RuntimeException.class) - public void testMissingType() { - MissingTypeSerializer i = new MissingTypeSerializer(); - _serializers.registerSerializer(i); - } - - @Test(expectedExceptions = RuntimeException.class) - public void testObjectType() { - ObjectTypeSerializer i = new ObjectTypeSerializer(); - _serializers.registerSerializer(i); - } - - @Test - public void testSerialize() throws Throwable { - _serializers.registerSerializer(new ColorSerializer()); - DataInputOutput dio = new DataInputOutput(); - Serializers.serialize(dio, _serializers); - byte[] bytes = dio.toByteArray(); - dio = new DataInputOutput(bytes); - _serializers.clear(); - Serializers.deserialize(dio, _serializers); - Assert.assertNotNull(_serializers.getSerializer(Color.class)); - Assert.assertEquals(_serializers.getIndex(Color.class), 0); - Assert.assertNotNull(_serializers.getSerializer(0)); - } - - @Test - public void testInterfaceType() throws Throwable { - SerializerWithInterface i = new SerializerWithInterface(); - _serializers.registerSerializer(i); - Assert.assertSame(_serializers.getSerializer(AnInterface.class), i); - } - - // HELPER - - public static class ColorSerializer implements Serializer { - - @Override - public Color read(DataInput input) { - return null; 
- } - - @Override - public void write(DataOutput output, Color input) { - - } - - @Override - public int getWeight(Color instance) { - return 0; - } - } - - public static class PointSerializer implements Serializer { - - @Override - public Point read(DataInput input) { - return null; - } - - @Override - public void write(DataOutput output, Point input) { - - } - - @Override - public int getWeight(Point instance) { - return 0; - } - } - - public static class MissingTypeSerializer implements Serializer { - - @Override - public Object read(DataInput input) { - return null; - } - - @Override - public void write(DataOutput output, Object input) { - - } - - @Override - public int getWeight(Object instance) { - return 0; - } - } - - public static class ObjectTypeSerializer implements Serializer { - - @Override - public Object read(DataInput input) { - return null; - } - - @Override - public void write(DataOutput output, Object input) { - - } - - @Override - public int getWeight(Object instance) { - return 0; - } - } - - public static interface AnInterface { - - } - - public static class AClass implements AnInterface { - - } - - public static class SerializerWithInterface implements Serializer { - - @Override - public AnInterface read(DataInput input) { - return null; - } - - @Override - public void write(DataOutput output, AnInterface input) { - - } - - @Override - public int getWeight(AnInterface instance) { - return 0; - } - } -} diff --git a/paldb/src/test/java/com/linkedin/paldb/impl/TestStorageCache.java b/paldb/src/test/java/com/linkedin/paldb/impl/TestStorageCache.java deleted file mode 100644 index bda6e46..0000000 --- a/paldb/src/test/java/com/linkedin/paldb/impl/TestStorageCache.java +++ /dev/null @@ -1,297 +0,0 @@ -/* -* Copyright 2015 LinkedIn Corp. All rights reserved. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-*/ - -package com.linkedin.paldb.impl; - -import com.linkedin.paldb.api.Configuration; -import com.linkedin.paldb.api.PalDB; -import com.linkedin.paldb.api.Serializer; - -import org.testng.Assert; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - - -public class TestStorageCache { - - private final static int ENTRY_SIZE = 16 * 2 + StorageCache.OVERHEAD; - - private Configuration _configuration; - - @BeforeMethod - public void setUp() { - _configuration = PalDB.newConfiguration(); - _configuration.set(Configuration.CACHE_ENABLED, "true"); - } - - @Test - public void testContainsValid() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, 0); - Assert.assertTrue(cache.contains(0)); - } - - @Test - public void testContainsInValid() { - StorageCache cache = StorageCache.initCache(_configuration); - Assert.assertFalse(cache.contains(0)); - } - - @Test - public void testEmpty() { - StorageCache cache = StorageCache.initCache(_configuration); - Assert.assertNull(cache.get(0)); - Assert.assertEquals(cache.size(), 0); - } - - @Test - public void testPutOneItem() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, 0); - Assert.assertNotNull(cache.get(0)); - Assert.assertEquals(cache.size(), 1); - } - - @Test - public void testPutTwice() { - Integer second = 1; - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, 1); - cache.put(0, second); - Assert.assertSame(cache.get(0), second); - } - - @Test - public void testPutZeroSize() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.setMaxWeight(0); - cache.put(0, 1); - Assert.assertEquals(cache.size(), 0); - } - - @Test - public void testPutTwiceObjectSize() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.setMaxWeight(ENTRY_SIZE); - cache.put(0, 0); - cache.put(1, 1); - Assert.assertEquals(cache.size(), 1); - Assert.assertNull(cache.get(0)); - Assert.assertNotNull(cache.get(1)); - } - - @Test - public void putSameCheckWeight() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, 0); - long weight = cache.getWeight(); - cache.put(0, 0); - Assert.assertEquals(cache.getWeight(), weight); - } - - @Test - public void testPutGet() { - int objs = 100; - StorageCache cache = StorageCache.initCache(_configuration); - cache.setMaxWeight(ENTRY_SIZE * objs); - for (int i = 0; i < objs; i++) { - cache.put(i, i); - } - Assert.assertEquals(cache.size(), 100); - for (int i = 0; i < objs; i++) { - Assert.assertNotNull(cache.get(i)); - } - } - - @Test - public void testCheckOrder() { - int objs = 100; - int capacity = 50; - StorageCache cache = StorageCache.initCache(_configuration); - cache.setMaxWeight(ENTRY_SIZE * capacity); - for (int i = 0; i < objs; i++) { - cache.put(i, i); - } - Assert.assertEquals(cache.size(), capacity); - for (int i = 0; i < objs; i++) { - if (i < capacity) { - Assert.assertNull(cache.get(i)); - } else { - Assert.assertNotNull(cache.get(i)); - } - } - } - - @Test - public void testCheckAccessOrderGet() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.setMaxWeight(ENTRY_SIZE * 3); - cache.put(0, 0); - cache.put(1, 1); - cache.get(0); - cache.put(2, 2); - Assert.assertEquals(cache.size(), 3); - cache.put(3, 2); - Assert.assertNull(cache.get(1)); - Assert.assertNotNull(cache.get(0)); - } - - @Test - public void testCheckAccessOrderPut() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.setMaxWeight(ENTRY_SIZE * 3); 
- cache.put(0, 0); - cache.put(1, 1); - cache.put(0, 0); - cache.put(2, 2); - Assert.assertEquals(cache.size(), 3); - cache.put(3, 2); - Assert.assertNull(cache.get(1)); - Assert.assertNotNull(cache.get(0)); - } - - @Test - public void testWeightKeyObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, 0); - Assert.assertEquals(cache.getWeight(), ENTRY_SIZE); - } - - @Test - public void testWeightKeyArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(new Object[]{0, 1}, 0); - Assert.assertEquals(cache.getWeight(), 16 + 32 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueIntArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new int[]{1, 2}); - Assert.assertEquals(cache.getWeight(), 16 + 8 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueLongArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new long[]{1, 2}); - Assert.assertEquals(cache.getWeight(), 16 + 16 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueDoubleArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new double[]{1.0, 2.0}); - Assert.assertEquals(cache.getWeight(), 16 + 16 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueFloatArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new float[]{1.0F, 2.0F}); - Assert.assertEquals(cache.getWeight(), 16 + 8 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueBooleanArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new boolean[]{true, false}); - Assert.assertEquals(cache.getWeight(), 16 + 2 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueByteArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new byte[]{1, 2}); - Assert.assertEquals(cache.getWeight(), 16 + 2 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueShortArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new short[]{1, 2}); - Assert.assertEquals(cache.getWeight(), 16 + 4 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueCharArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new char[]{'a', 'b'}); - Assert.assertEquals(cache.getWeight(), 16 + 4 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueStringArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new String[]{"one", "two"}); - Assert.assertEquals(cache.getWeight(), 16 + 46 * 2 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueInt2DArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new int[][]{{1, 2}, {3, 4}}); - Assert.assertEquals(cache.getWeight(), 16 + 8 * 2 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueLong2DArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new long[][]{{1, 2}, {3, 4}}); - Assert.assertEquals(cache.getWeight(), 16 + 16 * 2 + StorageCache.OVERHEAD); - } - - @Test - public void testWeightValueStringObject() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new String("one")); - Assert.assertEquals(cache.getWeight(), 16 + 46 + StorageCache.OVERHEAD); - } - - @Test - 
public void testWeightValueObjectArrayObjects() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, new Object[]{0, 1}); - Assert.assertEquals(cache.getWeight(), 16 + 32 + StorageCache.OVERHEAD); - } - - @Test - public void testNullValue() { - StorageCache cache = StorageCache.initCache(_configuration); - cache.put(0, null); - Assert.assertEquals(cache.size(), 1); - Assert.assertEquals(cache.get(0), StorageCache.NULL_VALUE); - } - - @Test - public void testDisabled() { - Configuration configuration = new Configuration(); - configuration.set(Configuration.CACHE_ENABLED, "false"); - StorageCache cache = StorageCache.initCache(configuration); - Assert.assertEquals(cache.size(), 0); - Assert.assertNull(cache.get("foo")); - Assert.assertFalse(cache.contains("foo")); - } - - @Test - public void testDisabledPut() { - Configuration configuration = new Configuration(); - configuration.set(Configuration.CACHE_ENABLED, "false"); - StorageCache cache = StorageCache.initCache(configuration); - cache.put(0, "foo"); - Assert.assertEquals(cache.size(), 0); - Assert.assertNull(cache.get("foo")); - Assert.assertFalse(cache.contains("foo")); - } -} diff --git a/paldb/src/test/java/com/linkedin/paldb/impl/TestStore.java b/paldb/src/test/java/com/linkedin/paldb/impl/TestStore.java deleted file mode 100644 index 6ff368b..0000000 --- a/paldb/src/test/java/com/linkedin/paldb/impl/TestStore.java +++ /dev/null @@ -1,598 +0,0 @@ -/* -* Copyright 2015 LinkedIn Corp. All rights reserved. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-*/ - -package com.linkedin.paldb.impl; - -import com.linkedin.paldb.api.*; -import com.linkedin.paldb.api.PalDB; -import com.linkedin.paldb.utils.DataInputOutput; -import com.linkedin.paldb.utils.FormatVersion; -import com.linkedin.paldb.utils.LongPacker; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import org.testng.Assert; -import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - - -public class TestStore { - - private final File STORE_FOLDER = new File("data"); - private final File STORE_FILE = new File(STORE_FOLDER, "paldb.dat"); - - @BeforeClass - public void setUp() { - STORE_FILE.delete(); - STORE_FOLDER.delete(); - STORE_FOLDER.mkdir(); - } - - @AfterClass - public void cleanUp() { - STORE_FILE.delete(); - STORE_FOLDER.delete(); - } - - @Test - public void testEmpty() { - StoreWriter writer = PalDB.createWriter(STORE_FILE, new Configuration()); - writer.close(); - - Assert.assertTrue(STORE_FILE.exists()); - - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - - Assert.assertEquals(reader.size(), 0); - Assert.assertNull(reader.get(1, null)); - - reader.close(); - } - - @Test - public void testEmptyStream() { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - StoreWriter writer = PalDB.createWriter(bos, new Configuration()); - writer.close(); - - Assert.assertTrue(bos.toByteArray().length > 0); - - ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); - StoreReader reader = PalDB.createReader(bis, new Configuration()); - reader.close(); - } - - @Test - public void testEmptyDefaultConfig() { - StoreWriter writer = PalDB.createWriter(STORE_FILE); - writer.close(); - - Assert.assertTrue(STORE_FILE.exists()); - - StoreReader reader = PalDB.createReader(STORE_FILE); - - Assert.assertEquals(reader.size(), 0); - Assert.assertNull(reader.get(1, null)); - - reader.close(); - } - - @Test - public void testNewConfiguration() { - Assert.assertNotNull(PalDB.newConfiguration()); - } - - @Test - public void testNoFolder() { - File file = new File("nofolder.store"); - file.deleteOnExit(); - StoreWriter writer = PalDB.createWriter(file, new Configuration()); - writer.close(); - - Assert.assertTrue(file.exists()); - } - - @Test(expectedExceptions = RuntimeException.class, expectedExceptionsMessageRegExp = ".*not found.*") - public void testReaderFileNotFound() { - PalDB.createReader(new File("notfound"), PalDB.newConfiguration()); - } - - @Test(expectedExceptions = NullPointerException.class) - public void testReaderNullFile() { - PalDB.createReader((File) null, PalDB.newConfiguration()); - } - - @Test(expectedExceptions = NullPointerException.class) - public void testReaderNullConfig() { - PalDB.createReader(new File("notfound"), null); - } - - @Test(expectedExceptions = NullPointerException.class) - public void testReaderNullStream() { - PalDB.createReader((InputStream) null, PalDB.newConfiguration()); - } - - @Test(expectedExceptions = NullPointerException.class) - public void testReaderNullConfigForStream() { - PalDB.createReader(new InputStream() { - @Override - public int read() - throws IOException { - return 0; - } - }, null); - } - - @Test(expectedExceptions = 
NullPointerException.class) - public void testWriterNullFile() { - PalDB.createWriter((File) null, PalDB.newConfiguration()); - } - - @Test(expectedExceptions = NullPointerException.class) - public void testWriterNullConfig() { - PalDB.createWriter(new File("notfound"), null); - } - - @Test(expectedExceptions = NullPointerException.class) - public void testWriterNullStream() { - PalDB.createWriter((OutputStream) null, PalDB.newConfiguration()); - } - - @Test(expectedExceptions = NullPointerException.class) - public void testWriterNullConfigForStream() { - PalDB.createWriter(new OutputStream() { - @Override - public void write(int i) - throws IOException { - - } - }, null); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testInvalidSegmentSize() { - StoreWriter writer = PalDB.createWriter(STORE_FILE); - writer.close(); - - Configuration config = new Configuration(); - config.set(Configuration.MMAP_SEGMENT_SIZE, String.valueOf(1 + (long) Integer.MAX_VALUE)); - PalDB.createReader(STORE_FILE, config); - } - - @Test - public void testByteMarkEmpty() - throws IOException { - FileOutputStream fos = new FileOutputStream(STORE_FILE); - fos.write(12345); - fos.write(FormatVersion.getPrefixBytes()[0]); - fos.write(3456); - StoreWriter writer = PalDB.createWriter(fos, new Configuration()); - writer.close(); - - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - - Assert.assertEquals(reader.size(), 0); - Assert.assertNull(reader.get(1, null)); - - reader.close(); - } - - @Test - public void testOneKey() { - StoreWriter writer = PalDB.createWriter(STORE_FILE, new Configuration()); - writer.put(1, "foo"); - writer.close(); - - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - Assert.assertEquals(reader.size(), 1); - Assert.assertEquals(reader.get(1), "foo"); - reader.close(); - } - - @Test - public void testPutSerializedKey() - throws IOException { - StorageSerialization storageSerialization = new StorageSerialization(new Configuration()); - byte[] serializedKey = storageSerialization.serializeKey(1); - byte[] serializedValue = storageSerialization.serializeValue("foo"); - - StoreWriter writer = PalDB.createWriter(STORE_FILE, new Configuration()); - writer.put(serializedKey, serializedValue); - writer.close(); - - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - Assert.assertEquals(reader.size(), 1); - Assert.assertEquals(reader.get(1), "foo"); - reader.close(); - } - - @Test - public void testByteMarkOneKey() - throws IOException { - FileOutputStream fos = new FileOutputStream(STORE_FILE); - fos.write(12345); - fos.write(FormatVersion.getPrefixBytes()[0]); - fos.write(3456); - StoreWriter writer = PalDB.createWriter(fos, new Configuration()); - writer.put(1, "foo"); - writer.close(); - - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - - Assert.assertEquals(reader.size(), 1); - Assert.assertEquals(reader.get(1), "foo"); - reader.close(); - } - - @Test - public void testTwoFirstKeyLength() - throws NotFoundException { - Integer key1 = 1; - Integer key2 = 245; - - //Test key length - testKeyLength(key1, 1); - testKeyLength(key2, 2); - - //Write - writeStore(STORE_FILE, new Object[]{key1, key2}, new Object[]{1, 6}); - - //Read - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - Assert.assertEquals(reader.getInt(key1), 1); - Assert.assertEquals(reader.getInt(key2), 6); - Assert.assertNull(reader.get(0, null)); - Assert.assertNull(reader.get(6, 
null)); - Assert.assertNull(reader.get(244, null)); - Assert.assertNull(reader.get(246, null)); - Assert.assertNull(reader.get(1245, null)); - } - - @Test - public void testKeyLengthGap() - throws NotFoundException { - Integer key1 = 1; - Integer key2 = 2450; - - //Test key length - testKeyLength(key1, 1); - testKeyLength(key2, 3); - - //Write - writeStore(STORE_FILE, new Object[]{key1, key2}, new Object[]{1, 6}); - - //Read - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - Assert.assertEquals(reader.getInt(key1), 1); - Assert.assertEquals(reader.getInt(key2), 6); - Assert.assertNull(reader.get(0, null)); - Assert.assertNull(reader.get(6, null)); - Assert.assertNull(reader.get(244, null)); - Assert.assertNull(reader.get(267, null)); - Assert.assertNull(reader.get(2449, null)); - Assert.assertNull(reader.get(2451, null)); - Assert.assertNull(reader.get(2454441, null)); - } - - @Test - public void testKeyLengthStartTwo() - throws NotFoundException { - Integer key1 = 245; - Integer key2 = 2450; - - //Test key length - testKeyLength(key1, 2); - testKeyLength(key2, 3); - - //Write - writeStore(STORE_FILE, new Object[]{key1, key2}, new Object[]{1, 6}); - - //Read - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - Assert.assertEquals(reader.getInt(key1), 1); - Assert.assertEquals(reader.getInt(key2), 6); - Assert.assertNull(reader.get(6, null)); - Assert.assertNull(reader.get(244, null)); - Assert.assertNull(reader.get(267, null)); - Assert.assertNull(reader.get(2449, null)); - Assert.assertNull(reader.get(2451, null)); - Assert.assertNull(reader.get(2454441, null)); - } - - @Test(expectedExceptions = RuntimeException.class, expectedExceptionsMessageRegExp = ".*duplicate.*") - public void testDuplicateKeys() { - StoreWriter writer = PalDB.createWriter(STORE_FILE, new Configuration()); - writer.put(0, "ABC"); - writer.put(0, "DGE"); - writer.close(); - } - - @Test - public void testDataOnTwoBuffers() - throws IOException { - Object[] keys = new Object[]{1, 2, 3}; - Object[] values = new Object[]{GenerateTestData.generateStringData(100), GenerateTestData - .generateStringData(10000), GenerateTestData.generateStringData(100)}; - - StorageSerialization serialization = new StorageSerialization(new Configuration()); - int byteSize = serialization.serialize(values[0]).length + serialization.serialize(values[1]).length; - - //Write - writeStore(STORE_FILE, keys, values); - - //Read - Configuration configuration = new Configuration(); - configuration.set(Configuration.MMAP_SEGMENT_SIZE, String.valueOf(byteSize - 100)); - StoreReader reader = PalDB.createReader(STORE_FILE, configuration); - for (int i = 0; i < keys.length; i++) { - Assert.assertEquals(reader.get(keys[i], null), values[i]); - } - } - - @Test - public void testDataSizeOnTwoBuffers() - throws IOException { - Object[] keys = new Object[]{1, 2, 3}; - Object[] values = new Object[]{GenerateTestData.generateStringData(100), GenerateTestData - .generateStringData(10000), GenerateTestData.generateStringData(100)}; - - StorageSerialization serialization = new StorageSerialization(new Configuration()); - byte[] b1 = serialization.serialize(values[0]); - byte[] b2 = serialization.serialize(values[1]); - int byteSize = b1.length + b2.length; - int sizeSize = - LongPacker.packInt(new DataInputOutput(), b1.length) + LongPacker.packInt(new DataInputOutput(), b2.length); - - //Write - writeStore(STORE_FILE, keys, values); - - //Read - Configuration configuration = new Configuration(); - 
configuration.set(Configuration.MMAP_SEGMENT_SIZE, String.valueOf(byteSize + sizeSize + 3)); - StoreReader reader = PalDB.createReader(STORE_FILE, configuration); - for (int i = 0; i < keys.length; i++) { - Assert.assertEquals(reader.get(keys[i], null), values[i]); - } - } - - @Test - public void testReadStringToString() { - testReadKeyToString(GenerateTestData.generateStringKeys(100)); - } - - @Test - public void testReadIntToString() { - testReadKeyToString(GenerateTestData.generateIntKeys(100)); - } - - @Test - public void testReadDoubleToString() { - testReadKeyToString(GenerateTestData.generateDoubleKeys(100)); - } - - @Test - public void testReadLongToString() { - testReadKeyToString(GenerateTestData.generateLongKeys(100)); - } - - @Test - public void testReadStringToInt() { - testReadKeyToInt(GenerateTestData.generateStringKeys(100)); - } - - @Test - public void testReadByteToInt() { - testReadKeyToInt(GenerateTestData.generateByteKeys(100)); - } - - @Test - public void testReadIntToInt() { - testReadKeyToInt(GenerateTestData.generateIntKeys(100)); - } - - @Test - public void testReadIntToIntArray() { - testReadKeyToIntArray(GenerateTestData.generateIntKeys(100)); - } - - @Test - public void testReadCompoundToString() { - testReadKeyToString(GenerateTestData.generateCompoundKeys(100)); - } - - @Test - public void testReadCompoundByteToString() { - testReadKeyToString(new Object[]{GenerateTestData.generateCompoundByteKey()}); - } - - @Test - public void testReadIntToNull() { - testReadKeyToNull(GenerateTestData.generateIntKeys(100)); - } - - @Test - public void testReadDisk() { - Integer[] keys = GenerateTestData.generateIntKeys(10000); - Configuration configuration = new Configuration(); - - //Write - StoreWriter writer = PalDB.createWriter(STORE_FILE, configuration); - Object[] values = GenerateTestData.generateStringData(keys.length, 1000); - writer.putAll(keys, values); - writer.close(); - - //Read - configuration.set(Configuration.MMAP_DATA_ENABLED, "false"); - StoreReader reader = PalDB.createReader(STORE_FILE, configuration); - Assert.assertEquals(reader.size(), keys.length); - - for (int i = 0; i < keys.length; i++) { - Object key = keys[i]; - Object val = reader.getString(key, null); - Assert.assertNotNull(val); - Assert.assertEquals(val, values[i]); - } - reader.close(); - } - - @Test - public void testIterate() { - Integer[] keys = GenerateTestData.generateIntKeys(100); - String[] values = GenerateTestData.generateStringData(keys.length, 12); - - //Write - writeStore(STORE_FILE, keys, values); - - //Sets - Set keysSet = new HashSet(Arrays.asList(keys)); - Set valuesSet = new HashSet(Arrays.asList(values)); - - //Read - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - Iterator> itr = reader.iterable().iterator(); - for (int i = 0; i < keys.length; i++) { - Assert.assertTrue(itr.hasNext()); - Map.Entry entry = itr.next(); - Assert.assertNotNull(entry); - Assert.assertTrue(keysSet.remove(entry.getKey())); - Assert.assertTrue(valuesSet.remove(entry.getValue())); - - Object valSearch = reader.get(entry.getKey(), null); - Assert.assertNotNull(valSearch); - Assert.assertEquals(valSearch, entry.getValue()); - } - Assert.assertFalse(itr.hasNext()); - reader.close(); - - Assert.assertTrue(keysSet.isEmpty()); - Assert.assertTrue(valuesSet.isEmpty()); - } - - // UTILITY - - private void testReadKeyToString(Object[] keys) { - // Write - StoreWriter writer = PalDB.createWriter(STORE_FILE, new Configuration()); - Object[] values = 
GenerateTestData.generateStringData(keys.length, 10); - writer.putAll(keys, values); - writer.close(); - - // Read - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - Assert.assertEquals(reader.size(), keys.length); - - for (int i = 0; i < keys.length; i++) { - Object key = keys[i]; - Object val = reader.getString(key, null); - Assert.assertNotNull(val); - Assert.assertEquals(val, values[i]); - } - - reader.close(); - } - - private void testReadKeyToInt(Object[] keys) { - // Write - StoreWriter writer = PalDB.createWriter(STORE_FILE, new Configuration()); - Integer[] values = GenerateTestData.generateIntData(keys.length); - writer.putAll(keys, values); - writer.close(); - - // Read - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - Assert.assertEquals(reader.size(), keys.length); - - for (int i = 0; i < keys.length; i++) { - Object key = keys[i]; - Object val = reader.getInt(key, 0); - Assert.assertNotNull(val); - Assert.assertEquals(val, values[i]); - } - - reader.close(); - } - - private void testReadKeyToNull(Object[] keys) { - //Write - StoreWriter writer = PalDB.createWriter(STORE_FILE, new Configuration()); - Object[] values = new Object[keys.length]; - writer.putAll(keys, values); - writer.close(); - - //Read - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - Assert.assertEquals(reader.size(), keys.length); - - for (int i = 0; i < keys.length; i++) { - Object key = keys[i]; - Object val = reader.get(key, 0); - Assert.assertNull(val); - } - for (int i = 0; i < keys.length; i++) { - Object key = keys[i]; - Object val = reader.get(key, 0); - Assert.assertNull(val); - } - - reader.close(); - } - - private void testReadKeyToIntArray(Object[] keys) { - //Write - StoreWriter writer = PalDB.createWriter(STORE_FILE, new Configuration()); - int[][] values = GenerateTestData.generateIntArrayData(keys.length, 100); - writer.putAll(keys, values); - writer.close(); - - //Read - StoreReader reader = PalDB.createReader(STORE_FILE, new Configuration()); - Assert.assertEquals(reader.size(), keys.length); - - for (int i = 0; i < keys.length; i++) { - Object key = keys[i]; - int[] val = reader.getIntArray(key, null); - Assert.assertNotNull(val); - Assert.assertEquals(val, values[i]); - } - - reader.close(); - } - - private void writeStore(File location, Object[] keys, Object[] values) { - StoreWriter writer = PalDB.createWriter(location, new Configuration()); - writer.putAll(keys, values); - writer.close(); - } - - private void testKeyLength(Object key, int expectedLength) { - StorageSerialization serializationImpl = new StorageSerialization(new Configuration()); - int keyLength = 0; - try { - keyLength = serializationImpl.serializeKey(key).length; - } catch (IOException e) { - throw new RuntimeException(e); - } - Assert.assertEquals(keyLength, expectedLength); - } -} diff --git a/paldb/src/test/java/com/linkedin/paldb/impl/TestStoreReader.java b/paldb/src/test/java/com/linkedin/paldb/impl/TestStoreReader.java deleted file mode 100644 index 9d1920d..0000000 --- a/paldb/src/test/java/com/linkedin/paldb/impl/TestStoreReader.java +++ /dev/null @@ -1,422 +0,0 @@ -/* -* Copyright 2015 LinkedIn Corp. All rights reserved. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -*/ - -package com.linkedin.paldb.impl; - -import com.linkedin.paldb.api.*; -import com.linkedin.paldb.api.PalDB; - -import java.awt.*; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.File; -import java.io.IOException; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; - -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - - -public class TestStoreReader { - - private final File STORE_FOLDER = new File("data"); - private final File STORE_FILE = new File(STORE_FOLDER, "paldb.dat"); - private StoreReader reader; - - private final Object[] testValues = - new Object[]{true, (byte) 1, 'a', 1.0, 1f, (short) 1, 1, 1l, "foo", new boolean[]{true}, new byte[]{1}, new char[]{'a'}, new double[]{1.0}, new float[]{1f}, new short[]{1}, new int[]{1}, new long[]{1l}, new String[]{"foo"}, new Object[]{"foo"}, new Point( - 4, 56)}; - - @BeforeMethod - public void setUp() { - STORE_FILE.delete(); - STORE_FOLDER.delete(); - STORE_FOLDER.mkdir(); - - Configuration configuration = new Configuration(); - configuration.registerSerializer(new PointSerializer()); - StoreWriter writer = PalDB.createWriter(STORE_FILE, configuration); - for (int i = 0; i < testValues.length; i++) { - writer.put(i, testValues[i]); - } - writer.close(); - - reader = PalDB.createReader(STORE_FILE, new Configuration()); - } - - @AfterMethod - public void cleanUp() { - try { - reader.close(); - } catch (Exception e) { - } - STORE_FILE.delete(); - STORE_FOLDER.delete(); - } - - @Test - public void testFile() { - Assert.assertEquals(reader.getFile(), STORE_FILE); - } - - @Test - public void testSize() { - Assert.assertEquals(reader.size(), testValues.length); - } - - @Test(expectedExceptions = IllegalStateException.class) - public void testStoreClosed() { - reader.close(); - reader.get(0); - } - - @Test - public void testGetBoolean() - throws Throwable { - Assert.assertTrue(reader.getBoolean(0)); - Assert.assertTrue(reader.getBoolean(0, false)); - Assert.assertFalse(reader.getBoolean(-1, false)); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetBooleanMissing() - throws Throwable { - reader.getBoolean(-1); - } - - @Test - public void testGetByte() - throws Throwable { - Assert.assertEquals(reader.getByte(1), (byte) 1); - Assert.assertEquals(reader.getByte(1, (byte) 5), (byte) 1); - Assert.assertEquals(reader.getByte(-1, (byte) 5), (byte) 5); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetByteMissing() - throws Throwable { - reader.getByte(-1); - } - - @Test - public void testGetChar() - throws Throwable { - Assert.assertEquals(reader.getChar(2), (char) 'a'); - Assert.assertEquals(reader.getChar(2, (char) 'b'), (char) 'a'); - Assert.assertEquals(reader.getChar(-1, (char) 'b'), (char) 'b'); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetCharMissing() - throws Throwable { - reader.getChar(-1); - } - - @Test - public void testGetDouble() - throws Throwable { - Assert.assertEquals(reader.getDouble(3), 1.0); - Assert.assertEquals(reader.getDouble(3, 2.0), 1.0); - 
Assert.assertEquals(reader.getDouble(-1, 2.0), 2.0); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetDoubleMissing() - throws Throwable { - reader.getDouble(-1); - } - - @Test - public void testGetFloat() - throws Throwable { - Assert.assertEquals(reader.getFloat(4), 1f); - Assert.assertEquals(reader.getFloat(4, 2f), 1f); - Assert.assertEquals(reader.getFloat(-1, 2f), 2f); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetFloatMissing() - throws Throwable { - reader.getFloat(-1); - } - - @Test - public void testGetShort() - throws Throwable { - Assert.assertEquals(reader.getShort(5), (short) 1); - Assert.assertEquals(reader.getShort(5, (short) 2), (short) 1); - Assert.assertEquals(reader.getShort(-1, (short) 2), (short) 2); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetShortMissing() - throws Throwable { - reader.getShort(-1); - } - - @Test - public void testGetInt() - throws Throwable { - Assert.assertEquals(reader.getInt(6), 1); - Assert.assertEquals(reader.getInt(6, 2), 1); - Assert.assertEquals(reader.getInt(-1, 2), 2); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetIntMissing() - throws Throwable { - reader.getInt(-1); - } - - @Test - public void testGetLong() - throws Throwable { - Assert.assertEquals(reader.getLong(7), 1l); - Assert.assertEquals(reader.getLong(7, 2l), 1l); - Assert.assertEquals(reader.getLong(-1, 2l), 2l); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetLongMissing() - throws Throwable { - reader.getLong(-1); - } - - @Test - public void testGetString() - throws Throwable { - Assert.assertEquals(reader.getString(8), "foo"); - Assert.assertEquals(reader.getString(8, "bar"), "foo"); - Assert.assertEquals(reader.getString(-1, "bar"), "bar"); - } - - @Test - public void testGetStringMissing() - throws Throwable { - Assert.assertNull(reader.getString(-1)); - } - - @Test - public void testGetBooleanArray() - throws Throwable { - Assert.assertEquals(reader.getBooleanArray(9), new boolean[]{true}); - Assert.assertEquals(reader.getBooleanArray(9, new boolean[]{false}), new boolean[]{true}); - Assert.assertEquals(reader.getBooleanArray(-1, new boolean[]{false}), new boolean[]{false}); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetBooleanArrayMissing() - throws Throwable { - reader.getBooleanArray(-1); - } - - @Test - public void testGetByteArray() - throws Throwable { - Assert.assertEquals(reader.getByteArray(10), new byte[]{1}); - Assert.assertEquals(reader.getByteArray(10, new byte[]{2}), new byte[]{1}); - Assert.assertEquals(reader.getByteArray(-1, new byte[]{2}), new byte[]{2}); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetByteArrayMissing() - throws Throwable { - reader.getByteArray(-1); - } - - @Test - public void testGetCharArray() - throws Throwable { - Assert.assertEquals(reader.getCharArray(11), new char[]{'a'}); - Assert.assertEquals(reader.getCharArray(11, new char[]{'b'}), new char[]{'a'}); - Assert.assertEquals(reader.getCharArray(-1, new char[]{'b'}), new char[]{'b'}); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetCharArrayMissing() - throws Throwable { - reader.getCharArray(-1); - } - - @Test - public void testGetDoubleArray() - throws Throwable { - Assert.assertEquals(reader.getDoubleArray(12), new double[]{1.0}); - Assert.assertEquals(reader.getDoubleArray(12, new double[]{2.0}), new 
double[]{1.0}); - Assert.assertEquals(reader.getDoubleArray(-1, new double[]{2.0}), new double[]{2.0}); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetDoubleArrayMissing() - throws Throwable { - reader.getDoubleArray(-1); - } - - @Test - public void testGetFloatArray() - throws Throwable { - Assert.assertEquals(reader.getFloatArray(13), new float[]{1f}); - Assert.assertEquals(reader.getFloatArray(13, new float[]{2f}), new float[]{1f}); - Assert.assertEquals(reader.getFloatArray(-1, new float[]{2f}), new float[]{2f}); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetFloatArrayMissing() - throws Throwable { - reader.getFloatArray(-1); - } - - @Test - public void testGetShortArray() - throws Throwable { - Assert.assertEquals(reader.getShortArray(14), new short[]{1}); - Assert.assertEquals(reader.getShortArray(14, new short[]{2}), new short[]{1}); - Assert.assertEquals(reader.getShortArray(-1, new short[]{2}), new short[]{2}); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetShortArrayMissing() - throws Throwable { - reader.getShortArray(-1); - } - - @Test - public void testGetIntArray() - throws Throwable { - Assert.assertEquals(reader.getIntArray(15), new int[]{1}); - Assert.assertEquals(reader.getIntArray(15, new int[]{2}), new int[]{1}); - Assert.assertEquals(reader.getIntArray(-1, new int[]{2}), new int[]{2}); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetIntArrayMissing() - throws Throwable { - reader.getIntArray(-1); - } - - @Test - public void testGetLongArray() - throws Throwable { - Assert.assertEquals(reader.getLongArray(16), new long[]{1l}); - Assert.assertEquals(reader.getLongArray(16, new long[]{2l}), new long[]{1l}); - Assert.assertEquals(reader.getLongArray(-1, new long[]{2l}), new long[]{2l}); - } - - @Test(expectedExceptions = NotFoundException.class) - public void testGetLongArrayMissing() - throws Throwable { - reader.getLongArray(-1); - } - - @Test - public void testGetStringArray() - throws Throwable { - Assert.assertEquals(reader.getStringArray(17), new String[]{"foo"}); - Assert.assertEquals(reader.getStringArray(17, new String[]{"bar"}), new String[]{"foo"}); - Assert.assertEquals(reader.getStringArray(-1, new String[]{"bar"}), new String[]{"bar"}); - } - - @Test - public void testGetStringArrayMissing() - throws Throwable { - Assert.assertNull(reader.getStringArray(-1)); - } - - @Test - public void testGetMissing() - throws Throwable { - Assert.assertNull(reader.get(-1)); - } - - @Test - public void testGetArray() - throws Throwable { - Assert.assertEquals(reader.getArray(18), new Object[]{"foo"}); - Assert.assertEquals(reader.getArray(18, new Object[]{"bar"}), new Object[]{"foo"}); - Assert.assertEquals(reader.getArray(-1, new Object[]{"bar"}), new Object[]{"bar"}); - } - - @Test - public void testGetArrayMissing() - throws Throwable { - Assert.assertNull(reader.getArray(-1)); - } - - @Test - public void testGetPoint() - throws Throwable { - Assert.assertEquals(reader.get(19), new Point(4, 56)); - } - - @Test - public void testIterator() { - Iterable> iter = reader.iterable(); - Assert.assertNotNull(iter); - Iterator> itr = iter.iterator(); - Assert.assertNotNull(itr); - - for (int i = 0; i < testValues.length; i++) { - Assert.assertTrue(itr.hasNext()); - Map.Entry v = itr.next(); - Object val = testValues[v.getKey()]; - Assert.assertEquals(v.getValue(), val); - } - } - - @Test - public void testKeyIterator() { - Iterable iter = 
reader.keys(); - Assert.assertNotNull(iter); - Iterator itr = iter.iterator(); - Assert.assertNotNull(itr); - - Set actual = new HashSet(); - Set expected = new HashSet(); - for (int i = 0; i < testValues.length; i++) { - Assert.assertTrue(itr.hasNext()); - Integer k = itr.next(); - actual.add(k); - expected.add(i); - } - Assert.assertEquals(actual, expected); - } - - // UTILITY - - public static class PointSerializer implements Serializer { - - @Override - public Point read(DataInput input) - throws IOException { - return new Point(input.readInt(), input.readInt()); - } - - @Override - public void write(DataOutput output, Point input) - throws IOException { - output.writeInt(input.x); - output.writeInt(input.y); - } - - @Override - public int getWeight(Point instance) { - return 8; - } - } -} diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..1caba2a --- /dev/null +++ b/pom.xml @@ -0,0 +1,249 @@ + + + 4.0.0 + + net.soundvibe + paldb + 2.0.0 + jar + paldb + Embeddable persistent write-once key-value store + https://github.com/soundvibe/paldb + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + scm:git:https://github.com/soundvibe/paldb.git + scm:git:https://github.com/soundvibe/paldb.git + https://github.com/soundvibe/paldb.git + HEAD + + + + linasnaginionis + Linas Naginionis + lnaginionis@gmail.com + + + + + 13 + UTF-8 + + + + + ossrh + https://oss.sonatype.org/content/repositories/snapshots + + + ossrh + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + + + org.xerial.snappy + snappy-java + 1.1.7.3 + + + + org.slf4j + slf4j-api + 1.7.28 + + + + + + org.junit.jupiter + junit-jupiter + 5.4.2 + test + + + + commons-codec + commons-codec + 1.13 + test + + + + commons-lang + commons-lang + 2.6 + test + + + + ch.qos.logback + logback-classic + 1.2.3 + test + + + + org.fusesource.leveldbjni + leveldbjni-all + 1.8 + test + + + + org.rocksdb + rocksdbjni + 4.0 + test + + + + + + + + maven-surefire-plugin + 2.22.2 + + + **/Test*.java + **/*Test.java + **/*Tests.java + **/*TestCase.java + + + **/performance/** + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + ${java.version} + ${java.version} + + + + + maven-source-plugin + 3.1.0 + + + attach-sources + verify + + jar + + + + + + + maven-release-plugin + 2.5.3 + + deploy + false + true + + + + + org.jacoco + jacoco-maven-plugin + 0.8.4 + + + + prepare-agent + + + + report + test + + report + + + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + 1.6.8 + true + + ossrh + https://oss.sonatype.org/ + true + + + + + + + + + release + + + + performRelease + true + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.0.1 + + + attach-javadocs + + jar + + + false + -Xdoclint:none + + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 1.6 + + + sign-artifacts + verify + + sign + + + + + + + + + + + diff --git a/settings.gradle b/settings.gradle deleted file mode 100644 index 4191fd1..0000000 --- a/settings.gradle +++ /dev/null @@ -1 +0,0 @@ -include ':paldb' diff --git a/paldb/src/main/java/com/linkedin/paldb/api/Configuration.java b/src/main/java/com/linkedin/paldb/api/Configuration.java similarity index 86% rename from paldb/src/main/java/com/linkedin/paldb/api/Configuration.java rename to src/main/java/com/linkedin/paldb/api/Configuration.java index 1298da4..86ff5cd 100644 --- a/paldb/src/main/java/com/linkedin/paldb/api/Configuration.java +++ b/src/main/java/com/linkedin/paldb/api/Configuration.java @@ -15,11 
+15,8 @@ package com.linkedin.paldb.api; import com.linkedin.paldb.impl.Serializers; -import java.io.Serializable; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; + +import java.util.*; /** @@ -40,31 +37,27 @@ * -Dpaldb.mmap.data.enabled=false). All property names should be prefixed * with paldb. */ -public class Configuration implements Serializable { +public class Configuration { // Buffer segment size - public final static String MMAP_SEGMENT_SIZE = "mmap.segment.size"; + public static final String MMAP_SEGMENT_SIZE = "mmap.segment.size"; // Enable memory mapping for data - public final static String MMAP_DATA_ENABLED = "mmap.data.enabled"; + public static final String MMAP_DATA_ENABLED = "mmap.data.enabled"; // Load factor - public final static String LOAD_FACTOR = "load.factor"; - // Cache enabled - public final static String CACHE_ENABLED = "cache.enabled"; - // Cache limit (in bytes) - public final static String CACHE_BYTES = "cache.bytes"; - // Cache initial capacity - public final static String CACHE_INITIAL_CAPACITY = "cache.initial.capacity"; - // Cache load factor - public final static String CACHE_LOAD_FACTOR = "cache.load.factor"; + public static final String LOAD_FACTOR = "load.factor"; // Enable compression - public final static String COMPRESSION_ENABLED = "compression.enabled"; + public static final String COMPRESSION_ENABLED = "compression.enabled"; + //Enable bloom filter + public static final String BLOOM_FILTER_ENABLED = "bloom.filter.enabled"; + //Bloom filter error rate + public static final String BLOOM_FILTER_ERROR_FACTOR = "bloom.filter.error.factor"; // Property map - protected final Map properties = new HashMap(); + private final Map properties = new HashMap<>(); // Read only - protected final boolean readOnly; + private final boolean readOnly; // Serializers - protected final Serializers serializers; + private final Serializers serializers; /** * Default constructor that initializes default values. @@ -76,14 +69,9 @@ public Configuration() { putWithSystemPropertyDefault(MMAP_SEGMENT_SIZE, "1073741824"); putWithSystemPropertyDefault(MMAP_DATA_ENABLED, "true"); putWithSystemPropertyDefault(LOAD_FACTOR, "0.75"); - putWithSystemPropertyDefault(CACHE_ENABLED, "false"); - putWithSystemPropertyDefault(CACHE_INITIAL_CAPACITY, "1000"); - putWithSystemPropertyDefault(CACHE_LOAD_FACTOR, "0.75"); putWithSystemPropertyDefault(COMPRESSION_ENABLED, "false"); - - //Default cache size: (Xmx - 100mo); - long cacheMemory = Math.max(0, Runtime.getRuntime().maxMemory() - (100 * 1024 * 1024)); - putWithSystemPropertyDefault(CACHE_BYTES, String.valueOf(cacheMemory)); + putWithSystemPropertyDefault(BLOOM_FILTER_ENABLED, "false"); + putWithSystemPropertyDefault(BLOOM_FILTER_ERROR_FACTOR, "0.01"); //Serializers serializers = new Serializers(); @@ -394,21 +382,12 @@ public Class getClass(String key) * The class for with the serializer is being registered is directly extracted from the class definition. * * @param serializer serializer to register + * @param serializer type */ - public void registerSerializer(Serializer serializer) { + public void registerSerializer(Serializer serializer) { serializers.registerSerializer(serializer); } - /** - * Gets the serializer for the given class or null if not found. 
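The hunk above replaces the four cache keys with two bloom-filter keys, defaulting to `false` and `0.01`. As the retained class Javadoc notes, any property can also be supplied as a `paldb.`-prefixed system property. A minimal sketch, assuming `Configuration.set(String, String)` stays public as it was in 1.x; the values shown are illustrative:

```java
// Sketch only: setting the new bloom-filter keys programmatically.
// Assumes Configuration.set(String, String) is still public, as in PalDB 1.x.
Configuration config = PalDB.newConfiguration();
config.set(Configuration.BLOOM_FILTER_ENABLED, "true");       // default: false
config.set(Configuration.BLOOM_FILTER_ERROR_FACTOR, "0.05");  // default: 0.01

// Equivalent JVM-wide defaults via the "paldb." prefix mentioned in the class Javadoc:
//   -Dpaldb.bloom.filter.enabled=true -Dpaldb.bloom.filter.error.factor=0.05
```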
- * - * @param cls object class - * @return serializer or null if not found - */ - public Serializer getSerializer(Class cls) { - return serializers.getSerializer(cls); - } - public Serializers getSerializers() { return serializers; } @@ -427,11 +406,7 @@ public boolean equals(Object o) { if (!properties.equals(that.properties)) { return false; } - if (!serializers.equals(that.serializers)) { - return false; - } - - return true; + return serializers.equals(that.serializers); } @Override diff --git a/paldb/src/main/java/com/linkedin/paldb/api/PalDB.java b/src/main/java/com/linkedin/paldb/api/PalDB.java similarity index 78% rename from paldb/src/main/java/com/linkedin/paldb/api/PalDB.java rename to src/main/java/com/linkedin/paldb/api/PalDB.java index 35feb30..2f40672 100644 --- a/paldb/src/main/java/com/linkedin/paldb/api/PalDB.java +++ b/src/main/java/com/linkedin/paldb/api/PalDB.java @@ -26,13 +26,7 @@ * This class is the entry point to obtain {@link com.linkedin.paldb.api.StoreReader} and * {@link com.linkedin.paldb.api.StoreWriter} interfaces. */ -public final class PalDB { - - /** - * This class is only static - */ - private PalDB() { - } +public interface PalDB { /** * Creates a store reader from the specified file with a default configuration. @@ -40,9 +34,11 @@ private PalDB() { * The file must exists. * * @param file a PalDB store file + * @param key type + * @param value type * @return a store reader */ - public static StoreReader createReader(File file) { + static StoreReader createReader(File file) { return StoreImpl.createReader(file, newConfiguration()); } @@ -53,9 +49,11 @@ public static StoreReader createReader(File file) { * * @param file a PalDB store file * @param config configuration + * @param key type + * @param value type * @return a store reader */ - public static StoreReader createReader(File file, Configuration config) { + static StoreReader createReader(File file, Configuration config) { return StoreImpl.createReader(file, config); } @@ -67,9 +65,11 @@ public static StoreReader createReader(File file, Configuration config) { * * @param stream an input stream on a PalDB store file * @param config configuration + * @param key type + * @param value type * @return a store reader */ - public static StoreReader createReader(InputStream stream, Configuration config) { + static StoreReader createReader(InputStream stream, Configuration config) { return StoreImpl.createReader(stream, config); } @@ -79,9 +79,11 @@ public static StoreReader createReader(InputStream stream, Configuration config) * The parent folder is created if missing. 
* * @param file location of the output file + * @param key type + * @param value type * @return a store writer */ - public static StoreWriter createWriter(File file) { + static StoreWriter createWriter(File file) { return StoreImpl.createWriter(file, newConfiguration()); } @@ -92,9 +94,11 @@ public static StoreWriter createWriter(File file) { * * @param file location of the output file * @param config configuration + * @param key type + * @param value type * @return a store writer */ - public static StoreWriter createWriter(File file, Configuration config) { + static StoreWriter createWriter(File file, Configuration config) { return StoreImpl.createWriter(file, config); } @@ -106,9 +110,11 @@ public static StoreWriter createWriter(File file, Configuration config) { * * @param stream output stream * @param config configuration + * @param key type + * @param value type * @return a store writer */ - public static StoreWriter createWriter(OutputStream stream, Configuration config) { + static StoreWriter createWriter(OutputStream stream, Configuration config) { return StoreImpl.createWriter(stream, config); } @@ -117,7 +123,7 @@ public static StoreWriter createWriter(OutputStream stream, Configuration config * * @return new configuration */ - public static Configuration newConfiguration() { + static Configuration newConfiguration() { return new Configuration(); } } diff --git a/src/main/java/com/linkedin/paldb/api/PalDBConfigBuilder.java b/src/main/java/com/linkedin/paldb/api/PalDBConfigBuilder.java new file mode 100644 index 0000000..1727256 --- /dev/null +++ b/src/main/java/com/linkedin/paldb/api/PalDBConfigBuilder.java @@ -0,0 +1,90 @@ +package com.linkedin.paldb.api; + +public final class PalDBConfigBuilder { + + private final Configuration palDbConfiguration; + + private PalDBConfigBuilder() { + this.palDbConfiguration = new Configuration(); + } + + public static PalDBConfigBuilder create() { + return new PalDBConfigBuilder(); + } + + /** + * PalDB configuration property. + *

+ * mmap.segment.size - memory map segment size [default: 1GB] + * @param bytes size in bytes + * @return this {@code PalDBConfigBuilder} instance (for chaining) + */ + public PalDBConfigBuilder withMemoryMapSegmentSize(long bytes) { + palDbConfiguration.set(Configuration.MMAP_SEGMENT_SIZE, String.valueOf(bytes)); + return this; + } + + /** + * PalDB configuration property. + *

+ * mmap.data.enabled - enable memory mapping for data [default: true] + * @param enabled flag + * @return this {@code PalDBConfigBuilder} instance (for chaining) + */ + public PalDBConfigBuilder withMemoryMapDataEnabled(boolean enabled) { + palDbConfiguration.set(Configuration.MMAP_DATA_ENABLED, String.valueOf(enabled)); + return this; + } + + /** + * PalDB configuration property. + *

+ * load.factor - index load factor [default: 0.75] + * @param loadFactor load factor value + * @return this {@code PalDBConfigBuilder} instance (for chaining) + */ + public PalDBConfigBuilder withIndexLoadFactor(double loadFactor) { + palDbConfiguration.set(Configuration.LOAD_FACTOR, String.valueOf(loadFactor)); + return this; + } + + /** + * PalDB configuration property. + *

+ * compression.enabled - enable compression [default: false] + * @param enabled flag + * @return this {@code PalDBConfigBuilder} instance (for chaining) + */ + public PalDBConfigBuilder withEnableCompression(boolean enabled) { + palDbConfiguration.set(Configuration.COMPRESSION_ENABLED, String.valueOf(enabled)); + return this; + } + + /** + * PalDB configuration property. + *

+ * bloom.filter.enabled - enable bloom filter [default: false] + * @param enabled flag + * @return this {@code PalDBConfigBuilder} instance (for chaining) + */ + public PalDBConfigBuilder withEnableBloomFilter(boolean enabled) { + palDbConfiguration.set(Configuration.BLOOM_FILTER_ENABLED, String.valueOf(enabled)); + return this; + } + + /** + * PalDB configuration property. + *

+ * bloom.filter.error.factor - bloom filter error rate [default: 0.01] + * @param errorFactor value, e.g. 0.01 equals 1% error rate + * @return this {@code PalDBConfigBuilder} instance (for chaining) + */ + public PalDBConfigBuilder withBloomFilterErrorFactor(double errorFactor) { + palDbConfiguration.set(Configuration.BLOOM_FILTER_ERROR_FACTOR, String.valueOf(errorFactor)); + return this; + } + + public Configuration build() { + return new Configuration(palDbConfiguration); + } +} diff --git a/paldb/src/main/java/com/linkedin/paldb/api/Serializer.java b/src/main/java/com/linkedin/paldb/api/Serializer.java similarity index 74% rename from paldb/src/main/java/com/linkedin/paldb/api/Serializer.java rename to src/main/java/com/linkedin/paldb/api/Serializer.java index 04b00a1..064e7c5 100644 --- a/paldb/src/main/java/com/linkedin/paldb/api/Serializer.java +++ b/src/main/java/com/linkedin/paldb/api/Serializer.java @@ -39,8 +39,7 @@ public interface Serializer extends Serializable { * @param input instance * @throws IOException if an io error occurs */ - public void write(DataOutput dataOutput, K input) - throws IOException; + void write(DataOutput dataOutput, K input) throws IOException; /** * Reads the data input and creates the instance. @@ -49,15 +48,7 @@ public void write(DataOutput dataOutput, K input) * @return new instance of type K. * @throws IOException if an io error occurs */ - public K read(DataInput dataInput) - throws IOException; + K read(DataInput dataInput) throws IOException; - /** - * Returns the estimate number of bytes used to hold instance in memory. - *

- * This information is used by the cache so it can manages its memory usage. - * @param instance instance to get weight for - * @return the number of bytes the object uses in memory - */ - public int getWeight(K instance); + Class serializedClass(); } diff --git a/src/main/java/com/linkedin/paldb/api/StoreReader.java b/src/main/java/com/linkedin/paldb/api/StoreReader.java new file mode 100644 index 0000000..0e049a4 --- /dev/null +++ b/src/main/java/com/linkedin/paldb/api/StoreReader.java @@ -0,0 +1,101 @@ +/* +* Copyright 2015 LinkedIn Corp. All rights reserved. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +*/ + +package com.linkedin.paldb.api; + +import java.io.File; +import java.util.*; + + +/** + * Main interface to read data from a PalDB store. + *

+ * Obtain instances with the PalDB.createReader() method and then call the + * get() method to fetch values. Call + * close() to release resources when done. + */ +public interface StoreReader<K, V> extends AutoCloseable, Iterable<Map.Entry<K, V>> { + + /** + * Closes the store reader and frees resources. + *

+ * A closed reader can't be reopened. + */ + @Override + void close(); + + /** + * Returns the reader's configuration. + * + * @return the store configuration + */ + Configuration getConfiguration(); + + /** + * Returns the store file. + * + * @return file + */ + File getFile(); + + /** + * Returns the number of keys in the store. + * + * @return key count + */ + long size(); + + /** + * Gets the value for key or null if not found. + * + * @param key key to fetch + * @return value or null if not found + */ + V get(K key); + + /** + * Gets the value for key or defaultValue if not found. + * + * @param key key to fetch + * @param defaultValue default value + * @return value or defaultValue if not found + */ + V get(K key, V defaultValue); + + /** + * Gets the store iterable. + *
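
For illustration only (not from the change itself): a short sketch of the default-value lookup declared above and of the entry-reuse caveat noted below, assuming an existing store keyed by `Integer` with `String` values; entries handed out during iteration are reused, so they are copied before being retained.

```java
// Illustrative only — assumes a store keyed by Integer with String values.
import com.linkedin.paldb.api.PalDB;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class ReaderLookupExample {
    public static void main(String[] args) {
        try (var reader = PalDB.<Integer, String>createReader(new File("store.paldb"))) {
            // Fall back to a default instead of null when the key is absent.
            String name = reader.get(42, "unknown");

            // Entry objects are reused during iteration, so copy anything you keep.
            List<Map.Entry<Integer, String>> kept = new ArrayList<>();
            for (var entry : reader) {
                kept.add(Map.entry(entry.getKey(), entry.getValue()));
            }
            System.out.println(name + " / " + kept.size() + " entries");
        }
    }
}
```
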

+ * Note that entry objects are reused. + * + * @return iterable over store + */ + Iterable<Map.Entry<K, V>> iterable(); + + /** + * Gets the store iterator. + *

+ * Note that entry objects are reused. + * + * @return iterator over store + */ + @Override + Iterator<Map.Entry<K, V>> iterator(); + + /** + * Gets the store keys iterable. + * + * @return iterable over keys + */ + Iterable<K> keys(); +} diff --git a/paldb/src/main/java/com/linkedin/paldb/api/StoreWriter.java b/src/main/java/com/linkedin/paldb/api/StoreWriter.java similarity index 88% rename from paldb/src/main/java/com/linkedin/paldb/api/StoreWriter.java rename to src/main/java/com/linkedin/paldb/api/StoreWriter.java index c8d4b5d..31cc446 100644 --- a/paldb/src/main/java/com/linkedin/paldb/api/StoreWriter.java +++ b/src/main/java/com/linkedin/paldb/api/StoreWriter.java @@ -24,13 +24,14 @@ *

* Note that duplicates aren't allowed. */ -public interface StoreWriter { +public interface StoreWriter extends AutoCloseable { /** * Close the store writer and append the data to the final destination. A * closed writer can't be reopened. */ - public void close(); + @Override + void close(); /** * Return the writer configuration. Configuration values should always be @@ -39,7 +40,7 @@ public interface StoreWriter { * * @return the store configuration */ - public Configuration getConfiguration(); + Configuration getConfiguration(); /** * Put key-value to the store. @@ -49,7 +50,7 @@ public interface StoreWriter { * @throws NullPointerException if key or value is * null */ - public void put(Object key, Object value); + void put(K key, V value); /** * Put multiple key-values to the store. @@ -57,7 +58,7 @@ public interface StoreWriter { * @param keys a collection of keys * @param values a collection of values */ - public void putAll(Object[] keys, Object[] values); + void putAll(K[] keys, V[] values); /** * Put serialized key-value entry to the store.

Use only this method if @@ -68,5 +69,5 @@ public interface StoreWriter { * @throws NullPointerException if key or value is * null */ - public void put(byte[] key, byte[] value); + void put(byte[] key, byte[] value); } diff --git a/src/main/java/com/linkedin/paldb/api/errors/DuplicateKeyException.java b/src/main/java/com/linkedin/paldb/api/errors/DuplicateKeyException.java new file mode 100644 index 0000000..32a7466 --- /dev/null +++ b/src/main/java/com/linkedin/paldb/api/errors/DuplicateKeyException.java @@ -0,0 +1,8 @@ +package com.linkedin.paldb.api.errors; + +public class DuplicateKeyException extends RuntimeException { + + public DuplicateKeyException(String message) { + super(message); + } +} diff --git a/src/main/java/com/linkedin/paldb/api/errors/OutOfDiskSpace.java b/src/main/java/com/linkedin/paldb/api/errors/OutOfDiskSpace.java new file mode 100644 index 0000000..2b9d15e --- /dev/null +++ b/src/main/java/com/linkedin/paldb/api/errors/OutOfDiskSpace.java @@ -0,0 +1,8 @@ +package com.linkedin.paldb.api.errors; + +public class OutOfDiskSpace extends RuntimeException { + + public OutOfDiskSpace(String message) { + super(message); + } +} diff --git a/paldb/src/main/java/com/linkedin/paldb/api/UnsupportedTypeException.java b/src/main/java/com/linkedin/paldb/api/errors/UnsupportedTypeException.java similarity index 90% rename from paldb/src/main/java/com/linkedin/paldb/api/UnsupportedTypeException.java rename to src/main/java/com/linkedin/paldb/api/errors/UnsupportedTypeException.java index 21f719d..282589d 100644 --- a/paldb/src/main/java/com/linkedin/paldb/api/UnsupportedTypeException.java +++ b/src/main/java/com/linkedin/paldb/api/errors/UnsupportedTypeException.java @@ -12,14 +12,15 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.paldb.api; +package com.linkedin.paldb.api.errors; + +import com.linkedin.paldb.api.StoreReader; /** * Exception returned when a type isn't supported * * @see StoreReader */ -@SuppressWarnings("serial") public class UnsupportedTypeException extends RuntimeException { public UnsupportedTypeException(Object obj) { diff --git a/paldb/src/main/java/com/linkedin/paldb/api/package.html b/src/main/java/com/linkedin/paldb/api/package.html similarity index 100% rename from paldb/src/main/java/com/linkedin/paldb/api/package.html rename to src/main/java/com/linkedin/paldb/api/package.html diff --git a/src/main/java/com/linkedin/paldb/impl/ReaderImpl.java b/src/main/java/com/linkedin/paldb/impl/ReaderImpl.java new file mode 100644 index 0000000..af7ab00 --- /dev/null +++ b/src/main/java/com/linkedin/paldb/impl/ReaderImpl.java @@ -0,0 +1,145 @@ +/* +* Copyright 2015 LinkedIn Corp. All rights reserved. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +*/ + +package com.linkedin.paldb.impl; + +import com.linkedin.paldb.api.*; +import com.linkedin.paldb.utils.DataInputOutput; +import org.slf4j.*; + +import java.io.*; +import java.util.*; + + +/** + * Store reader implementation. 
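
For illustration only: the exceptions added under `com.linkedin.paldb.api.errors` above are unchecked, and `UnsupportedTypeException` now lives in that package, so existing imports need updating. The sketch below assumes the writer rejects repeated keys with `DuplicateKeyException` (the StoreWriter contract states that duplicates aren't allowed; whether the failure surfaces on `put()` or on `close()` is not shown in this hunk).

```java
// Illustrative only — assumes DuplicateKeyException is raised for a repeated key.
import com.linkedin.paldb.api.PalDB;
import com.linkedin.paldb.api.errors.DuplicateKeyException;

import java.io.File;

public class DuplicateKeyExample {
    public static void main(String[] args) {
        try (var writer = PalDB.<String, String>createWriter(new File("store.paldb"))) {
            writer.put("key", "first");
            writer.put("key", "second"); // duplicates aren't allowed
        } catch (DuplicateKeyException e) {
            // Unchecked, so handling is only needed where duplicates can occur.
            System.err.println("Duplicate key rejected: " + e.getMessage());
        }
    }
}
```
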
+ */ +public final class ReaderImpl implements StoreReader { + + // Logger + private static final Logger log = LoggerFactory.getLogger(ReaderImpl.class); + // Configuration + private final Configuration config; + // Storage + private final StorageReader storage; + // Serialization + private final StorageSerialization serialization; + // File + private final File file; + // Opened? + private boolean opened; + + /** + * Private constructor. + * + * @param config configuration + * @param file store file + */ + ReaderImpl(Configuration config, File file) { + this.config = config; + this.file = file; + + // Open storage + try { + log.info("Opening reader storage"); + serialization = new StorageSerialization(config); + storage = new StorageReader(config, file); + } catch (IOException ex) { + throw new UncheckedIOException(ex); + } + opened = true; + } + + @Override + public void close() { + checkOpen(); + try { + log.info("Closing reader storage"); + storage.close(); + opened = false; + } catch (IOException ex) { + throw new UncheckedIOException(ex); + } + } + + @Override + public long size() { + checkOpen(); + return storage.getKeyCount(); + } + + @Override + public Configuration getConfiguration() { + return config; + } + + @Override + public File getFile() { + return file; + } + + @Override + public V get(K key) { + return get(key, null); + } + + @Override + public V get(K key, V defaultValue) { + checkOpen(); + if (key == null) { + throw new NullPointerException("The key can't be null"); + } + + try { + byte[] valueBytes = storage.get(serialization.serializeKey(key)); + if (valueBytes != null) { + return (V) serialization.deserialize(new DataInputOutput(valueBytes)); + } else { + return defaultValue; + } + } catch (IOException ex) { + throw new UncheckedIOException(ex); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + @Override + public Iterable> iterable() { + checkOpen(); + return new ReaderIterable<>(storage, serialization); + } + + @Override + public Iterator> iterator() { + return iterable().iterator(); + } + + @Override + public Iterable keys() { + checkOpen(); + return new ReaderKeyIterable<>(storage, serialization); + } + + // UTILITIES + + /** + * Checks if the store is open and throws an exception otherwise. 
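
For illustration only: a minimal sketch of the open-state guard implemented by `checkOpen()` below; it assumes nothing beyond what the class shows, namely that any read after `close()` fails with `IllegalStateException`.

```java
// Illustrative only — shows the IllegalStateException raised by checkOpen().
import com.linkedin.paldb.api.PalDB;
import com.linkedin.paldb.api.StoreReader;

import java.io.File;

public class ClosedReaderExample {
    public static void main(String[] args) {
        StoreReader<Integer, String> reader = PalDB.createReader(new File("store.paldb"));
        reader.close();
        try {
            reader.get(1); // the reader is already closed here
        } catch (IllegalStateException e) {
            System.err.println(e.getMessage()); // "The store is closed"
        }
    }
}
```
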
+ */ + private void checkOpen() { + if (!opened) { + throw new IllegalStateException("The store is closed"); + } + } +} diff --git a/paldb/src/main/java/com/linkedin/paldb/impl/ReaderIterable.java b/src/main/java/com/linkedin/paldb/impl/ReaderIterable.java similarity index 94% rename from paldb/src/main/java/com/linkedin/paldb/impl/ReaderIterable.java rename to src/main/java/com/linkedin/paldb/impl/ReaderIterable.java index 4497152..694e17c 100644 --- a/paldb/src/main/java/com/linkedin/paldb/impl/ReaderIterable.java +++ b/src/main/java/com/linkedin/paldb/impl/ReaderIterable.java @@ -43,7 +43,7 @@ public final class ReaderIterable implements Iterable> { @Override public Iterator> iterator() { - return new ReaderIterator(byteIterable.iterator(), serialization); + return new ReaderIterator<>(byteIterable.iterator(), serialization); } /** @@ -54,7 +54,7 @@ public Iterator> iterator() { private static final class ReaderIterator implements Iterator> { // Reusable entry - private final FastEntry entry = new FastEntry(); + private final FastEntry entry = new FastEntry<>(); // Iterator private final Iterator> byteIterator; // Buffer @@ -125,7 +125,7 @@ public V getValue() { } @Override - public Object setValue(Object value) { + public V setValue(V value) { throw new UnsupportedOperationException("Not supported."); } } diff --git a/paldb/src/main/java/com/linkedin/paldb/impl/ReaderKeyIterable.java b/src/main/java/com/linkedin/paldb/impl/ReaderKeyIterable.java similarity index 92% rename from paldb/src/main/java/com/linkedin/paldb/impl/ReaderKeyIterable.java rename to src/main/java/com/linkedin/paldb/impl/ReaderKeyIterable.java index 505615a..5e33d61 100644 --- a/paldb/src/main/java/com/linkedin/paldb/impl/ReaderKeyIterable.java +++ b/src/main/java/com/linkedin/paldb/impl/ReaderKeyIterable.java @@ -44,7 +44,7 @@ public final class ReaderKeyIterable implements Iterable { @Override public Iterator iterator() { - return new ReaderKeyIterator(byteIterable.iterator(), serialization); + return new ReaderKeyIterator<>(byteIterable.iterator(), serialization); } /** @@ -79,8 +79,7 @@ public boolean hasNext() { public K next() { Map.Entry byteEntry = byteIterator.next(); try { - K key = (K) serialization.deserialize(dataInputOutput.reset(byteEntry.getKey())); - return key; + return (K) serialization.deserialize(dataInputOutput.reset(byteEntry.getKey())); } catch (Exception ex) { throw new RuntimeException(ex); } diff --git a/src/main/java/com/linkedin/paldb/impl/Serializers.java b/src/main/java/com/linkedin/paldb/impl/Serializers.java new file mode 100644 index 0000000..b068be8 --- /dev/null +++ b/src/main/java/com/linkedin/paldb/impl/Serializers.java @@ -0,0 +1,77 @@ +/* +* Copyright 2015 LinkedIn Corp. All rights reserved. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +*/ + +package com.linkedin.paldb.impl; + +import com.linkedin.paldb.api.Serializer; +import org.slf4j.*; + +import java.util.*; + + +/** + * Manages the custom serializers. 
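
For illustration only: a sketch of the registry declared below, using a hypothetical `UuidSerializer` (any `Serializer` implementation exposing `serializedClass()` works); lookups are keyed by class or by fully qualified class name. In application code the same registry is normally reached through `Configuration.getSerializers()`.

```java
// Illustrative only — UuidSerializer is a hypothetical serializer for this sketch.
import com.linkedin.paldb.api.Serializer;
import com.linkedin.paldb.impl.Serializers;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.UUID;

public class SerializersExample {

    public static class UuidSerializer implements Serializer<UUID> {
        @Override
        public void write(DataOutput dataOutput, UUID input) throws IOException {
            dataOutput.writeLong(input.getMostSignificantBits());
            dataOutput.writeLong(input.getLeastSignificantBits());
        }

        @Override
        public UUID read(DataInput dataInput) throws IOException {
            return new UUID(dataInput.readLong(), dataInput.readLong());
        }

        @Override
        public Class<UUID> serializedClass() {
            return UUID.class;
        }
    }

    public static void main(String[] args) {
        var serializers = new Serializers();
        serializers.registerSerializer(new UuidSerializer());
        // Lookups work by class or by fully qualified class name.
        Serializer byClass = serializers.getSerializer(UUID.class);
        Serializer byName = serializers.getSerializer("java.util.UUID");
        System.out.println(byClass == byName); // same registered instance
    }
}
```
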
+ */ +public final class Serializers { + + // Logger + private static final Logger log = LoggerFactory.getLogger(Serializers.class); + private final Map> serializerMap; + + /** + * Default constructor. + */ + public Serializers() { + serializerMap = new HashMap<>(); + } + + /** + * Registers the serializer. + * + * @param serializer serializer + * @param serialized class type + */ + public synchronized void registerSerializer(Serializer serializer) { + var className = serializer.serializedClass().getName(); + serializerMap.putIfAbsent(className, serializer); + log.info("Registered new serializer '{}' for '{}'", serializer.getClass().getName(), className); + } + + /** + * Get the serializer instance associated with cls or null if not found. + * + * @param cls object class + * @return serializer instance or null if not found + */ + public Serializer getSerializer(Class cls) { + return getSerializer(cls.getName()); +} + + public Serializer getSerializer(String className) { + return serializerMap.get(className); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof Serializers)) return false; + final Serializers that = (Serializers) o; + return Objects.equals(serializerMap, that.serializerMap); + } + + @Override + public int hashCode() { + return Objects.hash(serializerMap); + } +} diff --git a/paldb/src/main/java/com/linkedin/paldb/impl/StorageReader.java b/src/main/java/com/linkedin/paldb/impl/StorageReader.java similarity index 55% rename from paldb/src/main/java/com/linkedin/paldb/impl/StorageReader.java rename to src/main/java/com/linkedin/paldb/impl/StorageReader.java index 2a1e22a..7c287d8 100644 --- a/paldb/src/main/java/com/linkedin/paldb/impl/StorageReader.java +++ b/src/main/java/com/linkedin/paldb/impl/StorageReader.java @@ -15,29 +15,16 @@ package com.linkedin.paldb.impl; import com.linkedin.paldb.api.Configuration; -import com.linkedin.paldb.utils.DataInputOutput; -import com.linkedin.paldb.utils.FormatVersion; -import com.linkedin.paldb.utils.HashUtils; -import com.linkedin.paldb.utils.LongPacker; -import java.io.BufferedInputStream; -import java.io.DataInputStream; -import java.io.EOFException; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.ByteBuffer; -import java.nio.MappedByteBuffer; +import com.linkedin.paldb.utils.*; +import org.slf4j.*; + +import java.io.*; +import java.nio.*; import java.nio.channels.FileChannel; -import java.text.DecimalFormat; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Iterator; -import java.util.Map; -import java.util.logging.Level; -import java.util.logging.Logger; +import java.nio.charset.MalformedInputException; +import java.text.*; +import java.time.Instant; +import java.util.*; /** @@ -46,11 +33,7 @@ public class StorageReader implements Iterable> { // Logger - private final static Logger LOGGER = Logger.getLogger(StorageReader.class.getName()); - // Configuration - private final Configuration config; - // File path - private File path; + private static final Logger log = LoggerFactory.getLogger(StorageReader.class); // Buffer segment size private final long segmentSize; // Number of keys in the index @@ -61,12 +44,6 @@ public class StorageReader implements Iterable> { private final int[] slotSizes; // Number of slots for each key length private final int[] slots; - // Number of different key length - 
private final int keyLengthCount; - // Max key length - private final int maxKeyLength; - // Offset of the index in the channel - private final int indexOffset; // Offset of the index for different key length private final int[] indexOffsets; // Offset of the data in the channel @@ -75,33 +52,26 @@ public class StorageReader implements Iterable> { private final long[] dataOffsets; // Data size private final long dataSize; - // Index and data buffers - private MappedByteBuffer indexBuffer; - private MappedByteBuffer[] dataBuffers; // FileChannel - private RandomAccessFile mappedFile; - private FileChannel channel; + private final RandomAccessFile mappedFile; + private final FileChannel channel; // Use MMap for data? private final boolean mMapData; // Buffers - private final DataInputOutput sizeBuffer = new DataInputOutput(new byte[5]); - private final byte[] slotBuffer; - - private final HashUtils hashUtils; + private final BloomFilter bloomFilter; + private final MappedByteBuffer indexBuffer; + private final MappedByteBuffer[] dataBuffers; - StorageReader(Configuration configuration, File file) - throws IOException { - path = file; - config = configuration; + StorageReader(Configuration configuration, File file) throws IOException { + // File path + // Configuration if (!file.exists()) { throw new FileNotFoundException("File " + file.getAbsolutePath() + " not found"); } - LOGGER.log(Level.INFO, "Opening file {0}", file.getName()); + log.info("Opening file {}", file.getName()); //Config - segmentSize = config.getLong(Configuration.MMAP_SEGMENT_SIZE); - - hashUtils = new HashUtils(); + segmentSize = configuration.getLong(Configuration.MMAP_SEGMENT_SIZE); // Check valid segmentSize if (segmentSize > Integer.MAX_VALUE) { @@ -112,9 +82,14 @@ public class StorageReader implements Iterable> { //Open file and read metadata long createdAt = 0; FormatVersion formatVersion = null; - FileInputStream inputStream = new FileInputStream(path); - DataInputStream dataInputStream = new DataInputStream(new BufferedInputStream(inputStream)); - try { + + // Offset of the index in the channel + int indexOffset; + int bloomFilterBitSize = 0; + int bloomFilterHashFunctions = 0; + + try (FileInputStream inputStream = new FileInputStream(file); + DataInputStream dataInputStream = new DataInputStream(new BufferedInputStream(inputStream))) { int ignoredBytes = -2; //Byte mark @@ -146,8 +121,28 @@ public class StorageReader implements Iterable> { //Metadata counters keyCount = dataInputStream.readInt(); - keyLengthCount = dataInputStream.readInt(); - maxKeyLength = dataInputStream.readInt(); + + //read bloom filter bit size + bloomFilterBitSize = dataInputStream.readInt(); + //read bloom filter long array size + int bloomFilterLength = dataInputStream.readInt(); + //read bloom filter hash functions + bloomFilterHashFunctions = dataInputStream.readInt(); + if (bloomFilterLength > 0) { + //read bloom filter long array + long[] bits = new long[bloomFilterLength]; + for (int i = 0; i < bloomFilterLength; i++) { + bits[i] = dataInputStream.readLong(); + } + bloomFilter = new BloomFilter(bloomFilterHashFunctions, bloomFilterBitSize, bits); + } else { + bloomFilter = null; + } + + // Number of different key length + final int keyLengthCount = dataInputStream.readInt(); + // Max key length + final int maxKeyLength = dataInputStream.readInt(); //Read offset counts and keys indexOffsets = new int[maxKeyLength + 1]; @@ -156,7 +151,7 @@ public class StorageReader implements Iterable> { slots = new int[maxKeyLength + 1]; slotSizes 
= new int[maxKeyLength + 1]; - int maxSlotSize = 0; + int mSlotSize = 0; for (int i = 0; i < keyLengthCount; i++) { int keyLength = dataInputStream.readInt(); @@ -166,180 +161,182 @@ public class StorageReader implements Iterable> { indexOffsets[keyLength] = dataInputStream.readInt(); dataOffsets[keyLength] = dataInputStream.readLong(); - maxSlotSize = Math.max(maxSlotSize, slotSizes[keyLength]); - } - - slotBuffer = new byte[maxSlotSize]; - - //Read serializers - try { - Serializers.deserialize(dataInputStream, config.getSerializers()); - } catch (Exception e) { - throw new RuntimeException(); + mSlotSize = Math.max(mSlotSize, slotSizes[keyLength]); } - //Read index and data offset indexOffset = dataInputStream.readInt() + ignoredBytes; dataOffset = dataInputStream.readLong() + ignoredBytes; - } finally { - //Close metadata - dataInputStream.close(); - inputStream.close(); } + //Close metadata //Create Mapped file in read-only mode - mappedFile = new RandomAccessFile(path, "r"); + mappedFile = new RandomAccessFile(file, "r"); channel = mappedFile.getChannel(); - long fileSize = path.length(); - - //Create index buffer - indexBuffer = channel.map(FileChannel.MapMode.READ_ONLY, indexOffset, dataOffset - indexOffset); + long fileSize = file.length(); //Create data buffers dataSize = fileSize - dataOffset; //Check if data size fits in memory map limit - if (!config.getBoolean(Configuration.MMAP_DATA_ENABLED)) { - //Use classical disk read - mMapData = false; - dataBuffers = null; - } else { - //Use Mmap - mMapData = true; - - //Build data buffers - int bufArraySize = (int) (dataSize / segmentSize) + ((dataSize % segmentSize != 0) ? 1 : 0); - dataBuffers = new MappedByteBuffer[bufArraySize]; - int bufIdx = 0; - for (long offset = 0; offset < dataSize; offset += segmentSize) { - long remainingFileSize = dataSize - offset; - long thisSegmentSize = Math.min(segmentSize, remainingFileSize); - dataBuffers[bufIdx++] = channel.map(FileChannel.MapMode.READ_ONLY, dataOffset + offset, thisSegmentSize); - } - } + mMapData = configuration.getBoolean(Configuration.MMAP_DATA_ENABLED); + indexBuffer = initIndexBuffer(channel, indexOffset); + dataBuffers = initDataBuffers(channel); //logging - DecimalFormat integerFormat = new DecimalFormat("#,##0.00"); - StringBuilder statMsg = new StringBuilder("Storage metadata\n"); - statMsg.append(" Created at: " + formatCreatedAt(createdAt) + "\n"); - statMsg.append(" Format version: " + formatVersion.name() + "\n"); - statMsg.append(" Key count: " + keyCount + "\n"); - for (int i = 0; i < keyCounts.length; i++) { - if (keyCounts[i] > 0) { - statMsg.append(" Key count for key length " + i + ": " + keyCounts[i] + "\n"); + if (log.isDebugEnabled()) { + DecimalFormat integerFormat = new DecimalFormat("#,##0.00"); + StringBuilder statMsg = new StringBuilder("Storage metadata\n"); + statMsg.append(" Created at: ").append(formatCreatedAt(createdAt)).append("\n"); + statMsg.append(" Format version: ").append(formatVersion.name()).append("\n"); + statMsg.append(" Key count: ").append(keyCount).append("\n"); + for (int i = 0; i < keyCounts.length; i++) { + if (keyCounts[i] > 0) { + statMsg.append(" Key count for key length ").append(i).append(": ").append(keyCounts[i]).append("\n"); + } } + statMsg.append(" Index size: ").append(integerFormat.format((dataOffset - indexOffset) / (1024.0 * 1024.0))).append(" Mb\n"); + statMsg.append(" Data size: ").append(integerFormat.format((fileSize - dataOffset) / (1024.0 * 1024.0))).append(" Mb\n"); + statMsg.append(" Bloom filter size: 
").append(integerFormat.format((bloomFilterBitSize / 8.0) / (1024.0 * 1024.0))).append(" Mb\n"); + statMsg.append(" Bloom filter hashes: ").append(bloomFilterHashFunctions).append("\n"); + log.debug(statMsg.toString()); } - statMsg.append(" Index size: " + integerFormat.format((dataOffset - indexOffset) / (1024.0 * 1024.0)) + " Mb\n"); - statMsg.append(" Data size: " + integerFormat.format((fileSize - dataOffset) / (1024.0 * 1024.0)) + " Mb\n"); - if (mMapData) { - statMsg.append(" Number of memory mapped data buffers: " + dataBuffers.length); - } else { - statMsg.append(" Memory mapped data disabled, using disk"); + } + + private MappedByteBuffer initIndexBuffer(FileChannel channel, int indexOffset) { + try { + return channel.map(FileChannel.MapMode.READ_ONLY, indexOffset, dataOffset - indexOffset); + } catch (IOException e) { + throw new UncheckedIOException(e); } - LOGGER.info(statMsg.toString()); + } + + private MappedByteBuffer[] initDataBuffers(FileChannel channel) { + int bufArraySize = (int) (dataSize / segmentSize) + ((dataSize % segmentSize != 0) ? 1 : 0); + MappedByteBuffer[] result = new MappedByteBuffer[bufArraySize]; + int bufIdx = 0; + for (long offset = 0; offset < dataSize; offset += segmentSize) { + long remainingFileSize = dataSize - offset; + long thisSegmentSize = Math.min(segmentSize, remainingFileSize); + try { + result[bufIdx++] = channel.map(FileChannel.MapMode.READ_ONLY, dataOffset + offset, thisSegmentSize); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + return result; } //Get the value for the given key or null - public byte[] get(byte[] key) - throws IOException { + public byte[] get(byte[] key) throws IOException { int keyLength = key.length; if (keyLength >= slots.length || keyCounts[keyLength] == 0) { return null; } - long hash = (long) hashUtils.hash(key); + + if (bloomFilter != null && !bloomFilter.mightContain(key)) { + return null; + } + int numSlots = slots[keyLength]; int slotSize = slotSizes[keyLength]; - int indexOffset = indexOffsets[keyLength]; - long dataOffset = dataOffsets[keyLength]; - + int ixOffset = indexOffsets[keyLength]; + long dtOffset = dataOffsets[keyLength]; + long hash = Murmur3.hash(key); + var slotBuffer = new byte[slotSize]; for (int probe = 0; probe < numSlots; probe++) { int slot = (int) ((hash + probe) % numSlots); - indexBuffer.position(indexOffset + slot * slotSize); - indexBuffer.get(slotBuffer, 0, slotSize); - + indexBuffer.get(ixOffset + slot * slotSize, slotBuffer, 0, slotSize); long offset = LongPacker.unpackLong(slotBuffer, keyLength); - if (offset == 0) { + if (offset == 0L) { return null; } if (isKey(slotBuffer, key)) { - byte[] value = mMapData ? getMMapBytes(dataOffset + offset) : getDiskBytes(dataOffset + offset); - return value; + return mMapData ? 
getMMapBytes(dtOffset + offset) : getDiskBytes(dtOffset + offset); } } return null; } private boolean isKey(byte[] slotBuffer, byte[] key) { - for (int i = 0; i < key.length; i++) { - if (slotBuffer[i] != key[i]) { - return false; - } - } - return true; + return Arrays.compare(slotBuffer, 0, key.length, key, 0, key.length) == 0; } //Close the reader channel - public void close() - throws IOException { + public void close() throws IOException { channel.close(); mappedFile.close(); - indexBuffer = null; - dataBuffers = null; - mappedFile = null; - channel = null; - System.gc(); } public int getKeyCount() { return keyCount; } + private static int remaining(ByteBuffer buffer, int pos) { + return buffer.limit() - pos; + } + //Read the data at the given offset, the data can be spread over multiple data buffers - private byte[] getMMapBytes(long offset) - throws IOException { - //Read the first 4 bytes to get the size of the data - ByteBuffer buf = getDataBuffer(offset); - int maxLen = (int) Math.min(5, dataSize - offset); + private byte[] getMMapBytes(long offset) throws IOException { + var buf = dataBuffers[(int) (offset / segmentSize)]; + var pos = (int) (offset % segmentSize); - int size; - if (buf.remaining() >= maxLen) { + int maxLen = (int) Math.min(5, dataSize - offset); + //Read the first 4 bytes to get the size of the data + int size = -1; + if (remaining(buf, pos) >= maxLen) { //Continuous read - int pos = buf.position(); - size = LongPacker.unpackInt(buf); + int oldPos = pos; + + //unpack int + for (int i = 0, result = 0; i < 32; i += 7) { + int b = buf.get(pos++) & 0xffff; + result |= (b & 0x7F) << i; + if ((b & 0x80) == 0) { + size = result; + break; + } + } + if (size == -1) throw new MalformedInputException(Integer.BYTES); // Used in case of data is spread over multiple buffers - offset += buf.position() - pos; + offset += pos - oldPos; } else { //The size of the data is spread over multiple buffers int len = maxLen; int off = 0; - sizeBuffer.reset(); - while (len > 0) { - buf = getDataBuffer(offset + off); - int count = Math.min(len, buf.remaining()); - buf.get(sizeBuffer.getBuf(), off, count); - off += count; - len -= count; + + try (var sizeBuffer = new DataInputOutput(new byte[5])) { + while (len > 0) { + buf = dataBuffers[(int) ( (offset + off) / segmentSize)]; + pos = (int) ( (offset + off) % segmentSize); + int count = Math.min(len, remaining(buf, pos)); + buf.get(pos, sizeBuffer.getBuf(), off, count); + off += count; + len -= count; + } + size = LongPacker.unpackInt(sizeBuffer); + offset += sizeBuffer.getPos(); + + buf = dataBuffers[(int) (offset / segmentSize)]; + pos = (int) (offset % segmentSize); } - size = LongPacker.unpackInt(sizeBuffer); - offset += sizeBuffer.getPos(); - buf = getDataBuffer(offset); } //Create output bytes byte[] res = new byte[size]; //Check if the data is one buffer - if (buf.remaining() >= size) { + if (remaining(buf, pos) >= size) { //Continuous read - buf.get(res, 0, size); + buf.get(pos, res, 0, size); } else { int len = size; int off = 0; while (len > 0) { - buf = getDataBuffer(offset); - int count = Math.min(len, buf.remaining()); - buf.get(res, off, count); + buf = dataBuffers[(int) (offset / segmentSize)]; + pos = (int) (offset % segmentSize); + int count = Math.min(len, remaining(buf, pos)); + buf.get(pos, res, off, count); offset += count; off += count; len -= count; @@ -350,8 +347,7 @@ private byte[] getMMapBytes(long offset) } //Get data from disk - private byte[] getDiskBytes(long offset) - throws IOException { + private synchronized 
byte[] getDiskBytes(long offset) throws IOException { mappedFile.seek(dataOffset + offset); //Get size of data @@ -368,18 +364,8 @@ private byte[] getDiskBytes(long offset) return res; } - //Return the data buffer for the given position - private ByteBuffer getDataBuffer(long index) { - ByteBuffer buf = dataBuffers[(int) (index / segmentSize)]; - buf.position((int) (index % segmentSize)); - return buf; - } - private String formatCreatedAt(long createdAt) { - SimpleDateFormat sdf = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z"); - Calendar cl = Calendar.getInstance(); - cl.setTimeInMillis(createdAt); - return sdf.format(cl.getTime()); + return Instant.ofEpochMilli(createdAt).toString(); } @Override @@ -430,11 +416,9 @@ public boolean hasNext() { @Override public FastEntry next() { try { - indexBuffer.position(currentIndexOffset); - long offset = 0; while (offset == 0) { - indexBuffer.get(currentSlotBuffer); + indexBuffer.get(currentIndexOffset, currentSlotBuffer); offset = LongPacker.unpackLong(currentSlotBuffer, currentKeyLength); currentIndexOffset += currentSlotBuffer.length; } @@ -454,7 +438,7 @@ public FastEntry next() { } return entry; } catch (IOException ex) { - throw new RuntimeException(ex); + throw new UncheckedIOException(ex); } } diff --git a/paldb/src/main/java/com/linkedin/paldb/impl/StorageSerialization.java b/src/main/java/com/linkedin/paldb/impl/StorageSerialization.java similarity index 89% rename from paldb/src/main/java/com/linkedin/paldb/impl/StorageSerialization.java rename to src/main/java/com/linkedin/paldb/impl/StorageSerialization.java index 97afdd3..a369569 100644 --- a/paldb/src/main/java/com/linkedin/paldb/impl/StorageSerialization.java +++ b/src/main/java/com/linkedin/paldb/impl/StorageSerialization.java @@ -14,20 +14,15 @@ package com.linkedin.paldb.impl; -import com.linkedin.paldb.api.Configuration; -import com.linkedin.paldb.api.Serializer; -import com.linkedin.paldb.api.UnsupportedTypeException; -import com.linkedin.paldb.utils.DataInputOutput; -import com.linkedin.paldb.utils.LongPacker; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.EOFException; -import java.io.IOException; -import java.lang.reflect.Array; -import java.math.BigDecimal; -import java.math.BigInteger; +import com.linkedin.paldb.api.*; +import com.linkedin.paldb.api.errors.UnsupportedTypeException; +import com.linkedin.paldb.utils.*; import org.xerial.snappy.Snappy; +import java.io.*; +import java.lang.reflect.Array; +import java.math.*; + /** * Internal serialization implementation. */ @@ -57,16 +52,17 @@ public StorageSerialization(Configuration config) { * Serializes the key object and returns it as a byte array. 
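
For illustration only: a round-trip sketch of the internal serialization entry points shown in this file, assuming `StorageSerialization` is accessible to the caller (it sits in the `impl` package) and using a default configuration.

```java
// Illustrative only — exercises the impl-level serialization directly.
import com.linkedin.paldb.api.PalDB;
import com.linkedin.paldb.impl.StorageSerialization;

import java.io.IOException;

public class SerializationRoundTrip {
    public static void main(String[] args) throws IOException, ClassNotFoundException {
        var serialization = new StorageSerialization(PalDB.newConfiguration());
        byte[] keyBytes = serialization.serializeKey(42);  // compact, type-tagged encoding
        Object restored = serialization.deserialize(keyBytes);
        System.out.println(restored);                      // prints 42
    }
}
```
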
* * @param key key to serialize + * @param key type * @return key as byte array * @throws IOException if an io error occurs */ - public byte[] serializeKey(Object key) - throws IOException { + public byte[] serializeKey(K key) throws IOException { if (key == null) { throw new NullPointerException(); } - serializeObject(key, dataInputOutput.reset(), false); - return dataInputOutput.toByteArray(); + var dataIO = new DataInputOutput(); + serializeObject(key, dataIO, false); + return dataIO.toByteArray(); } /** @@ -76,8 +72,7 @@ public byte[] serializeKey(Object key) * @param dataOutput data output * @throws IOException if an io error occurs */ - public void serializeKey(Object key, DataOutput dataOutput) - throws IOException { + public void serializeKey(Object key, DataOutput dataOutput) throws IOException { serializeObject(key, dataOutput, false); } @@ -88,8 +83,7 @@ public void serializeKey(Object key, DataOutput dataOutput) * @return value as byte array * @throws IOException if an io error occurs */ - public byte[] serializeValue(Object value) - throws IOException { + public byte[] serializeValue(Object value) throws IOException { serializeObject(value, dataInputOutput.reset(), compression); return dataInputOutput.toByteArray(); @@ -102,8 +96,7 @@ public byte[] serializeValue(Object value) * @param dataOutput data output * @throws IOException if an io error occurs */ - public void serializeValue(Object value, DataOutput dataOutput) - throws IOException { + public void serializeValue(Object value, DataOutput dataOutput) throws IOException { serializeObject(value, dataOutput, compression); } @@ -112,15 +105,13 @@ public void serializeValue(Object value, DataOutput dataOutput) * * @param obj object to serialize * @param useCompression use compression - * @return serialized object in bytes * @throws IOException if an io error occurs */ - private void serializeObject(Object obj, DataOutput dataOutput, boolean useCompression) - throws IOException { + private void serializeObject(Object obj, DataOutput dataOutput, boolean useCompression) throws IOException { //Cast to primitive arrays if necessary if (obj != null && obj.getClass().isArray()) { if (obj instanceof Integer[]) { - obj = (int[]) getPrimitiveArray((Object[]) obj); + obj = (int[]) getPrimitiveArray((Integer[]) obj); } else if (obj instanceof Boolean[]) { obj = (boolean[]) getPrimitiveArray((Object[]) obj); } else if (obj instanceof Byte[]) { @@ -196,7 +187,7 @@ private static Object getPrimitiveArray(Object[][] array) { return null; } - private static Object getPrimitiveArray(Object[] array) { + private static Object getPrimitiveArray(T[] array) { Class arrayClass = array.getClass().getComponentType(); if (!arrayClass.isPrimitive()) { Class primitiveClass = getPrimitiveType(arrayClass); @@ -329,13 +320,11 @@ private static Class getPrimitiveType(Class type) { final static int CUSTOM = 114; final static String EMPTY_STRING = ""; - byte[] serialize(Object obj) - throws IOException { + byte[] serialize(Object obj) throws IOException { return serialize(obj, false); } - byte[] serialize(Object obj, boolean compress) - throws IOException { + byte[] serialize(Object obj, boolean compress) throws IOException { DataInputOutput ba = new DataInputOutput(); serialize(ba, obj, compress); @@ -343,19 +332,17 @@ byte[] serialize(Object obj, boolean compress) return ba.toByteArray(); } - private void serialize(final DataOutput out, final Object obj) - throws IOException { + private void serialize(final DataOutput out, final Object obj) throws 
IOException { serialize(out, obj, false); } - private void serialize(final DataOutput out, final Object obj, boolean compress) - throws IOException { + private void serialize(final DataOutput out, final Object obj, boolean compress) throws IOException { final Class clazz = obj != null ? obj.getClass() : null; if (obj == null) { out.write(NULL); } else if (clazz == Boolean.class) { - if (((Boolean) obj).booleanValue()) { + if ((boolean) obj) { out.write(BOOLEAN_TRUE); } else { out.write(BOOLEAN_FALSE); @@ -406,10 +393,11 @@ private void serialize(final DataOutput out, final Object obj, boolean compress) serializeLongLongArray(out, (long[][]) obj, compress); } else { // Custom - Serializer serializer = serializers.getSerializer(obj.getClass()); + var serializer = serializers.getSerializer(obj.getClass()); if (serializer != null) { - int index = serializers.getIndex(obj.getClass()); - out.write(CUSTOM + index); + var className = serializer.serializedClass().getName(); + out.write(CUSTOM); + out.writeChars(className); serializer.write(out, obj); } else if (obj instanceof Object[]) { serializeObjectArray(out, (Object[]) obj); @@ -419,8 +407,7 @@ private void serialize(final DataOutput out, final Object obj, boolean compress) } } - private static void serializeInt(final DataOutput out, final int val) - throws IOException { + private static void serializeInt(final DataOutput out, final int val) throws IOException { if (val == -1) { out.write(INTEGER_MINUS_1); } else if (val == 0) { @@ -455,8 +442,7 @@ private static void serializeInt(final DataOutput out, final int val) } } - private static void serializeDouble(final DataOutput out, final double val) - throws IOException { + private static void serializeDouble(final DataOutput out, final double val) throws IOException { if (val == -1d) { out.write(DOUBLE_MINUS_1); } else if (val == 0d) { @@ -475,8 +461,7 @@ private static void serializeDouble(final DataOutput out, final double val) } } - private static void serializeFloat(final DataOutput out, final float val) - throws IOException { + private static void serializeFloat(final DataOutput out, final float val) throws IOException { if (val == -1f) { out.write(FLOAT_MINUS_1); } else if (val == 0f) { @@ -495,8 +480,7 @@ private static void serializeFloat(final DataOutput out, final float val) } } - private static void serializeShort(final DataOutput out, final short val) - throws IOException { + private static void serializeShort(final DataOutput out, final short val) throws IOException { if (val == -1) { out.write(SHORT_MINUS_1); } else if (val == 0) { @@ -512,8 +496,7 @@ private static void serializeShort(final DataOutput out, final short val) } } - private static void serializeByte(final DataOutput out, final byte val) - throws IOException { + private static void serializeByte(final DataOutput out, final byte val) throws IOException { if (val == -1) { out.write(BYTE_MINUS_1); } else if (val == 0) { @@ -526,8 +509,7 @@ private static void serializeByte(final DataOutput out, final byte val) } } - private static void serializeLong(final DataOutput out, final long val) - throws IOException { + private static void serializeLong(final DataOutput out, final long val) throws IOException { if (val == -1) { out.write(LONG_MINUS_1); } else if (val == 0) { @@ -562,14 +544,12 @@ private static void serializeLong(final DataOutput out, final long val) } } - private static void serializeChar(final DataOutput out, final char val) - throws IOException { + private static void serializeChar(final DataOutput out, final 
char val) throws IOException { out.write(CHAR); out.writeChar(val); } - private static void serializeString(final DataOutput out, final String val) - throws IOException { + private static void serializeString(final DataOutput out, final String val) throws IOException { if (val.length() == 0) { out.write(STRING_EMPTY); } else { @@ -583,28 +563,24 @@ private static void serializeString(final DataOutput out, final String val) } } - private static void serializeBigInteger(final DataOutput out, final BigInteger val) - throws IOException { + private static void serializeBigInteger(final DataOutput out, final BigInteger val) throws IOException { out.write(BIGINTEGER); byte[] buf = val.toByteArray(); serializeByteArray(out, buf, false); } - private static void serializeBigDecimal(final DataOutput out, final BigDecimal val) - throws IOException { + private static void serializeBigDecimal(final DataOutput out, final BigDecimal val) throws IOException { out.write(BIGDECIMAL); serializeByteArray(out, val.unscaledValue().toByteArray(), false); LongPacker.packInt(out, val.scale()); } - private static void serializeClass(final DataOutput out, final Class val) - throws IOException { + private static void serializeClass(final DataOutput out, final Class val) throws IOException { out.write(CLASS); serializeString(out, val.getName()); } - private static void serializeBooleanArray(final DataOutput out, final boolean[] val) - throws IOException { + private static void serializeBooleanArray(final DataOutput out, final boolean[] val) throws IOException { out.write(BOOLEAN_ARRAY); LongPacker.packInt(out, val.length); for (boolean s : val) { @@ -612,8 +588,7 @@ private static void serializeBooleanArray(final DataOutput out, final boolean[] } } - private static void serializeShortArray(final DataOutput out, final short[] val, boolean compress) - throws IOException { + private static void serializeShortArray(final DataOutput out, final short[] val, boolean compress) throws IOException { if (compress && val.length > 250) { out.write(SHORT_ARRAY_C); byte[] b = Snappy.compress(val); @@ -628,8 +603,7 @@ private static void serializeShortArray(final DataOutput out, final short[] val, } } - private static void serializeDoubleArray(final DataOutput out, final double[] val, boolean compress) - throws IOException { + private static void serializeDoubleArray(final DataOutput out, final double[] val, boolean compress) throws IOException { if (compress && val.length > 250) { out.write(DOUBLE_ARRAY_C); byte[] b = Snappy.compress(val); @@ -644,8 +618,7 @@ private static void serializeDoubleArray(final DataOutput out, final double[] va } } - private static void serializeFloatArray(final DataOutput out, final float[] val, boolean compress) - throws IOException { + private static void serializeFloatArray(final DataOutput out, final float[] val, boolean compress) throws IOException { if (compress && val.length > 250) { out.write(FLOAT_ARRAY_C); byte[] b = Snappy.compress(val); @@ -660,8 +633,7 @@ private static void serializeFloatArray(final DataOutput out, final float[] val, } } - private static void serializeCharArray(final DataOutput out, final char[] val, boolean compress) - throws IOException { + private static void serializeCharArray(final DataOutput out, final char[] val, boolean compress) throws IOException { if (compress && val.length > 250) { out.write(CHAR_ARRAY_C); byte[] b = Snappy.compress(val); @@ -676,8 +648,7 @@ private static void serializeCharArray(final DataOutput out, final char[] val, b } } - private static void 
serializeIntArray(final DataOutput out, final int[] val, boolean compress) - throws IOException { + private static void serializeIntArray(final DataOutput out, final int[] val, boolean compress) throws IOException { int max = Integer.MIN_VALUE; int min = Integer.MAX_VALUE; for (int i : val) { @@ -717,8 +688,7 @@ private static void serializeIntArray(final DataOutput out, final int[] val, boo } } - private static void serializeIntIntArray(final DataOutput out, final int[][] val, boolean compress) - throws IOException { + private static void serializeIntIntArray(final DataOutput out, final int[][] val, boolean compress) throws IOException { out.write(INT_INT_ARRAY); LongPacker.packInt(out, val.length); @@ -727,8 +697,7 @@ private static void serializeIntIntArray(final DataOutput out, final int[][] val } } - private static void serializeLongArray(final DataOutput out, final long[] val, boolean compress) - throws IOException { + private static void serializeLongArray(final DataOutput out, final long[] val, boolean compress) throws IOException { long max = Long.MIN_VALUE; long min = Long.MAX_VALUE; for (long i : val) { @@ -774,8 +743,7 @@ private static void serializeLongArray(final DataOutput out, final long[] val, b } } - private static void serializeLongLongArray(final DataOutput out, final long[][] val, boolean compress) - throws IOException { + private static void serializeLongLongArray(final DataOutput out, final long[][] val, boolean compress) throws IOException { out.write(LONG_LONG_ARRAY); LongPacker.packInt(out, val.length); @@ -784,8 +752,7 @@ private static void serializeLongLongArray(final DataOutput out, final long[][] } } - private static void serializeByteArray(final DataOutput out, final byte[] val, boolean compress) - throws IOException { + private static void serializeByteArray(final DataOutput out, final byte[] val, boolean compress) throws IOException { if (compress && val.length > 250) { out.write(BYTE_ARRAY_C); byte[] b = Snappy.compress(val); @@ -798,8 +765,7 @@ private static void serializeByteArray(final DataOutput out, final byte[] val, b } } - private static void serializeStringArray(final DataOutput out, final String[] val) - throws IOException { + private static void serializeStringArray(final DataOutput out, final String[] val) throws IOException { out.write(STRING_ARRAY); LongPacker.packInt(out, val.length); for (String s : val) { @@ -807,8 +773,7 @@ private static void serializeStringArray(final DataOutput out, final String[] va } } - private void serializeObjectArray(final DataOutput out, final Object[] val) - throws IOException { + private void serializeObjectArray(final DataOutput out, final Object[] val) throws IOException { out.write(ARRAY_OBJECT); LongPacker.packInt(out, val.length); for (Object o : val) { @@ -816,8 +781,7 @@ private void serializeObjectArray(final DataOutput out, final Object[] val) } } - public Object deserialize(byte[] buf) - throws ClassNotFoundException, IOException { + public Object deserialize(byte[] buf) throws ClassNotFoundException, IOException { DataInputOutput bs = new DataInputOutput(buf); Object ret = deserialize(bs); if (bs.available() != 0) { @@ -827,14 +791,17 @@ public Object deserialize(byte[] buf) return ret; } - public Object deserialize(DataInput is) - throws IOException, ClassNotFoundException { + public Object deserialize(DataInput is) throws IOException, ClassNotFoundException { Object ret = null; final int head = is.readUnsignedByte(); if (head >= CUSTOM) { - Serializer serializer = serializers.getSerializer(head 
- CUSTOM); + var className = is.readUTF(); + Serializer serializer = serializers.getSerializer(className); + if (serializer == null) { + throw new ClassNotFoundException("Serializer not registered: " + className); + } ret = serializer.read(is); } else { switch (head) { @@ -1107,16 +1074,13 @@ private static String deserializeString(DataInput buf) return new String(b); } - private static Class deserializeClass(DataInput is) - throws IOException, ClassNotFoundException { + private static Class deserializeClass(DataInput is) throws IOException, ClassNotFoundException { is.readByte(); - String className = (String) deserializeString(is); - Class cls = Class.forName(className); - return cls; + String className = deserializeString(is); + return Class.forName(className); } - private static short[] deserializeShortArray(DataInput is) - throws IOException { + private static short[] deserializeShortArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); short[] ret = new short[size]; for (int i = 0; i < size; i++) { @@ -1125,8 +1089,7 @@ private static short[] deserializeShortArray(DataInput is) return ret; } - private static short[] deserializeShortCompressedArray(DataInput is) - throws IOException { + private static short[] deserializeShortCompressedArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); byte[] b = new byte[size]; is.readFully(b); @@ -1143,16 +1106,14 @@ private static float[] deserializeFloatArray(DataInput is) return ret; } - private static float[] deserializeFloatCompressedArray(DataInput is) - throws IOException { + private static float[] deserializeFloatCompressedArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); byte[] b = new byte[size]; is.readFully(b); return Snappy.uncompressFloatArray(b); } - private static double[] deserializeDoubleArray(DataInput is) - throws IOException { + private static double[] deserializeDoubleArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); double[] ret = new double[size]; for (int i = 0; i < size; i++) { @@ -1161,16 +1122,14 @@ private static double[] deserializeDoubleArray(DataInput is) return ret; } - private static double[] deserializeDoubleCompressedArray(DataInput is) - throws IOException { + private static double[] deserializeDoubleCompressedArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); byte[] b = new byte[size]; is.readFully(b); return Snappy.uncompressDoubleArray(b); } - private static char[] deserializeCharArray(DataInput is) - throws IOException { + private static char[] deserializeCharArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); char[] ret = new char[size]; for (int i = 0; i < size; i++) { @@ -1179,16 +1138,14 @@ private static char[] deserializeCharArray(DataInput is) return ret; } - private static char[] deserializeCharCompressedArray(DataInput is) - throws IOException { + private static char[] deserializeCharCompressedArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); byte[] b = new byte[size]; is.readFully(b); return Snappy.uncompressCharArray(b); } - private static boolean[] deserializeBooleanArray(DataInput is) - throws IOException { + private static boolean[] deserializeBooleanArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); boolean[] ret = new boolean[size]; for (int i = 0; i < size; i++) { @@ -1197,8 +1154,7 @@ private static boolean[] deserializeBooleanArray(DataInput is) return ret; } - private static 
String[] deserializeStringArray(DataInput is) - throws IOException { + private static String[] deserializeStringArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); String[] ret = new String[size]; for (int i = 0; i < size; i++) { @@ -1217,16 +1173,19 @@ private static String[] deserializeStringArray(DataInput is) return ret; } - private static byte[] deserializeByteArray(DataInput is) - throws IOException { + private static final byte[] EMPTY_BYTES = new byte[0]; + + private static byte[] deserializeByteArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); + if (size == 0) { + return EMPTY_BYTES; + } byte[] b = new byte[size]; is.readFully(b); return b; } - private static byte[] deserializeByteCompressedArray(DataInput is) - throws IOException { + private static byte[] deserializeByteCompressedArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); byte[] b = new byte[size]; is.readFully(b); @@ -1244,8 +1203,7 @@ private Object[] deserializeArrayObject(DataInput is) return s; } - private static long[] deserializeArrayLongL(DataInput is) - throws IOException { + private static long[] deserializeArrayLongL(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); long[] ret = new long[size]; for (int i = 0; i < size; i++) { @@ -1254,8 +1212,7 @@ private static long[] deserializeArrayLongL(DataInput is) return ret; } - private static long[] deserializeArrayLongI(DataInput is) - throws IOException { + private static long[] deserializeArrayLongI(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); long[] ret = new long[size]; for (int i = 0; i < size; i++) { @@ -1264,8 +1221,7 @@ private static long[] deserializeArrayLongI(DataInput is) return ret; } - private static long[] deserializeArrayLongS(DataInput is) - throws IOException { + private static long[] deserializeArrayLongS(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); long[] ret = new long[size]; for (int i = 0; i < size; i++) { @@ -1274,8 +1230,7 @@ private static long[] deserializeArrayLongS(DataInput is) return ret; } - private static long[] deserializeArrayLongB(DataInput is) - throws IOException { + private static long[] deserializeArrayLongB(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); long[] ret = new long[size]; for (int i = 0; i < size; i++) { @@ -1287,16 +1242,14 @@ private static long[] deserializeArrayLongB(DataInput is) return ret; } - private static long[] deserializeArrayLongCompressed(DataInput is) - throws IOException { + private static long[] deserializeArrayLongCompressed(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); byte[] b = new byte[size]; is.readFully(b); return Snappy.uncompressLongArray(b); } - private static long[][] deserializeLongLongArray(DataInput is) - throws IOException { + private static long[][] deserializeLongLongArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); long[][] res = new long[size][]; for (int i = 0; i < size; i++) { @@ -1327,8 +1280,7 @@ private static long[][] deserializeLongLongArray(DataInput is) return res; } - private static int[][] deserializeIntIntArray(DataInput is) - throws IOException { + private static int[][] deserializeIntIntArray(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); int[][] res = new int[size][]; for (int i = 0; i < size; i++) { @@ -1356,8 +1308,7 @@ private static int[][] deserializeIntIntArray(DataInput is) return res; } - private 
static int[] deserializeArrayIntI(DataInput is) - throws IOException { + private static int[] deserializeArrayIntI(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); int[] ret = new int[size]; for (int i = 0; i < size; i++) { @@ -1366,8 +1317,7 @@ private static int[] deserializeArrayIntI(DataInput is) return ret; } - private static int[] deserializeArrayIntS(DataInput is) - throws IOException { + private static int[] deserializeArrayIntS(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); int[] ret = new int[size]; for (int i = 0; i < size; i++) { @@ -1376,8 +1326,7 @@ private static int[] deserializeArrayIntS(DataInput is) return ret; } - private static int[] deserializeArrayIntB(DataInput is) - throws IOException { + private static int[] deserializeArrayIntB(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); int[] ret = new int[size]; for (int i = 0; i < size; i++) { @@ -1389,8 +1338,7 @@ private static int[] deserializeArrayIntB(DataInput is) return ret; } - private static int[] deserializeArrayIntPack(DataInput is) - throws IOException { + private static int[] deserializeArrayIntPack(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); if (size < 0) { throw new EOFException(); @@ -1403,16 +1351,14 @@ private static int[] deserializeArrayIntPack(DataInput is) return ret; } - private static int[] deserializeArrayIntCompressed(DataInput is) - throws IOException { + private static int[] deserializeArrayIntCompressed(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); byte[] b = new byte[size]; is.readFully(b); return Snappy.uncompressIntArray(b); } - private static long[] deserializeArrayLongPack(DataInput is) - throws IOException { + private static long[] deserializeArrayLongPack(DataInput is) throws IOException { int size = LongPacker.unpackInt(is); if (size < 0) { throw new EOFException(); diff --git a/paldb/src/main/java/com/linkedin/paldb/impl/StorageWriter.java b/src/main/java/com/linkedin/paldb/impl/StorageWriter.java similarity index 64% rename from paldb/src/main/java/com/linkedin/paldb/impl/StorageWriter.java rename to src/main/java/com/linkedin/paldb/impl/StorageWriter.java index 69437ea..4e0e622 100644 --- a/paldb/src/main/java/com/linkedin/paldb/impl/StorageWriter.java +++ b/src/main/java/com/linkedin/paldb/impl/StorageWriter.java @@ -15,35 +15,22 @@ package com.linkedin.paldb.impl; import com.linkedin.paldb.api.Configuration; -import com.linkedin.paldb.utils.FormatVersion; -import com.linkedin.paldb.utils.HashUtils; -import com.linkedin.paldb.utils.LongPacker; -import com.linkedin.paldb.utils.TempUtils; -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.io.RandomAccessFile; +import com.linkedin.paldb.api.errors.*; +import com.linkedin.paldb.utils.*; +import org.slf4j.*; + +import java.io.*; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.text.DecimalFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; +import java.util.*; /** * Internal write implementation. 
*/ public class StorageWriter { - private final static Logger LOGGER = Logger.getLogger(StorageWriter.class.getName()); + private static final Logger log = LoggerFactory.getLogger(StorageWriter.class); // Configuration private final Configuration config; private final double loadFactor; @@ -73,8 +60,6 @@ public class StorageWriter { // Number of collisions private int collisions; - private HashUtils hashUtils; - StorageWriter(Configuration configuration, OutputStream stream) { config = configuration; loadFactor = config.getDouble(Configuration.LOAD_FACTOR); @@ -85,7 +70,7 @@ public class StorageWriter { // Create temp path folder tempFolder = TempUtils.createTempDir("paldbtempwriter"); tempFolder.deleteOnExit(); - LOGGER.log(Level.INFO, "Creating temporary folder at {0}", tempFolder.toString()); + log.info("Creating temporary folder at {}", tempFolder); outputStream = stream instanceof BufferedOutputStream ? stream : new BufferedOutputStream(stream); indexStreams = new DataOutputStream[0]; dataStreams = new DataOutputStream[0]; @@ -96,8 +81,7 @@ public class StorageWriter { dataLengths = new long[0]; maxOffsetLengths = new int[0]; keyCounts = new int[0]; - hashUtils = new HashUtils(); - } + } public void put(byte[] key, byte[] value) throws IOException { @@ -161,33 +145,37 @@ public void close() } // Stats - LOGGER.log(Level.INFO, "Number of keys: {0}", keyCount); - LOGGER.log(Level.INFO, "Number of values: {0}", valueCount); + log.info("Number of keys: {}", keyCount); + log.info("Number of values: {}", valueCount); - // Prepare files to merge - List filesToMerge = new ArrayList(); + var bloomFilter = config.getBoolean(Configuration.BLOOM_FILTER_ENABLED) ? + new BloomFilter(keyCount, config.getDouble(Configuration.BLOOM_FILTER_ERROR_FACTOR, 0.01)) : + null; - try { - //Write metadata file - File metadataFile = new File(tempFolder, "metadata.dat"); - metadataFile.deleteOnExit(); - FileOutputStream metadataOututStream = new FileOutputStream(metadataFile); - DataOutputStream metadataDataOutputStream = new DataOutputStream(metadataOututStream); - writeMetadata(metadataDataOutputStream); - metadataDataOutputStream.close(); - metadataOututStream.close(); - filesToMerge.add(metadataFile); + // Prepare files to merge + List filesToMerge = new ArrayList<>(); + try { // Build index file for (int i = 0; i < indexFiles.length; i++) { if (indexFiles[i] != null) { - filesToMerge.add(buildIndex(i)); + filesToMerge.add(buildIndex(i, bloomFilter)); } } + //Write metadata file + File metadataFile = new File(tempFolder, "metadata.dat"); + metadataFile.deleteOnExit(); + try (FileOutputStream metadataOutputStream = new FileOutputStream(metadataFile); + DataOutputStream metadataDataOutputStream = new DataOutputStream(metadataOutputStream)) { + writeMetadata(metadataDataOutputStream, bloomFilter); + } + + filesToMerge.add(0, metadataFile); + // Stats collisions - LOGGER.log(Level.INFO, "Number of collisions: {0}", collisions); + log.info("Number of collisions: {}", collisions); // Add data files for (File dataFile : dataFiles) { @@ -205,8 +193,7 @@ public void close() } } - private void writeMetadata(DataOutputStream dataOutputStream) - throws IOException { + private void writeMetadata(DataOutputStream dataOutputStream, BloomFilter bloomFilter) throws IOException { //Write format version dataOutputStream.writeUTF(FormatVersion.getLatestVersion().name()); @@ -220,6 +207,19 @@ private void writeMetadata(DataOutputStream dataOutputStream) //Write size (number of keys) dataOutputStream.writeInt(keyCount); + //write 
bloom filter bit size + dataOutputStream.writeInt(bloomFilter != null ? bloomFilter.bitSize() : 0); + //write bloom filter long array size + dataOutputStream.writeInt(bloomFilter != null ? bloomFilter.bits().length : 0); + //write bloom filter hash functions + dataOutputStream.writeInt(bloomFilter != null ? bloomFilter.hashFunctions() : 0); + //write bloom filter bits + if (bloomFilter != null) { + for (final long bit : bloomFilter.bits()) { + dataOutputStream.writeLong(bit); + } + } + //Write the number of different key length dataOutputStream.writeInt(keyLengthCount); @@ -227,7 +227,7 @@ private void writeMetadata(DataOutputStream dataOutputStream) dataOutputStream.writeInt(maxKeyLength); // For each keyLength - long datasLength = 0l; + long datasLength = 0L; for (int i = 0; i < keyCounts.length; i++) { if (keyCounts[i] > 0) { // Write the key length @@ -258,21 +258,13 @@ private void writeMetadata(DataOutputStream dataOutputStream) } } - //Write serializers - try { - Serializers.serialize(dataOutputStream, config.getSerializers()); - } catch (Exception e) { - throw new RuntimeException(); - } - //Write the position of the index and the data int indexOffset = dataOutputStream.size() + (Integer.SIZE / Byte.SIZE) + (Long.SIZE / Byte.SIZE); dataOutputStream.writeInt(indexOffset); dataOutputStream.writeLong(indexOffset + indexesLength); } - private File buildIndex(int keyLength) - throws IOException { + private File buildIndex(int keyLength, BloomFilter bloomFilter) throws IOException { long count = keyCounts[keyLength]; int slots = (int) Math.round(count / loadFactor); int offsetLength = maxOffsetLengths[keyLength]; @@ -280,84 +272,73 @@ private File buildIndex(int keyLength) // Init index File indexFile = new File(tempFolder, "index" + keyLength + ".dat"); - RandomAccessFile indexAccessFile = new RandomAccessFile(indexFile, "rw"); - try { - indexAccessFile.setLength(slots * slotSize); - FileChannel indexChannel = indexAccessFile.getChannel(); - MappedByteBuffer byteBuffer = indexChannel.map(FileChannel.MapMode.READ_WRITE, 0, indexAccessFile.length()); + try (RandomAccessFile indexAccessFile = new RandomAccessFile(indexFile, "rw")) { + indexAccessFile.setLength((long) slots * slotSize); + try (FileChannel indexChannel = indexAccessFile.getChannel()) { + MappedByteBuffer byteBuffer = indexChannel.map(FileChannel.MapMode.READ_WRITE, 0, indexAccessFile.length()); // Init reading stream - File tempIndexFile = indexFiles[keyLength]; - DataInputStream tempIndexStream = new DataInputStream(new BufferedInputStream(new FileInputStream(tempIndexFile))); - try { - byte[] keyBuffer = new byte[keyLength]; - byte[] slotBuffer = new byte[slotSize]; - byte[] offsetBuffer = new byte[offsetLength]; - - // Read all keys - for (int i = 0; i < count; i++) { - // Read key - tempIndexStream.readFully(keyBuffer); - - // Read offset - long offset = LongPacker.unpackLong(tempIndexStream); - - // Hash - long hash = (long) hashUtils.hash(keyBuffer); - - boolean collision = false; - for (int probe = 0; probe < count; probe++) { - int slot = (int) ((hash + probe) % slots); - byteBuffer.position(slot * slotSize); - byteBuffer.get(slotBuffer); - - long found = LongPacker.unpackLong(slotBuffer, keyLength); - if (found == 0) { - // The spot is empty use it + File tempIndexFile = indexFiles[keyLength]; + try (DataInputStream tempIndexStream = new DataInputStream(new BufferedInputStream(new FileInputStream(tempIndexFile)))) { + byte[] keyBuffer = new byte[keyLength]; + byte[] slotBuffer = new byte[slotSize]; + byte[] 
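The bloom filter section of the metadata written above is three ints (bit size, backing `long[]` length, hash function count) followed by the raw words, with a bit size of 0 meaning the filter was disabled. A hedged sketch of the reader-side counterpart; the actual `StorageReader` is not part of this hunk and `readBloomFilter` is a hypothetical helper.

```java
// Sketch only: reconstruct the filter from the metadata fields written above,
// using the BloomFilter(hashFunctions, bitSize, bits) constructor added in this patch.
static BloomFilter readBloomFilter(DataInput in) throws IOException {
    int bitSize = in.readInt();        // 0 when bloom.filter.enabled was false at write time
    int wordCount = in.readInt();      // length of the backing long[]
    int hashFunctions = in.readInt();  // number of hash functions (k)
    if (bitSize == 0) {
        return null;                   // no filter was written, nothing more to read
    }
    long[] words = new long[wordCount];
    for (int i = 0; i < wordCount; i++) {
        words[i] = in.readLong();
    }
    return new BloomFilter(hashFunctions, bitSize, words);
}
```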
offsetBuffer = new byte[offsetLength]; + + // Read all keys + for (int i = 0; i < count; i++) { + // Read key + tempIndexStream.readFully(keyBuffer); + + // Read offset + long offset = LongPacker.unpackLong(tempIndexStream); + + // Hash + long hash = Murmur3.hash(keyBuffer); + if (bloomFilter != null) { + bloomFilter.add(keyBuffer); + } + + boolean collision = false; + for (int probe = 0; probe < count; probe++) { + int slot = (int) ((hash + probe) % slots); byteBuffer.position(slot * slotSize); - byteBuffer.put(keyBuffer); - int pos = LongPacker.packLong(offsetBuffer, offset); - byteBuffer.put(offsetBuffer, 0, pos); - break; - } else { - collision = true; - // Check for duplicates - if (Arrays.equals(keyBuffer, Arrays.copyOf(slotBuffer, keyLength))) { - throw new RuntimeException( - String.format("A duplicate key has been found for for key bytes %s", Arrays.toString(keyBuffer))); + byteBuffer.get(slotBuffer); + + long found = LongPacker.unpackLong(slotBuffer, keyLength); + if (found == 0) { + // The spot is empty use it + byteBuffer.position(slot * slotSize); + byteBuffer.put(keyBuffer); + int pos = LongPacker.packLong(offsetBuffer, offset); + byteBuffer.put(offsetBuffer, 0, pos); + break; + } else { + collision = true; + // Check for duplicates + if (Arrays.equals(keyBuffer, Arrays.copyOf(slotBuffer, keyLength))) { + throw new DuplicateKeyException( + String.format("A duplicate key has been found for key bytes %s", Arrays.toString(keyBuffer))); + } } } - } - if (collision) { - collisions++; + if (collision) { + collisions++; + } } - } - String msg = " Max offset length: " + offsetLength + " bytes" + - "\n Slot size: " + slotSize + " bytes"; + String msg = " Max offset length: " + offsetLength + " bytes" + + "\n Slot size: " + slotSize + " bytes"; - LOGGER.log(Level.INFO, "Built index file {0}\n" + msg, indexFile.getName()); - } finally { - // Close input - tempIndexStream.close(); + log.info("Built index file {} \n{}", indexFile.getName(), msg); - // Close index and make sure resources are liberated - indexChannel.close(); - indexChannel = null; - byteBuffer = null; - - // Delete temp index file - if (tempIndexFile.delete()) { - LOGGER.log(Level.INFO, "Temporary index file {0} has been deleted", tempIndexFile.getName()); + } finally { + if (tempIndexFile.delete()) { + log.info("Temporary index file {} has been deleted", tempIndexFile.getName()); + } } } - } finally{ - indexAccessFile.close(); - indexAccessFile = null; - System.gc(); } - return indexFile; } @@ -372,12 +353,12 @@ private void checkFreeDiskSpace(List inputFiles) { usableSpace = f.getUsableSpace(); } } - LOGGER.log(Level.INFO, "Total expected store size is {0} Mb", + log.info("Total expected store size is {} Mb", new DecimalFormat("#,##0.0").format(totalSize / (1024 * 1024))); - LOGGER.log(Level.INFO, "Usable free space on the system is {0} Mb", + log.info("Usable free space on the system is {} Mb", new DecimalFormat("#,##0.0").format(usableSpace / (1024 * 1024))); if (totalSize / (double) usableSpace >= 0.66) { - throw new RuntimeException("Aborting because there isn' enough free disk space"); + throw new OutOfDiskSpace("Aborting because there isn't enough free disk space"); } } @@ -389,39 +370,32 @@ private void mergeFiles(List inputFiles, OutputStream outputStream) //Merge files for (File f : inputFiles) { if (f.exists()) { - FileInputStream fileInputStream = new FileInputStream(f); - BufferedInputStream bufferedInputStream = new BufferedInputStream(fileInputStream); - try { - LOGGER.log(Level.INFO, "Merging {0} 
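`buildIndex` lays each slot out as the key bytes followed by a variable-length packed offset, and relies on `Murmur3.hash` being non-negative so `(hash + probe) % slots` never goes negative. A hedged sketch of the matching read-side probe (not the actual `StorageReader` code); imports of `Murmur3`, `LongPacker`, `java.util.Arrays` and `java.nio.MappedByteBuffer` are assumed.

```java
// Sketch only: probe the index the same way it was built; a zero unpacked
// offset is the writer's own "empty slot" marker.
static long findOffset(MappedByteBuffer index, byte[] key, int slots, int slotSize) {
    byte[] slotBuffer = new byte[slotSize];
    long hash = Murmur3.hash(key);
    for (int probe = 0; probe < slots; probe++) {
        int slot = (int) ((hash + probe) % slots);
        index.position(slot * slotSize);
        index.get(slotBuffer);
        long offset = LongPacker.unpackLong(slotBuffer, key.length);
        if (offset == 0) {
            return -1;                                        // empty slot: key not present
        }
        if (Arrays.equals(key, Arrays.copyOf(slotBuffer, key.length))) {
            return offset;                                    // match: data starts at this offset
        }
    }
    return -1;
}
```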
size={1}", new Object[]{f.getName(), f.length()}); + try (FileInputStream fileInputStream = new FileInputStream(f); + BufferedInputStream bufferedInputStream = new BufferedInputStream(fileInputStream)) { + log.info("Merging {} size={}", f.getName(), f.length()); byte[] buffer = new byte[8192]; int length; while ((length = bufferedInputStream.read(buffer)) > 0) { outputStream.write(buffer, 0, length); } - } finally { - bufferedInputStream.close(); - fileInputStream.close(); } } else { - LOGGER.log(Level.INFO, "Skip merging file {0} because it doesn't exist", f.getName()); + log.info("Skip merging file {} because it doesn't exist", f.getName()); } } - - LOGGER.log(Level.INFO, "Time to merge {0} s", ((System.nanoTime() - startTime) / 1000000000.0)); + log.debug("Time to merge {} s", ((System.nanoTime() - startTime) / 1000000000.0)); } //Cleanup files private void cleanup(List inputFiles) { for (File f : inputFiles) { - if (f.exists()) { - if (f.delete()) { - LOGGER.log(Level.INFO, "Deleted temporary file {0}", f.getName()); - } + if (f.exists() && f.delete()) { + log.info("Deleted temporary file {}", f.getName()); } } if (tempFolder.delete()) { - LOGGER.log(Level.INFO, "Deleted temporary folder at {0}", tempFolder.getAbsolutePath()); + log.info("Deleted temporary folder at {}", tempFolder.getAbsolutePath()); } } @@ -480,8 +454,8 @@ private DataOutputStream getIndexStream(int keyLength) private int getNumKeyCount() { int res = 0; - for (int i = 0; i < keyCounts.length; i++) { - if (keyCounts[i] != 0) { + for (final int count : keyCounts) { + if (count != 0) { res++; } } diff --git a/paldb/src/main/java/com/linkedin/paldb/impl/StoreImpl.java b/src/main/java/com/linkedin/paldb/impl/StoreImpl.java similarity index 50% rename from paldb/src/main/java/com/linkedin/paldb/impl/StoreImpl.java rename to src/main/java/com/linkedin/paldb/impl/StoreImpl.java index 6ac350b..e5bcc65 100644 --- a/paldb/src/main/java/com/linkedin/paldb/impl/StoreImpl.java +++ b/src/main/java/com/linkedin/paldb/impl/StoreImpl.java @@ -14,16 +14,11 @@ package com.linkedin.paldb.impl; -import com.linkedin.paldb.api.Configuration; -import com.linkedin.paldb.api.StoreReader; -import com.linkedin.paldb.api.StoreWriter; +import com.linkedin.paldb.api.*; import com.linkedin.paldb.utils.TempUtils; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.logging.Level; -import java.util.logging.Logger; +import org.slf4j.*; + +import java.io.*; /** @@ -31,58 +26,58 @@ */ public final class StoreImpl { - private final static Logger LOGGER = Logger.getLogger(StoreImpl.class.getName()); + private static final Logger log = LoggerFactory.getLogger(StoreImpl.class); private StoreImpl() { } - public static StoreReader createReader(File file, Configuration config) { + public static StoreReader createReader(File file, Configuration config) { if (file == null || config == null) { throw new NullPointerException(); } - LOGGER.log(Level.INFO, "Initialize reader from file {0}", file.getName()); - return new ReaderImpl(config, file); + log.info("Initialize reader from file {}", file.getName()); + return new ReaderImpl<>(config, file); } - public static StoreReader createReader(InputStream stream, Configuration config) { + public static StoreReader createReader(InputStream stream, Configuration config) { if (stream == null || config == null) { throw new NullPointerException(); } - LOGGER.log(Level.INFO, "Initialize reader from stream, copying into temp folder"); + 
log.info("Initialize reader from stream, copying into temp folder"); try { File file = TempUtils.copyIntoTempFile("paldbtempreader", stream); - LOGGER.log(Level.INFO, "Copied stream into temp file {0}", file.getName()); - return new ReaderImpl(config, file); + log.info("Copied stream into temp file {}", file.getName()); + return new ReaderImpl<>(config, file); } catch (IOException ex) { - throw new RuntimeException(ex); + throw new UncheckedIOException(ex); } } - public static StoreWriter createWriter(File file, Configuration config) { + public static StoreWriter createWriter(File file, Configuration config) { if (file == null || config == null) { throw new NullPointerException(); } try { - LOGGER.log(Level.INFO, "Initialize writer from file {0}", file.getName()); + log.info("Initialize writer from file {}", file.getName()); File parent = file.getParentFile(); if (parent != null && !parent.exists()) { if (parent.mkdirs()) { - LOGGER.log(Level.INFO, "Creating directories for path {0}", file.getName()); + log.info("Creating directories for path {}", file.getName()); } else { throw new RuntimeException(String.format("Couldn't create directory %s", parent)); } } - return new WriterImpl(config, file); + return new WriterImpl<>(config, file); } catch (IOException ex) { - throw new RuntimeException(ex); + throw new UncheckedIOException(ex); } } - public static StoreWriter createWriter(OutputStream stream, Configuration config) { + public static StoreWriter createWriter(OutputStream stream, Configuration config) { if (stream == null || config == null) { throw new NullPointerException(); } - LOGGER.info("Initialize writer from stream"); - return new WriterImpl(config, stream); + log.info("Initialize writer from stream"); + return new WriterImpl<>(config, stream); } } diff --git a/paldb/src/main/java/com/linkedin/paldb/impl/WriterImpl.java b/src/main/java/com/linkedin/paldb/impl/WriterImpl.java similarity index 78% rename from paldb/src/main/java/com/linkedin/paldb/impl/WriterImpl.java rename to src/main/java/com/linkedin/paldb/impl/WriterImpl.java index 41421f0..d392533 100644 --- a/paldb/src/main/java/com/linkedin/paldb/impl/WriterImpl.java +++ b/src/main/java/com/linkedin/paldb/impl/WriterImpl.java @@ -14,23 +14,19 @@ package com.linkedin.paldb.impl; -import com.linkedin.paldb.api.Configuration; -import com.linkedin.paldb.api.StoreWriter; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.logging.Level; -import java.util.logging.Logger; +import com.linkedin.paldb.api.*; +import org.slf4j.*; + +import java.io.*; /** * Store writer implementation. 
*/ -public final class WriterImpl implements StoreWriter { +public final class WriterImpl implements StoreWriter { // Logger - private final static Logger LOGGER = Logger.getLogger(WriterImpl.class.getName()); + private static final Logger log = LoggerFactory.getLogger(WriterImpl.class); // Configuration private final Configuration config; // Storage @@ -77,7 +73,7 @@ private WriterImpl(Configuration config, OutputStream stream, File file) { this.file = file; // Open storage - LOGGER.log(Level.INFO, "Opening writer storage"); + log.debug("Opening writer storage"); serialization = new StorageSerialization(config); storage = new StorageWriter(config, outputStream); opened = true; @@ -88,16 +84,16 @@ public void close() { checkOpen(); try { if (file != null) { - LOGGER.log(Level.INFO, "Closing writer storage, writing to file at " + file.getAbsolutePath()); + log.info("Closing writer storage, writing to file at {}", file.getAbsolutePath()); } else { - LOGGER.log(Level.INFO, "Closing writer storage, writing to stream"); + log.info("Closing writer storage, writing to stream"); } storage.close(); outputStream.close(); opened = false; } catch (IOException ex) { - throw new RuntimeException(ex); + throw new UncheckedIOException(ex); } } @@ -107,7 +103,7 @@ public Configuration getConfiguration() { } @Override - public void put(Object key, Object value) { + public void put(K key, V value) { checkOpen(); if (key == null) { throw new NullPointerException(); @@ -116,12 +112,12 @@ public void put(Object key, Object value) { byte[] keyBytes = serialization.serializeKey(key); storage.put(keyBytes, serialization.serializeValue(value)); } catch (IOException ex) { - throw new RuntimeException(ex); + throw new UncheckedIOException(ex); } } @Override - public void putAll(Object[] keys, Object[] values) { + public void putAll(K[] keys, V[] values) { checkOpen(); if (keys == null || values == null) { throw new NullPointerException(); @@ -144,7 +140,7 @@ public void put(byte[] key, byte[] value) { try { storage.put(key, value); } catch (IOException ex) { - throw new RuntimeException(ex); + throw new UncheckedIOException(ex); } } diff --git a/paldb/src/main/java/com/linkedin/paldb/impl/package.html b/src/main/java/com/linkedin/paldb/impl/package.html similarity index 100% rename from paldb/src/main/java/com/linkedin/paldb/impl/package.html rename to src/main/java/com/linkedin/paldb/impl/package.html diff --git a/src/main/java/com/linkedin/paldb/utils/BloomFilter.java b/src/main/java/com/linkedin/paldb/utils/BloomFilter.java new file mode 100644 index 0000000..78c12d9 --- /dev/null +++ b/src/main/java/com/linkedin/paldb/utils/BloomFilter.java @@ -0,0 +1,88 @@ +package com.linkedin.paldb.utils; + +import java.util.Arrays; + +import static java.lang.Math.log; + +public class BloomFilter { + private static final int BITS_IN_LONG = 64; + private final long[] bits; + private final int hashFunctions; // Number of hash functions + private static final double LN2 = 0.6931471805599453; // ln(2) + private final int sizeInBits; + + public BloomFilter(int elements, int sizeInBits) { + this.sizeInBits = sizeInBits; + this.hashFunctions = Math.max(1, (int) Math.round(LN2 * sizeInBits / elements)); + this.bits = new long[Math.max(1, (int) Math.ceil((double) sizeInBits / BITS_IN_LONG))]; + } + + public BloomFilter(int expectedElements, double errorRate) { + this.sizeInBits = Math.max(BITS_IN_LONG, (int) Math.ceil( (-1 * expectedElements * log(errorRate)) / (LN2 * LN2))); + this.hashFunctions = Math.max(1, (int) 
Math.round(((double) sizeInBits / expectedElements) * LN2)); + this.bits = new long[Math.max(1, (int) Math.ceil((double) sizeInBits / BITS_IN_LONG))]; + } + + public BloomFilter(int hashFunctions, int bitSize, long[] bits) { + this.sizeInBits = bitSize; + this.bits = bits; + this.hashFunctions = hashFunctions; + } + + public void add(byte[] bytes) { + for (int i = 0; i < hashFunctions; i++) { + int value = Math.abs(Murmur3.hash(bytes, i) % sizeInBits); + setBit(value); + } + } + + public boolean mightContain(byte[] bytes) { + for (int i = 0; i < hashFunctions; i++) { + int value = Math.abs(Murmur3.hash(bytes, i) % sizeInBits); + if (!getBit(value)) return false; + } + return true; + } + + public long[] bits() { + return bits; + } + + private void setBit(int position) { + int flagIndex = position / BITS_IN_LONG; + int bitIndexInFlag = position % BITS_IN_LONG; + bits[flagIndex] |= (1L << bitIndexInFlag); + } + + private boolean getBit(int position) { + int flagIndex = position / BITS_IN_LONG; + int bitIndexInFlag = position % BITS_IN_LONG; + return ((bits[flagIndex] >> bitIndexInFlag) & 1L) == 1; + } + + public void clear() { + Arrays.fill(bits, 0L); + } + + public int bitSize() { + return sizeInBits; + } + + public int hashFunctions() { + return hashFunctions; + } + + @Override + public int hashCode() { + return Arrays.hashCode(bits) ^ hashFunctions; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof BloomFilter)) return false; + final var that = (BloomFilter) o; + return Arrays.equals(this.bits, that.bits) && this.hashFunctions == that.hashFunctions; + } +} + diff --git a/paldb/src/main/java/com/linkedin/paldb/utils/DataInputOutput.java b/src/main/java/com/linkedin/paldb/utils/DataInputOutput.java similarity index 75% rename from paldb/src/main/java/com/linkedin/paldb/utils/DataInputOutput.java rename to src/main/java/com/linkedin/paldb/utils/DataInputOutput.java index 83aae98..a6c6669 100644 --- a/paldb/src/main/java/com/linkedin/paldb/utils/DataInputOutput.java +++ b/src/main/java/com/linkedin/paldb/utils/DataInputOutput.java @@ -78,85 +78,72 @@ public int available() { } @Override - public void readFully(byte[] b) - throws IOException { + public void readFully(byte[] b) { readFully(b, 0, b.length); } @Override - public void readFully(byte[] b, int off, int len) - throws IOException { + public void readFully(byte[] b, int off, int len) { System.arraycopy(buf, pos, b, off, len); pos += len; } @Override - public int skipBytes(int n) - throws IOException { + public int skipBytes(int n) { pos += n; return n; } @Override - public boolean readBoolean() - throws IOException { + public boolean readBoolean() { return buf[pos++] == 1; } @Override - public byte readByte() - throws IOException { + public byte readByte() { return buf[pos++]; } @Override - public int readUnsignedByte() - throws IOException { + public int readUnsignedByte() { return buf[pos++] & 0xff; } @Override - public short readShort() - throws IOException { + public short readShort() { return (short) (((short) (buf[pos++] & 0xff) << 8) | ((short) (buf[pos++] & 0xff) << 0)); } @Override - public int readUnsignedShort() - throws IOException { + public int readUnsignedShort() { return (((int) (buf[pos++] & 0xff) << 8) | ((int) (buf[pos++] & 0xff) << 0)); } @Override - public char readChar() - throws IOException { + public char readChar() { return (char) readInt(); } @Override - public int readInt() - throws IOException { + public int readInt() { return (((buf[pos++] & 0xff) << 24) 
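The `(int expectedElements, double errorRate)` constructor above implements the usual sizing formulas, m = ceil(-n * ln p / (ln 2)^2) bits and k = round((m / n) * ln 2) hash functions. A rough worked example; the concrete numbers are not asserted anywhere in the patch, they just follow from those formulas.

```java
// For one million keys at a 1% target error rate:
var filter = new BloomFilter(1_000_000, 0.01);
System.out.println(filter.bitSize());        // ~9_585_059 bits, i.e. roughly 1.2 MB
System.out.println(filter.hashFunctions());  // 7
filter.add("foo".getBytes());
filter.mightContain("foo".getBytes());       // always true for keys that were added
filter.mightContain("bar".getBytes());       // very likely false (at most ~1% false positives at capacity)
```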
| ((buf[pos++] & 0xff) << 16) | ((buf[pos++] & 0xff) << 8) | ( (buf[pos++] & 0xff) << 0)); } @Override - public long readLong() - throws IOException { + public long readLong() { return (((long) (buf[pos++] & 0xff) << 56) | ((long) (buf[pos++] & 0xff) << 48) | ((long) (buf[pos++] & 0xff) << 40) | ((long) (buf[pos++] & 0xff) << 32) | ((long) (buf[pos++] & 0xff) << 24) | ((long) (buf[pos++] & 0xff) << 16) | ((long) (buf[pos++] & 0xff) << 8) | ((long) (buf[pos++] & 0xff) << 0)); } @Override - public float readFloat() - throws IOException { + public float readFloat() { return Float.intBitsToFloat(readInt()); } @Override - public double readDouble() - throws IOException { + public double readDouble() { return Double.longBitsToDouble(readLong()); } @@ -189,57 +176,49 @@ private void ensureAvail(int n) { } @Override - public void write(int b) - throws IOException { + public void write(int b) { ensureAvail(1); buf[pos++] = (byte) b; } @Override - public void write(byte[] b) - throws IOException { + public void write(byte[] b) { write(b, 0, b.length); } @Override - public void write(byte[] b, int off, int len) - throws IOException { + public void write(byte[] b, int off, int len) { ensureAvail(len); System.arraycopy(b, off, buf, pos, len); pos += len; } @Override - public void writeBoolean(boolean v) - throws IOException { + public void writeBoolean(boolean v) { ensureAvail(1); buf[pos++] = (byte) (v ? 1 : 0); } @Override - public void writeByte(int v) - throws IOException { + public void writeByte(int v) { ensureAvail(1); buf[pos++] = (byte) (v); } @Override - public void writeShort(int v) - throws IOException { + public void writeShort(int v) { ensureAvail(2); buf[pos++] = (byte) (0xff & (v >> 8)); buf[pos++] = (byte) (0xff & (v >> 0)); } @Override - public void writeChar(int v) - throws IOException { + public void writeChar(int v) { writeInt(v); } @Override - public void writeInt(int v) - throws IOException { + public void writeInt(int v) { ensureAvail(4); buf[pos++] = (byte) (0xff & (v >> 24)); buf[pos++] = (byte) (0xff & (v >> 16)); @@ -248,8 +227,7 @@ public void writeInt(int v) } @Override - public void writeLong(long v) - throws IOException { + public void writeLong(long v) { ensureAvail(8); buf[pos++] = (byte) (0xff & (v >> 56)); buf[pos++] = (byte) (0xff & (v >> 48)); @@ -262,15 +240,13 @@ public void writeLong(long v) } @Override - public void writeFloat(float v) - throws IOException { + public void writeFloat(float v) { ensureAvail(4); writeInt(Float.floatToIntBits(v)); } @Override - public void writeDouble(double v) - throws IOException { + public void writeDouble(double v) { ensureAvail(8); writeLong(Double.doubleToLongBits(v)); } @@ -299,59 +275,51 @@ public void writeUTF(String s) } @Override - public int read() - throws IOException { + public int read() { //is here just to implement ObjectInput return readUnsignedByte(); } @Override - public int read(byte[] b) - throws IOException { + public int read(byte[] b) { //is here just to implement ObjectInput readFully(b); return b.length; } @Override - public int read(byte[] b, int off, int len) - throws IOException { + public int read(byte[] b, int off, int len) { //is here just to implement ObjectInput readFully(b, off, len); return len; } @Override - public long skip(long n) - throws IOException { + public long skip(long n) { //is here just to implement ObjectInput pos += n; return n; } @Override - public void close() - throws IOException { + public void close() { //is here just to implement ObjectInput //do nothing } @Override - public 
void flush() - throws IOException { + public void flush() { //is here just to implement ObjectOutput //do nothing } @Override - public Object readObject() - throws ClassNotFoundException, IOException { + public Object readObject() { throw new UnsupportedOperationException("Not supported"); } @Override - public void writeObject(Object o) - throws IOException { + public void writeObject(Object o) { throw new UnsupportedOperationException("Not supported"); } } diff --git a/paldb/src/main/java/com/linkedin/paldb/utils/FormatVersion.java b/src/main/java/com/linkedin/paldb/utils/FormatVersion.java similarity index 82% rename from paldb/src/main/java/com/linkedin/paldb/utils/FormatVersion.java rename to src/main/java/com/linkedin/paldb/utils/FormatVersion.java index ff251dd..64be1a4 100644 --- a/paldb/src/main/java/com/linkedin/paldb/utils/FormatVersion.java +++ b/src/main/java/com/linkedin/paldb/utils/FormatVersion.java @@ -14,7 +14,7 @@ package com.linkedin.paldb.utils; -import java.io.IOException; +import java.io.*; import java.util.Arrays; @@ -42,13 +42,12 @@ public boolean is(FormatVersion fv) { * @return format version byte representation */ public byte[] getBytes() { - try { - DataInputOutput dio = new DataInputOutput(); + try (DataInputOutput dio = new DataInputOutput()) { dio.writeUTF(this.name()); byte[] res = dio.toByteArray(); return Arrays.copyOfRange(res, 1, res.length); } catch (IOException e) { - throw new RuntimeException(e); + throw new UncheckedIOException(e); } } @@ -60,14 +59,13 @@ public byte[] getBytes() { */ public static FormatVersion fromBytes(byte[] bytes) { String version = null; - try { - byte[] withSize = new byte[bytes.length + 1]; - withSize[0] = (byte) bytes.length; - System.arraycopy(bytes, 0, withSize, 1, bytes.length); - DataInputOutput dio = new DataInputOutput(withSize); + byte[] withSize = new byte[bytes.length + 1]; + withSize[0] = (byte) bytes.length; + System.arraycopy(bytes, 0, withSize, 1, bytes.length); + try (DataInputOutput dio = new DataInputOutput(withSize)) { version = dio.readUTF(); } catch (IOException e) { - throw new RuntimeException(e); + throw new UncheckedIOException(e); } try { return FormatVersion.valueOf(version); @@ -82,13 +80,12 @@ public static FormatVersion fromBytes(byte[] bytes) { * @return prefix byte representation */ public static byte[] getPrefixBytes() { - try { - DataInputOutput dio = new DataInputOutput(); + try (DataInputOutput dio = new DataInputOutput()) { dio.writeUTF("PALDB"); byte[] res = dio.toByteArray(); return Arrays.copyOfRange(res, 1, res.length); } catch (IOException e) { - throw new RuntimeException(e); + throw new UncheckedIOException(e); } } diff --git a/paldb/src/main/java/com/linkedin/paldb/utils/LongPacker.java b/src/main/java/com/linkedin/paldb/utils/LongPacker.java similarity index 86% rename from paldb/src/main/java/com/linkedin/paldb/utils/LongPacker.java rename to src/main/java/com/linkedin/paldb/utils/LongPacker.java index dd2eebf..fd798c1 100644 --- a/paldb/src/main/java/com/linkedin/paldb/utils/LongPacker.java +++ b/src/main/java/com/linkedin/paldb/utils/LongPacker.java @@ -42,11 +42,11 @@ private LongPacker() { * @return the number of bytes written * @throws IOException if an error occurs with the stream */ - static public int packLong(DataOutput os, long value) + public static int packLong(DataOutput os, long value) throws IOException { if (value < 0) { - throw new IllegalArgumentException("negative value: v=" + value); + throw new IllegalArgumentException("Negative value: v=" + value); } int i = 
1; @@ -66,13 +66,11 @@ static public int packLong(DataOutput os, long value) * @param ba the byte array * @param value the long value * @return the number of bytes written - * @throws IOException if an error occurs with the stream */ - static public int packLong(byte[] ba, long value) - throws IOException { + public static int packLong(byte[] ba, long value) { if (value < 0) { - throw new IllegalArgumentException("negative value: v=" + value); + throw new IllegalArgumentException("Negative value: v=" + value); } int i = 1; @@ -92,7 +90,7 @@ static public int packLong(byte[] ba, long value) * @return the long value * @throws IOException if an error occurs with the stream */ - static public long unpackLong(DataInput is) + public static long unpackLong(DataInput is) throws IOException { long result = 0; @@ -112,7 +110,7 @@ static public long unpackLong(DataInput is) * @param ba byte array * @return the long value */ - static public long unpackLong(byte[] ba) { + public static long unpackLong(byte[] ba) { return unpackLong(ba, 0); } @@ -125,7 +123,7 @@ static public long unpackLong(byte[] ba) { * @param index index in ba * @return the long value */ - static public long unpackLong(byte[] ba, int index) { + public static long unpackLong(byte[] ba, int index) { long result = 0; for (int offset = 0; offset < 64; offset += 7) { long b = ba[index++]; @@ -146,7 +144,7 @@ static public long unpackLong(byte[] ba, int index) { * @return the number of bytes written * @throws IOException if an error occurs with the stream */ - static public int packInt(DataOutput os, int value) + public static int packInt(DataOutput os, int value) throws IOException { if (value < 0) { @@ -171,7 +169,7 @@ static public int packInt(DataOutput os, int value) * @return the long value * @throws IOException if an error occurs with the stream */ - static public int unpackInt(DataInput is) + public static int unpackInt(DataInput is) throws IOException { for (int offset = 0, result = 0; offset < 32; offset += 7) { int b = is.readUnsignedByte(); @@ -188,10 +186,8 @@ static public int unpackInt(DataInput is) * * @param bb The byte buffer * @return the long value - * @throws IOException if an error occurs with the stream */ - static public int unpackInt(ByteBuffer bb) - throws IOException { + public static int unpackInt(ByteBuffer bb) { for (int offset = 0, result = 0; offset < 32; offset += 7) { int b = bb.get() & 0xffff; result |= (b & 0x7F) << offset; diff --git a/src/main/java/com/linkedin/paldb/utils/Murmur3.java b/src/main/java/com/linkedin/paldb/utils/Murmur3.java new file mode 100644 index 0000000..4b08fdf --- /dev/null +++ b/src/main/java/com/linkedin/paldb/utils/Murmur3.java @@ -0,0 +1,140 @@ +/* +* Copyright 2015 LinkedIn Corp. All rights reserved. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +*/ + +package com.linkedin.paldb.utils; + +import java.lang.invoke.*; +import java.nio.ByteOrder; + + +/** + * Hashing utility. + */ +public final class Murmur3 { + + private Murmur3() { } + + /** + * Returns the positive hash for the given bytes. 
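`packLong`/`unpackLong` use the usual 7-bits-per-byte varint: the low seven bits go first and the high bit of each byte says whether another byte follows, which is what the unpack loops above decode. A small worked example (byte values inferred from that decode loop, not quoted from the patch):

```java
byte[] buf = new byte[9];                      // 9 bytes cover any non-negative long
int written = LongPacker.packLong(buf, 300L);  // buf[0] = (byte) 0xAC, buf[1] = 0x02
long value = LongPacker.unpackLong(buf, 0);    // 300
System.out.println(written + " -> " + value);  // prints "2 -> 300"
```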
+ * + * @param bytes bytes to hash + * @return hash + */ + public static long hash(byte[] bytes) { + return hash32(bytes) & 0x7fffffff; + } + + public static int hash(byte[] bytes, int seed) { + return hash32(bytes, bytes.length, seed); + } + + // Constants for 32 bit variant + private static final int C1_32 = 0xcc9e2d51; + private static final int C2_32 = 0x1b873593; + private static final int R1_32 = 15; + private static final int R2_32 = 13; + private static final int M_32 = 5; + private static final int N_32 = 0xe6546b64; + + public static final int DEFAULT_SEED = 104729; + + //** MurMur3 ** + /** + * Generates 32 bit hash from byte array with the default seed. + * + * @param data - input byte array + * @return 32 bit hash + */ + public static int hash32(final byte[] data) { + return hash32(data, 0, data.length, DEFAULT_SEED); + } + + /** + * Generates 32 bit hash from byte array with the given length and seed. + * + * @param data - input byte array + * @param length - length of array + * @param seed - seed. (default 0) + * @return 32 bit hash + */ + public static int hash32(final byte[] data, final int length, final int seed) { + return hash32(data, 0, length, seed); + } + + private static final VarHandle INT_HANDLE = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN); + + private static int getIntLE(byte[] array, int offset) { + return (int)INT_HANDLE.get(array, offset); + } + + /** + * Generates 32 bit hash from byte array with the given length, offset and seed. + * + * @param data - input byte array + * @param offset - offset of data + * @param length - length of array + * @param seed - seed. (default 0) + * @return 32 bit hash + */ + public static int hash32(final byte[] data, final int offset, final int length, final int seed) { + int hash = seed; + final int nblocks = length >> 2; + + // body + for (int i = 0; i < nblocks; i++) { + final int i_4 = i << 2; + final int k = getIntLE(data, offset + i_4); + hash = mix32(k, hash); + } + + // tail + final int idx = nblocks << 2; + int k1 = 0; + switch (length - idx) { + case 3: + k1 ^= data[offset + idx + 2] << 16; + case 2: + k1 ^= data[offset + idx + 1] << 8; + case 1: + k1 ^= data[offset + idx]; + + // mix functions + k1 *= C1_32; + k1 = Integer.rotateLeft(k1, R1_32); + k1 *= C2_32; + hash ^= k1; + } + + return fmix32(length, hash); + } + + private static int mix32(int k, int hash) { + k *= C1_32; + k = Integer.rotateLeft(k, R1_32); + k *= C2_32; + hash ^= k; + return Integer.rotateLeft(hash, R2_32) * M_32 + N_32; + } + + private static int fmix32(final int length, int hash) { + hash ^= length; + hash ^= (hash >>> 16); + hash *= 0x85ebca6b; + hash ^= (hash >>> 13); + hash *= 0xc2b2ae35; + hash ^= (hash >>> 16); + + return hash; + } +} diff --git a/paldb/src/main/java/com/linkedin/paldb/utils/TempUtils.java b/src/main/java/com/linkedin/paldb/utils/TempUtils.java similarity index 63% rename from paldb/src/main/java/com/linkedin/paldb/utils/TempUtils.java rename to src/main/java/com/linkedin/paldb/utils/TempUtils.java index 5ea361e..b1ddaee 100644 --- a/paldb/src/main/java/com/linkedin/paldb/utils/TempUtils.java +++ b/src/main/java/com/linkedin/paldb/utils/TempUtils.java @@ -14,12 +14,7 @@ package com.linkedin.paldb.utils; -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; +import java.io.*; /** @@ -63,28 +58,17 @@ public static File createTempDir(String prefix) { */ public 
static File copyIntoTempFile(String fileName, InputStream inputStream) throws IOException { - BufferedInputStream bufferedStream = inputStream instanceof BufferedInputStream ? (BufferedInputStream) inputStream - : new BufferedInputStream(inputStream); - File destFile = null; - try { - destFile = File.createTempFile(fileName, null); - destFile.deleteOnExit(); - - FileOutputStream fileOutputStream = new FileOutputStream(destFile); - BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream); - try { - byte[] buffer = new byte[8192]; - int length; - while ((length = bufferedStream.read(buffer)) > 0) { - bufferedOutputStream.write(buffer, 0, length); - } - } finally { - bufferedOutputStream.close(); - fileOutputStream.close(); + File destFile = File.createTempFile(fileName, null); + destFile.deleteOnExit(); + try (BufferedInputStream bufferedStream = inputStream instanceof BufferedInputStream ? (BufferedInputStream) inputStream + : new BufferedInputStream(inputStream); + FileOutputStream fileOutputStream = new FileOutputStream(destFile); + BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream)) { + byte[] buffer = new byte[8192]; + int length; + while ((length = bufferedStream.read(buffer)) > 0) { + bufferedOutputStream.write(buffer, 0, length); } - } finally { - bufferedStream.close(); - inputStream.close(); } return destFile; } diff --git a/paldb/src/main/java/com/linkedin/paldb/utils/package.html b/src/main/java/com/linkedin/paldb/utils/package.html similarity index 100% rename from paldb/src/main/java/com/linkedin/paldb/utils/package.html rename to src/main/java/com/linkedin/paldb/utils/package.html diff --git a/src/test/java/com/linkedin/paldb/api/PalDBConfigBuilderTest.java b/src/test/java/com/linkedin/paldb/api/PalDBConfigBuilderTest.java new file mode 100644 index 0000000..5cdf09c --- /dev/null +++ b/src/test/java/com/linkedin/paldb/api/PalDBConfigBuilderTest.java @@ -0,0 +1,27 @@ +package com.linkedin.paldb.api; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class PalDBConfigBuilderTest { + + @Test + public void testAllPropertiesSet() { + var config = PalDBConfigBuilder.create() + .withMemoryMapSegmentSize(500) + .withMemoryMapDataEnabled(false) + .withIndexLoadFactor(0.5) + .withEnableCompression(true) + .withEnableBloomFilter(true) + .withBloomFilterErrorFactor(0.01) + .build(); + + assertEquals(500, config.getInt(Configuration.MMAP_SEGMENT_SIZE)); + assertFalse(config.getBoolean(Configuration.MMAP_DATA_ENABLED)); + assertEquals(0.5, config.getDouble(Configuration.LOAD_FACTOR)); + assertTrue(config.getBoolean(Configuration.COMPRESSION_ENABLED)); + assertTrue(config.getBoolean(Configuration.BLOOM_FILTER_ENABLED)); + assertEquals(0.01, config.getDouble(Configuration.BLOOM_FILTER_ERROR_FACTOR)); + } +} \ No newline at end of file diff --git a/paldb/src/test/java/com/linkedin/paldb/api/TestConfiguration.java b/src/test/java/com/linkedin/paldb/api/TestConfiguration.java similarity index 50% rename from paldb/src/test/java/com/linkedin/paldb/api/TestConfiguration.java rename to src/test/java/com/linkedin/paldb/api/TestConfiguration.java index 39f1441..0988470 100644 --- a/paldb/src/test/java/com/linkedin/paldb/api/TestConfiguration.java +++ b/src/test/java/com/linkedin/paldb/api/TestConfiguration.java @@ -14,16 +14,15 @@ package com.linkedin.paldb.api; + +import org.junit.jupiter.api.Test; + import java.awt.*; -import java.io.ByteArrayInputStream; -import 
java.io.ByteArrayOutputStream; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.util.Arrays; -import org.testng.Assert; -import org.testng.annotations.Test; +import java.io.*; +import java.util.*; + +import static java.util.Collections.singletonList; +import static org.junit.jupiter.api.Assertions.*; public class TestConfiguration { @@ -32,8 +31,8 @@ public class TestConfiguration { public void testConfiguration() { Configuration c = new Configuration(); c.set("foo", "bar"); - Assert.assertEquals(c.get("foo", null), "bar"); - Assert.assertEquals(c.get("bar", "foo"), "foo"); + assertEquals(c.get("foo", null), "bar"); + assertEquals(c.get("bar", "foo"), "foo"); } @Test @@ -42,24 +41,26 @@ public void testConfigurationCopy() { c.set("foo", "bar"); Configuration r = new Configuration(c); - Assert.assertEquals(r.get("foo", null), "bar"); + assertEquals(r.get("foo", null), "bar"); c.set("foo", ""); - Assert.assertEquals(r.get("foo", null), "bar"); + assertEquals(r.get("foo", null), "bar"); } - @Test(expectedExceptions = UnsupportedOperationException.class) + @Test public void testConfigurationReadOnly() { Configuration c = new Configuration(); c.set("foo", "bar"); - Configuration r = new Configuration(c); - r.set("foo", "bar"); + assertThrows(UnsupportedOperationException.class, () -> { + Configuration r = new Configuration(c); + r.set("foo", "bar"); + }); } @Test public void testEqualsEmpty() { - Assert.assertEquals(new Configuration(), new Configuration()); + assertEquals(new Configuration(), new Configuration()); } @Test @@ -73,8 +74,8 @@ public void testEquals() { Configuration c3 = new Configuration(); c3.set("foo", "notbar"); - Assert.assertEquals(c1, c2); - Assert.assertNotEquals(c1, c3); + assertEquals(c1, c2); + assertNotEquals(c1, c3); } @Test @@ -83,13 +84,13 @@ public void testGetBoolean() { c.set("foo", "true"); c.set("bar", "false"); - Assert.assertTrue(c.getBoolean("foo")); - Assert.assertFalse(c.getBoolean("bar")); + assertTrue(c.getBoolean("foo")); + assertFalse(c.getBoolean("bar")); } - @Test(expectedExceptions = IllegalArgumentException.class) + @Test public void testGetBooleanMissing() { - new Configuration().getBoolean("foo"); + assertThrows(IllegalArgumentException.class, () -> new Configuration().getBoolean("foo")); } @Test @@ -97,8 +98,8 @@ public void testGetBooleanDefault() { Configuration c = new Configuration(); c.set("foo", "true"); - Assert.assertTrue(c.getBoolean("foo", false)); - Assert.assertTrue(c.getBoolean("bar", true)); + assertTrue(c.getBoolean("foo", false)); + assertTrue(c.getBoolean("bar", true)); } @Test @@ -106,12 +107,12 @@ public void testGetDouble() { Configuration c = new Configuration(); c.set("foo", "1.0"); - Assert.assertEquals(c.getDouble("foo"), 1.0); + assertEquals(c.getDouble("foo"), 1.0); } - @Test(expectedExceptions = IllegalArgumentException.class) + @Test public void testGetDoubleMissing() { - new Configuration().getDouble("foo"); + assertThrows(IllegalArgumentException.class, () -> new Configuration().getDouble("foo")); } @Test @@ -119,8 +120,8 @@ public void testGetDoubleDefault() { Configuration c = new Configuration(); c.set("foo", "1.0"); - Assert.assertEquals(c.getDouble("foo", 2.0), 1.0); - Assert.assertEquals(c.getDouble("bar", 2.0), 2.0); + assertEquals(c.getDouble("foo", 2.0), 1.0); + assertEquals(c.getDouble("bar", 2.0), 2.0); } @Test @@ -128,12 +129,12 @@ public void testGetFloat() { Configuration c = new Configuration(); c.set("foo", 
"1.0"); - Assert.assertEquals(c.getFloat("foo"), 1f); + assertEquals(c.getFloat("foo"), 1f); } - @Test(expectedExceptions = IllegalArgumentException.class) + @Test public void testGetFloatMissing() { - new Configuration().getFloat("foo"); + assertThrows(IllegalArgumentException.class, () -> new Configuration().getFloat("foo")); } @Test @@ -141,8 +142,8 @@ public void testGetFloatDefault() { Configuration c = new Configuration(); c.set("foo", "1.0"); - Assert.assertEquals(c.getFloat("foo", 2f), 1f); - Assert.assertEquals(c.getFloat("bar", 2f), 2f); + assertEquals(c.getFloat("foo", 2f), 1f); + assertEquals(c.getFloat("bar", 2f), 2f); } @Test @@ -150,12 +151,12 @@ public void testGetInt() { Configuration c = new Configuration(); c.set("foo", "1"); - Assert.assertEquals(c.getInt("foo"), 1); + assertEquals(c.getInt("foo"), 1); } - @Test(expectedExceptions = IllegalArgumentException.class) + @Test public void testGetIntMissing() { - new Configuration().getInt("foo"); + assertThrows(IllegalArgumentException.class, () -> new Configuration().getInt("foo")); } @Test @@ -163,8 +164,8 @@ public void testGetIntDefault() { Configuration c = new Configuration(); c.set("foo", "1"); - Assert.assertEquals(c.getInt("foo", 2), 1); - Assert.assertEquals(c.getInt("bar", 2), 2); + assertEquals(c.getInt("foo", 2), 1); + assertEquals(c.getInt("bar", 2), 2); } @Test @@ -172,12 +173,12 @@ public void testGetShort() { Configuration c = new Configuration(); c.set("foo", "1"); - Assert.assertEquals(c.getShort("foo"), (short) 1); + assertEquals(c.getShort("foo"), (short) 1); } - @Test(expectedExceptions = IllegalArgumentException.class) + @Test public void testGetShortMissing() { - new Configuration().getShort("foo"); + assertThrows(IllegalArgumentException.class, () -> new Configuration().getShort("foo")); } @Test @@ -185,8 +186,13 @@ public void testGetShortDefault() { Configuration c = new Configuration(); c.set("foo", "1"); - Assert.assertEquals(c.getShort("foo", (short) 2), (short) 1); - Assert.assertEquals(c.getShort("bar", (short) 2), (short) 2); + assertEquals(c.getShort("foo", (short) 2), (short) 1); + assertEquals(c.getShort("bar", (short) 2), (short) 2); + } + + @Test + public void testGetLongMissing() { + assertThrows(IllegalArgumentException.class, () -> new Configuration().getLong("foo")); } @Test @@ -194,12 +200,7 @@ public void testGetLong() { Configuration c = new Configuration(); c.set("foo", "1"); - Assert.assertEquals(c.getLong("foo"), 1l); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testGetLongMissing() { - new Configuration().getLong("foo"); + assertEquals(c.getLong("foo"), 1L); } @Test @@ -207,8 +208,8 @@ public void testGetLongDefault() { Configuration c = new Configuration(); c.set("foo", "1"); - Assert.assertEquals(c.getLong("foo", 2l), 1l); - Assert.assertEquals(c.getLong("bar", 2l), 2l); + assertEquals(c.getLong("foo", 2L), 1L); + assertEquals(c.getLong("bar", 2L), 2L); } @Test @@ -217,13 +218,12 @@ public void testGetClass() Configuration c = new Configuration(); c.set("foo", Integer.class.getName()); - Assert.assertEquals(c.getClass("foo"), Integer.class); + assertEquals(c.getClass("foo"), Integer.class); } - @Test(expectedExceptions = IllegalArgumentException.class) - public void testGetClassMissing() - throws ClassNotFoundException { - new Configuration().getClass("foo"); + @Test + public void testGetClassMissing() { + assertThrows(IllegalArgumentException.class, () -> new Configuration().getClass("foo")); } @Test @@ -231,12 +231,12 @@ public void 
testGetList() { Configuration c = new Configuration(); c.set("foo", "foo,bar"); - Assert.assertEquals(c.getList("foo"), Arrays.asList("foo", "bar")); + assertEquals(c.getList("foo"), Arrays.asList("foo", "bar")); } - @Test(expectedExceptions = IllegalArgumentException.class) + @Test public void testGetListMissing() { - new Configuration().getList("foo"); + assertThrows(IllegalArgumentException.class, () -> new Configuration().getList("foo")); } @Test @@ -244,31 +244,8 @@ public void testGetListDefault() { Configuration c = new Configuration(); c.set("foo", "foo,bar"); - Assert.assertEquals(c.getList("foo", Arrays.asList("that")), Arrays.asList("foo", "bar")); - Assert.assertEquals(c.getList("bar", Arrays.asList("that")), Arrays.asList("that")); - } - - @Test - public void testSerialization() - throws Throwable { - Configuration c = new Configuration(); - c.set("foo", "bar"); - c.registerSerializer(new PointSerializer()); - - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - ObjectOutputStream out = new ObjectOutputStream(bos); - out.writeObject(c); - out.close(); - bos.close(); - - byte[] bytes = bos.toByteArray(); - ByteArrayInputStream bis = new ByteArrayInputStream(bytes); - ObjectInputStream in = new ObjectInputStream(bis); - Configuration sc = (Configuration) in.readObject(); - in.close(); - bis.close(); - - Assert.assertEquals(sc, c); + assertEquals(c.getList("foo", singletonList("that")), Arrays.asList("foo", "bar")); + assertEquals(c.getList("bar", singletonList("that")), singletonList("that")); } // UTILITY @@ -281,13 +258,14 @@ public Point read(DataInput input) { } @Override - public void write(DataOutput output, Point input) { - + public Class serializedClass() { + return Point.class; } @Override - public int getWeight(Point instance) { - return 0; + public void write(DataOutput output, Point input) { + } + } } diff --git a/paldb/src/test/java/com/linkedin/paldb/impl/GenerateTestData.java b/src/test/java/com/linkedin/paldb/impl/GenerateTestData.java similarity index 94% rename from paldb/src/test/java/com/linkedin/paldb/impl/GenerateTestData.java rename to src/test/java/com/linkedin/paldb/impl/GenerateTestData.java index a851c19..2f1a79c 100644 --- a/paldb/src/test/java/com/linkedin/paldb/impl/GenerateTestData.java +++ b/src/test/java/com/linkedin/paldb/impl/GenerateTestData.java @@ -14,11 +14,10 @@ package com.linkedin.paldb.impl; -import java.util.HashSet; -import java.util.Random; -import java.util.Set; import org.apache.commons.lang.RandomStringUtils; +import java.util.*; + /** * Utility to generate test data. 
@@ -53,7 +52,7 @@ public static Integer[] generateRandomIntKeys(int count, long seed) { public static Integer[] generateRandomIntKeys(int count, int range, long seed) { Random random = new Random(seed); - Set set = new HashSet(count); + Set set = new HashSet<>(count); while (set.size() < count) { set.add(random.nextInt(range)); } @@ -99,7 +98,7 @@ public static Object[] generateCompoundKeys(int count) { Object[] res = new Object[count]; Random random = new Random(345); for (int i = 0; i < count; i++) { - Object[] k = new Object[]{new Byte((byte) random.nextInt(10)), new Integer(i)}; + Object[] k = new Object[]{Byte.valueOf((byte) random.nextInt(10)), Integer.valueOf(i)}; res[i] = k; } return res; diff --git a/src/test/java/com/linkedin/paldb/impl/TestSerializers.java b/src/test/java/com/linkedin/paldb/impl/TestSerializers.java new file mode 100644 index 0000000..185d1f8 --- /dev/null +++ b/src/test/java/com/linkedin/paldb/impl/TestSerializers.java @@ -0,0 +1,148 @@ +/* +* Copyright 2015 LinkedIn Corp. All rights reserved. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +*/ + +package com.linkedin.paldb.impl; + +import com.linkedin.paldb.api.Serializer; +import org.junit.jupiter.api.*; + +import java.awt.*; +import java.io.*; + +import static org.junit.jupiter.api.Assertions.*; + + +public class TestSerializers { + + private Serializers serializers; + + @BeforeEach + public void setUp() { + serializers = new Serializers(); + } + + @Test + public void testRegister() { + ColorSerializer i = new ColorSerializer(); + serializers.registerSerializer(i); + assertSame(serializers.getSerializer(Color.class), i); + } + + @Test + public void testRegisterTwice() { + ColorSerializer i1 = new ColorSerializer(); + ColorSerializer i2 = new ColorSerializer(); + serializers.registerSerializer(i1); + serializers.registerSerializer(i2); + assertSame(serializers.getSerializer(Color.class), i1); + } + + @Test + public void testRegisterTwo() { + ColorSerializer i = new ColorSerializer(); + PointSerializer f = new PointSerializer(); + serializers.registerSerializer(i); + serializers.registerSerializer(f); + assertSame(serializers.getSerializer(Color.class), i); + assertSame(serializers.getSerializer(Point.class), f); + } + + @Test + public void testGetSerializer() { + ColorSerializer i = new ColorSerializer(); + serializers.registerSerializer(i); + assertNull(serializers.getSerializer(Point.class)); + assertNotNull(serializers.getSerializer(Color.class)); + } + + @Test + public void testSerialize() { + serializers.registerSerializer(new ColorSerializer()); + assertNotNull(serializers.getSerializer(Color.class)); + } + + @Test + public void testInterfaceType() { + SerializerWithInterface i = new SerializerWithInterface(); + serializers.registerSerializer(i); + assertSame(serializers.getSerializer(AnInterface.class), i); + } + + // HELPER + + public static class ColorSerializer implements Serializer { + + @Override + public Color read(DataInput input) { + return null; + } + + @Override + public Class serializedClass() { + return Color.class; + } + + @Override + public void write(DataOutput output, Color input) { + + } + + 
} + + public static class PointSerializer implements Serializer { + + @Override + public Point read(DataInput input) { + return null; + } + + @Override + public Class serializedClass() { + return Point.class; + } + + @Override + public void write(DataOutput output, Point input) { + + } + + } + + public interface AnInterface { + + } + + public static class AClass implements AnInterface { + + } + + public static class SerializerWithInterface implements Serializer { + + @Override + public AnInterface read(DataInput input) { + return null; + } + + @Override + public Class serializedClass() { + return AnInterface.class; + } + + @Override + public void write(DataOutput output, AnInterface input) { + + } + + } +} diff --git a/paldb/src/test/java/com/linkedin/paldb/impl/TestStorageSerialization.java b/src/test/java/com/linkedin/paldb/impl/TestStorageSerialization.java similarity index 77% rename from paldb/src/test/java/com/linkedin/paldb/impl/TestStorageSerialization.java rename to src/test/java/com/linkedin/paldb/impl/TestStorageSerialization.java index d7dfe4b..505ee11 100644 --- a/paldb/src/test/java/com/linkedin/paldb/impl/TestStorageSerialization.java +++ b/src/test/java/com/linkedin/paldb/impl/TestStorageSerialization.java @@ -14,33 +14,23 @@ package com.linkedin.paldb.impl; -import com.linkedin.paldb.api.Configuration; -import com.linkedin.paldb.api.Serializer; -import com.linkedin.paldb.api.UnsupportedTypeException; +import com.linkedin.paldb.api.*; +import com.linkedin.paldb.api.errors.UnsupportedTypeException; +import org.junit.jupiter.api.*; import java.awt.*; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInput; -import java.io.DataInputStream; -import java.io.DataOutput; -import java.io.DataOutputStream; -import java.io.IOException; -import java.math.BigDecimal; -import java.math.BigInteger; +import java.io.*; +import java.math.*; import java.util.Arrays; -import org.testng.Assert; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - +import static org.junit.jupiter.api.Assertions.*; public class TestStorageSerialization { private Configuration configuration; private StorageSerialization serialization; - @BeforeMethod + @BeforeEach public void setUp() { configuration = new Configuration(); serialization = new StorageSerialization(configuration); @@ -48,18 +38,18 @@ public void setUp() { @Test public void testCompressionEnabled() { - Assert.assertFalse(serialization.isCompressionEnabled()); + assertFalse(serialization.isCompressionEnabled()); Configuration config = new Configuration(); config.set(Configuration.COMPRESSION_ENABLED, "true"); StorageSerialization s = new StorageSerialization(config); - Assert.assertTrue(s.isCompressionEnabled()); + assertTrue(s.isCompressionEnabled()); } @Test public void testSerializeKey() throws IOException, ClassNotFoundException { Integer l = 1; Object d = serialization.deserialize(serialization.serializeKey(l)); - Assert.assertEquals(d, l); + assertEquals(d, l); } @Test @@ -73,7 +63,7 @@ public void testSerializeKeyDataOutput() throws IOException, ClassNotFoundExcept ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); DataInputStream dis = new DataInputStream(bis); - Assert.assertEquals(serialization.deserialize(dis), l); + assertEquals(serialization.deserialize(dis), l); } @Test @@ -87,12 +77,12 @@ public void testSerializeValueDataOutput() throws IOException, ClassNotFoundExce ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); 
DataInputStream dis = new DataInputStream(bis); - Assert.assertEquals(serialization.deserialize(dis), l); + assertEquals(serialization.deserialize(dis), l); } - @Test(expectedExceptions = NullPointerException.class) - public void testSerializeKeyNull() throws IOException, ClassNotFoundException { - serialization.serializeKey(null); + @Test + public void testSerializeKeyNull() { + assertThrows(NullPointerException.class, () -> serialization.serializeKey(null)); } @Test @@ -100,7 +90,7 @@ public void testTransformValue() throws ClassNotFoundException, IOException { Integer l = 1; Object deserialize = serialization.deserialize(serialization.serializeValue(l)); - Assert.assertEquals(deserialize, l); + assertEquals(deserialize, l); } @Test @@ -108,8 +98,8 @@ public void testTransformList() throws ClassNotFoundException, IOException { Integer[] l = new Integer[]{1, 2}; Object deserialize = serialization.deserialize(serialization.serializeValue(l)); - Assert.assertEquals(deserialize.getClass(), int[].class); - Assert.assertEquals(deserialize, new int[]{1, 2}); + assertEquals(deserialize.getClass(), int[].class); + assertArrayEquals((int[]) deserialize, new int[]{1, 2}); } @Test @@ -117,8 +107,8 @@ public void testTransformListWithNull() throws ClassNotFoundException, IOException { Integer[] l = new Integer[]{1, null, 2}; Object deserialize = serialization.deserialize(serialization.serializeValue(l)); - Assert.assertEquals(deserialize.getClass(), int[].class); - Assert.assertEquals(deserialize, new int[]{1, 0, 2}); + assertEquals(deserialize.getClass(), int[].class); + assertArrayEquals((int[])deserialize, new int[]{1, 0, 2}); } @Test @@ -126,8 +116,8 @@ public void testTransformListOfList() throws ClassNotFoundException, IOException { Integer[][] l = new Integer[][]{{1}, {2}}; Object deserialize = serialization.deserialize(serialization.serializeValue(l)); - Assert.assertEquals(deserialize.getClass(), int[][].class); - Assert.assertEquals(deserialize, new int[][]{{1}, {2}}); + assertEquals(deserialize.getClass(), int[][].class); + assertArrayEquals((int[][])deserialize, new int[][]{{1}, {2}}); } @Test @@ -148,13 +138,14 @@ public Point read(DataInput input) } @Override - public int getWeight(Point instance) { - return 0; + public Class serializedClass() { + return Point.class; } + }); Point p = new Point(42, 9); byte[] buf = serialization.serialize(p); - Assert.assertEquals(serialization.deserialize(buf), p); + assertEquals(serialization.deserialize(buf), p); } @Test @@ -175,13 +166,14 @@ public Point[] read(DataInput input) } @Override - public int getWeight(Point[] instance) { - return 0; + public Class serializedClass() { + return Point[].class; } + }); Point[] p = new Point[]{new Point(42, 9)}; byte[] buf = serialization.serialize(p); - Assert.assertEquals(serialization.deserialize(buf), p); + assertArrayEquals((Point[])serialization.deserialize(buf), p); } @Test @@ -194,18 +186,19 @@ public ImplementsA read(DataInput dataInput) throws IOException { } @Override - public void write(DataOutput dataOutput, ImplementsA input) throws IOException { - dataOutput.writeInt(input.getVal()); + public Class serializedClass() { + return ImplementsA.class; } @Override - public int getWeight(ImplementsA instance) { - return 0; + public void write(DataOutput dataOutput, ImplementsA input) throws IOException { + dataOutput.writeInt(input.getVal()); } + }); ImplementsA a = new ImplementsA(42); byte[] buf = serialization.serialize(a); - Assert.assertEquals(serialization.deserialize(buf), a); + 
assertEquals(serialization.deserialize(buf), a); } @Test @@ -218,25 +211,27 @@ public A read(DataInput dataInput) throws IOException { } @Override - public void write(DataOutput dataOutput, A input) throws IOException { - dataOutput.writeInt(input.getVal()); + public Class serializedClass() { + return A.class; } @Override - public int getWeight(A instance) { - return 0; + public void write(DataOutput dataOutput, A input) throws IOException { + dataOutput.writeInt(input.getVal()); } + + }); + assertThrows(UnsupportedTypeException.class, () -> { + ImplementsA a = new ImplementsA(42); + byte[] buf = serialization.serialize(a); + assertEquals(serialization.deserialize(buf), a); }); - ImplementsA a = new ImplementsA(42); - byte[] buf = serialization.serialize(a); - Assert.assertEquals(serialization.deserialize(buf), a); } @Test - public void testNull() - throws Throwable { + public void testNull() throws Throwable { byte[] buf = serialization.serialize(null); - Assert.assertNull(serialization.deserialize(buf)); + assertNull(serialization.deserialize(buf)); } @Test @@ -246,15 +241,14 @@ public void testByte() for (byte val : vals) { byte[] buf = serialization.serialize(val); Object l2 = serialization.deserialize(buf); - Assert.assertTrue(l2.getClass() == Byte.class); - Assert.assertEquals(l2, val); + assertTrue(l2.getClass() == Byte.class); + assertEquals(l2, val); } } - @Test(expectedExceptions = UnsupportedTypeException.class) - public void testNotSupported() - throws Throwable { - serialization.serialize(new Color(0, 0, 0)); + @Test + public void testNotSupported() { + assertThrows(UnsupportedTypeException.class, () -> serialization.serialize(new Color(0, 0, 0))); } @Test @@ -267,8 +261,8 @@ public void testInt() for (int i : vals) { byte[] buf = serialization.serialize(i); Object l2 = serialization.deserialize(buf); - Assert.assertTrue(l2.getClass() == Integer.class); - Assert.assertEquals(l2, i); + assertTrue(l2.getClass() == Integer.class); + assertEquals(l2, i); } } @@ -281,8 +275,8 @@ public void testShort() for (short i : vals) { byte[] buf = serialization.serialize(i); Object l2 = serialization.deserialize(buf); - Assert.assertTrue(l2.getClass() == Short.class); - Assert.assertEquals(l2, i); + assertTrue(l2.getClass() == Short.class); + assertEquals(l2, i); } } @@ -293,8 +287,8 @@ public void testDouble() for (double i : vals) { byte[] buf = serialization.serialize(i); Object l2 = serialization.deserialize(buf); - Assert.assertTrue(l2.getClass() == Double.class); - Assert.assertEquals(l2, i); + assertTrue(l2.getClass() == Double.class); + assertEquals(l2, i); } } @@ -305,8 +299,8 @@ public void testFloat() for (float i : vals) { byte[] buf = serialization.serialize(i); Object l2 = serialization.deserialize(buf); - Assert.assertTrue(l2.getClass() == Float.class); - Assert.assertEquals(l2, i); + assertTrue(l2.getClass() == Float.class); + assertEquals(l2, i); } } @@ -317,8 +311,8 @@ public void testChar() for (char i : vals) { byte[] buf = serialization.serialize(i); Object l2 = serialization.deserialize(buf); - Assert.assertTrue(l2.getClass() == Character.class); - Assert.assertEquals(l2, i); + assertTrue(l2.getClass() == Character.class); + assertEquals(l2, i); } } @@ -334,8 +328,8 @@ public void testLong() for (long i : vals) { byte[] buf = serialization.serialize(i); Object l2 = serialization.deserialize(buf); - Assert.assertTrue(l2.getClass() == Long.class); - Assert.assertEquals(l2, i); + assertTrue(l2.getClass() == Long.class); + assertEquals(l2, i); } } @@ -344,13 +338,13 @@ 
public void testBoolean() throws IOException, ClassNotFoundException { byte[] buf = serialization.serialize(true); Object l2 = serialization.deserialize(buf); - Assert.assertTrue(l2.getClass() == Boolean.class); - Assert.assertEquals(l2, true); + assertTrue(l2.getClass() == Boolean.class); + assertEquals(l2, true); byte[] buf2 = serialization.serialize(false); Object l22 = serialization.deserialize(buf2); - Assert.assertTrue(l22.getClass() == Boolean.class); - Assert.assertEquals(l22, false); + assertTrue(l22.getClass() == Boolean.class); + assertEquals(l22, false); } @Test @@ -358,7 +352,7 @@ public void testString() throws IOException, ClassNotFoundException { byte[] buf = serialization.serialize("Abcd"); String l2 = (String) serialization.deserialize(buf); - Assert.assertEquals(l2, "Abcd"); + assertEquals(l2, "Abcd"); } @Test @@ -366,7 +360,7 @@ public void testEmptyString() throws IOException, ClassNotFoundException { byte[] buf = serialization.serialize(""); String l2 = (String) serialization.deserialize(buf); - Assert.assertEquals(l2, ""); + assertEquals(l2, ""); } @Test @@ -378,7 +372,7 @@ public void testBigString() } byte[] buf = serialization.serialize(bigString); String l2 = (String) serialization.deserialize(buf); - Assert.assertEquals(l2, bigString); + assertEquals(l2, bigString); } @Test @@ -386,7 +380,7 @@ public void testClass() throws IOException, ClassNotFoundException { byte[] buf = serialization.serialize(String.class); Class l2 = (Class) serialization.deserialize(buf); - Assert.assertEquals(l2, String.class); + assertEquals(l2, String.class); } @Test @@ -394,7 +388,7 @@ public void testClass2() throws IOException, ClassNotFoundException { byte[] buf = serialization.serialize(long[].class); Class l2 = (Class) serialization.deserialize(buf); - Assert.assertEquals(l2, long[].class); + assertEquals(l2, long[].class); } @Test @@ -403,7 +397,7 @@ public void testUnicodeString() String s = "Ciudad Bolíva"; byte[] buf = serialization.serialize(s); Object l2 = serialization.deserialize(buf); - Assert.assertEquals(l2, s); + assertEquals(l2, s); } @Test @@ -411,7 +405,7 @@ public void testStringArray() throws ClassNotFoundException, IOException { String[] l = new String[]{"foo", "bar", ""}; Object deserialize = serialization.deserialize(serialization.serialize(l)); - Assert.assertTrue(Arrays.equals(l, (String[]) deserialize)); + assertTrue(Arrays.equals(l, (String[]) deserialize)); } @Test @@ -419,7 +413,7 @@ public void testObjectArray() throws ClassNotFoundException, IOException { Object[] l = new Object[]{"foo", 2, Boolean.TRUE}; Object deserialize = serialization.deserialize(serialization.serialize(l)); - Assert.assertTrue(Arrays.equals(l, (Object[]) deserialize)); + assertTrue(Arrays.equals(l, (Object[]) deserialize)); } @Test @@ -427,7 +421,7 @@ public void testBooleanArray() throws ClassNotFoundException, IOException { boolean[] l = new boolean[]{true, false}; Object deserialize = serialization.deserialize(serialization.serialize(l)); - Assert.assertTrue(Arrays.equals(l, (boolean[]) deserialize)); + assertTrue(Arrays.equals(l, (boolean[]) deserialize)); } @Test @@ -435,7 +429,7 @@ public void testDoubleArray() throws ClassNotFoundException, IOException { double[] l = new double[]{Math.PI, 1D}; Object deserialize = serialization.deserialize(serialization.serialize(l)); - Assert.assertTrue(Arrays.equals(l, (double[]) deserialize)); + assertTrue(Arrays.equals(l, (double[]) deserialize)); } @Test @@ -443,7 +437,7 @@ public void testFloatArray() throws ClassNotFoundException, 
IOException { float[] l = new float[]{1F, 1.234235F}; Object deserialize = serialization.deserialize(serialization.serialize(l)); - Assert.assertTrue(Arrays.equals(l, (float[]) deserialize)); + assertTrue(Arrays.equals(l, (float[]) deserialize)); } @Test @@ -451,7 +445,7 @@ public void testByteArray() throws ClassNotFoundException, IOException { byte[] l = new byte[]{1, 34, -5}; Object deserialize = serialization.deserialize(serialization.serialize(l)); - Assert.assertTrue(Arrays.equals(l, (byte[]) deserialize)); + assertTrue(Arrays.equals(l, (byte[]) deserialize)); } @Test @@ -459,7 +453,7 @@ public void testShortArray() throws ClassNotFoundException, IOException { short[] l = new short[]{1, 345, -5000}; Object deserialize = serialization.deserialize(serialization.serialize(l)); - Assert.assertTrue(Arrays.equals(l, (short[]) deserialize)); + assertTrue(Arrays.equals(l, (short[]) deserialize)); } @Test @@ -467,7 +461,7 @@ public void testCharArray() throws ClassNotFoundException, IOException { char[] l = new char[]{'1', 'a', '&'}; Object deserialize = serialization.deserialize(serialization.serialize(l)); - Assert.assertTrue(Arrays.equals(l, (char[]) deserialize)); + assertTrue(Arrays.equals(l, (char[]) deserialize)); } @Test @@ -476,7 +470,7 @@ public void testIntArray() int[][] l = new int[][]{{3, 5}, {-1200, 29999}, {3, 100000}, {-43999, 100000}}; for (int[] a : l) { Object deserialize = serialization.deserialize(serialization.serialize(a)); - Assert.assertTrue(Arrays.equals(a, (int[]) deserialize)); + assertTrue(Arrays.equals(a, (int[]) deserialize)); } } @@ -486,7 +480,7 @@ public void testLongArray() long[][] l = new long[][]{{3l, 5l}, {-1200l, 29999l}, {3l, 100000l}, {-43999l, 100000l}, {-123l, 12345678901234l}}; for (long[] a : l) { Object deserialize = serialization.deserialize(serialization.serialize(a)); - Assert.assertTrue(Arrays.equals(a, (long[]) deserialize)); + assertTrue(Arrays.equals(a, (long[]) deserialize)); } } @@ -495,7 +489,7 @@ public void testDoubleCompressedArray() throws ClassNotFoundException, IOException { double[] l = generateDoubleArray(500); Object deserialize = serialization.deserialize(serialization.serialize(l, true)); - Assert.assertTrue(Arrays.equals(l, (double[]) deserialize)); + assertTrue(Arrays.equals(l, (double[]) deserialize)); } @Test @@ -503,7 +497,7 @@ public void testFloatCompressedArray() throws ClassNotFoundException, IOException { float[] l = generateFloatArray(500); Object deserialize = serialization.deserialize(serialization.serialize(l, true)); - Assert.assertTrue(Arrays.equals(l, (float[]) deserialize)); + assertTrue(Arrays.equals(l, (float[]) deserialize)); } @Test @@ -511,7 +505,7 @@ public void testByteCompressedArray() throws ClassNotFoundException, IOException { byte[] l = generateByteArray(500); Object deserialize = serialization.deserialize(serialization.serialize(l, true)); - Assert.assertTrue(Arrays.equals(l, (byte[]) deserialize)); + assertTrue(Arrays.equals(l, (byte[]) deserialize)); } @Test @@ -519,7 +513,7 @@ public void testCharCompressedArray() throws ClassNotFoundException, IOException { char[] l = generateCharArray(500); Object deserialize = serialization.deserialize(serialization.serialize(l, true)); - Assert.assertTrue(Arrays.equals(l, (char[]) deserialize)); + assertTrue(Arrays.equals(l, (char[]) deserialize)); } @Test @@ -527,7 +521,7 @@ public void testShortCompressedArray() throws ClassNotFoundException, IOException { short[] l = generateShortArray(500); Object deserialize = 
serialization.deserialize(serialization.serialize(l, true)); - Assert.assertTrue(Arrays.equals(l, (short[]) deserialize)); + assertTrue(Arrays.equals(l, (short[]) deserialize)); } @Test @@ -535,7 +529,7 @@ public void testIntCompressedArray() throws ClassNotFoundException, IOException { int[] l = generateIntArray(500); Object deserialize = serialization.deserialize(serialization.serialize(l, true)); - Assert.assertTrue(Arrays.equals(l, (int[]) deserialize)); + assertTrue(Arrays.equals(l, (int[]) deserialize)); } @Test @@ -543,25 +537,25 @@ public void testLongCompressedArray() throws ClassNotFoundException, IOException { long[] l = generateLongArray(500); Object deserialize = serialization.deserialize(serialization.serialize(l, true)); - Assert.assertTrue(Arrays.equals(l, (long[]) deserialize)); + assertTrue(Arrays.equals(l, (long[]) deserialize)); } @Test public void testBigDecimal() throws IOException, ClassNotFoundException { BigDecimal d = new BigDecimal("445656.7889889895165654423236"); - Assert.assertEquals(d, serialization.deserialize(serialization.serialize(d))); + assertEquals(d, serialization.deserialize(serialization.serialize(d))); d = new BigDecimal("-53534534534534445656.7889889895165654423236"); - Assert.assertEquals(d, serialization.deserialize(serialization.serialize(d))); + assertEquals(d, serialization.deserialize(serialization.serialize(d))); } @Test public void testBigInteger() throws IOException, ClassNotFoundException { BigInteger d = new BigInteger("4456567889889895165654423236"); - Assert.assertEquals(d, serialization.deserialize(serialization.serialize(d))); + assertEquals(d, serialization.deserialize(serialization.serialize(d))); d = new BigInteger("-535345345345344456567889889895165654423236"); - Assert.assertEquals(d, serialization.deserialize(serialization.serialize(d))); + assertEquals(d, serialization.deserialize(serialization.serialize(d))); } @Test @@ -571,8 +565,8 @@ public void testMultiDimensionalIntArray() d[0] = new int[]{1, 3}; d[1] = new int[]{-3, 1}; Object res = serialization.deserialize(serialization.serialize(d)); - Assert.assertEquals(res.getClass(), int[][].class); - Assert.assertEquals(d, res); + assertEquals(res.getClass(), int[][].class); + assertArrayEquals(d, (int[][])res); } @Test @@ -582,8 +576,8 @@ public void testMultiDimensionalLongArray() d[0] = new long[]{1, 3}; d[1] = new long[]{-3, 1}; Object res = serialization.deserialize(serialization.serialize(d)); - Assert.assertEquals(res.getClass(), long[][].class); - Assert.assertEquals(d, res); + assertEquals(res.getClass(), long[][].class); + assertArrayEquals(d, (long[][])res); } // UTILITY @@ -651,7 +645,7 @@ private static char[] generateCharArray(int size) { return array; } - private static interface A { + private interface A { int getVal(); } diff --git a/src/test/java/com/linkedin/paldb/impl/TestStore.java b/src/test/java/com/linkedin/paldb/impl/TestStore.java new file mode 100644 index 0000000..8e900bd --- /dev/null +++ b/src/test/java/com/linkedin/paldb/impl/TestStore.java @@ -0,0 +1,569 @@ +/* +* Copyright 2015 LinkedIn Corp. All rights reserved. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +*/ + +package com.linkedin.paldb.impl; + +import com.linkedin.paldb.api.*; +import com.linkedin.paldb.api.errors.DuplicateKeyException; +import com.linkedin.paldb.utils.*; +import org.junit.jupiter.api.*; + +import java.io.*; +import java.nio.file.*; +import java.util.*; + +import static com.linkedin.paldb.utils.TestTempUtils.deleteDirectory; +import static org.junit.jupiter.api.Assertions.*; + +public class TestStore { + + private Path tempDir; + private File storeFile; + + @BeforeEach + public void setUp() throws IOException { + tempDir = Files.createTempDirectory("tmp"); + storeFile = Files.createTempFile(tempDir, "paldb", ".dat").toFile(); + } + + @AfterEach + public void cleanUp() { + deleteDirectory(tempDir.toFile()); + } + + @Test + public void testEmpty() { + StoreWriter writer = PalDB.createWriter(storeFile, new Configuration()); + writer.close(); + + assertTrue(storeFile.exists()); + + try (StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + assertEquals(reader.size(), 0); + assertNull(reader.get(1, null)); + } + } + + @Test + public void testEmptyStream() { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + StoreWriter writer = PalDB.createWriter(bos, new Configuration()); + writer.close(); + + assertTrue(bos.toByteArray().length > 0); + + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + StoreReader reader = PalDB.createReader(bis, new Configuration()); + reader.close(); + } + + @Test + public void testEmptyDefaultConfig() { + StoreWriter writer = PalDB.createWriter(storeFile); + writer.close(); + + assertTrue(storeFile.exists()); + + try (StoreReader reader = PalDB.createReader(storeFile)) { + assertEquals(reader.size(), 0); + assertNull(reader.get(1, null)); + } + } + + @Test + public void testNewConfiguration() { + assertNotNull(PalDB.newConfiguration()); + } + + @Test + public void testNoFolder() { + File file = new File("nofolder.store"); + file.deleteOnExit(); + StoreWriter writer = PalDB.createWriter(file, new Configuration()); + writer.close(); + + assertTrue(file.exists()); + } + + @Test + public void testReaderFileNotFound() { + assertThrows(RuntimeException.class, () -> PalDB.createReader(new File("notfound"), PalDB.newConfiguration())); + } + + @Test + public void testReaderNullFile() { + assertThrows(NullPointerException.class, () -> PalDB.createReader((File) null, PalDB.newConfiguration())); + } + + @Test + public void testReaderNullConfig() { + assertThrows(NullPointerException .class, () -> PalDB.createReader(new File("notfound"), null)); + } + + @Test + public void testReaderNullStream() { + assertThrows(NullPointerException.class, () -> PalDB.createReader((InputStream) null, PalDB.newConfiguration())); + } + + @Test + public void testReaderNullConfigForStream() { + assertThrows(NullPointerException.class, () -> { + PalDB.createReader(new InputStream() { + @Override + public int read() { + return 0; + } + }, null); + }); + } + + @Test + public void testWriterNullFile() { + assertThrows(NullPointerException.class, () -> PalDB.createWriter((File) null, PalDB.newConfiguration())); + } + + @Test + public void testWriterNullConfig() { + assertThrows(NullPointerException.class, () -> 
PalDB.createWriter(new File("notfound"), null)); + } + + @Test + public void testWriterNullStream() { + assertThrows(NullPointerException.class, () -> PalDB.createWriter((OutputStream) null, PalDB.newConfiguration())); + } + + @Test + public void testWriterNullConfigForStream() { + assertThrows(NullPointerException.class, () -> PalDB.createWriter(new OutputStream() { + @Override + public void write(int i) { + + } + }, null)); + } + + @Test + public void testInvalidSegmentSize() { + StoreWriter writer = PalDB.createWriter(storeFile); + writer.close(); + + Configuration config = new Configuration(); + config.set(Configuration.MMAP_SEGMENT_SIZE, String.valueOf(1 + (long) Integer.MAX_VALUE)); + assertThrows(IllegalArgumentException.class, () -> PalDB.createReader(storeFile, config)); + } + + @Test + public void testByteMarkEmpty() throws IOException { + try (FileOutputStream fos = new FileOutputStream(storeFile)) { + fos.write(12345); + fos.write(FormatVersion.getPrefixBytes()[0]); + fos.write(3456); + StoreWriter writer = PalDB.createWriter(fos, new Configuration()); + writer.close(); + } + + try (StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + assertEquals(reader.size(), 0); + assertNull(reader.get(1, null)); + } + } + + @Test + public void testOneKey() { + try (StoreWriter writer = PalDB.createWriter(storeFile, new Configuration())) { + writer.put(1, "foo"); + } + + try (StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + assertEquals(reader.size(), 1); + assertEquals(reader.get(1), "foo"); + } + } + + @Test + public void testPutSerializedKey() throws IOException { + StorageSerialization storageSerialization = new StorageSerialization(new Configuration()); + byte[] serializedKey = storageSerialization.serializeKey(1); + byte[] serializedValue = storageSerialization.serializeValue("foo"); + + try (StoreWriter writer = PalDB.createWriter(storeFile, new Configuration())) { + writer.put(serializedKey, serializedValue); + } + + try (StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + assertEquals(reader.size(), 1); + assertEquals(reader.get(1), "foo"); + } + } + + @Test + public void testByteMarkOneKey() throws IOException { + try (FileOutputStream fos = new FileOutputStream(storeFile); + StoreWriter writer = PalDB.createWriter(fos, new Configuration())) { + fos.write(12345); + fos.write(FormatVersion.getPrefixBytes()[0]); + fos.write(3456); + + writer.put(1, "foo"); + } + + try (StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + assertEquals(reader.size(), 1); + assertEquals(reader.get(1), "foo"); + } + } + + @Test + public void testTwoFirstKeyLength() { + Integer key1 = 1; + Integer key2 = 245; + + //Test key length + testKeyLength(key1, 1); + testKeyLength(key2, 2); + + //Write + writeStore(storeFile, new Integer[]{key1, key2}, new Integer[]{1, 6}); + + //Read + try (StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + assertEquals(reader.get(key1).intValue(), 1); + assertEquals(reader.get(key2).intValue(), 6); + assertNull(reader.get(0, null)); + assertNull(reader.get(6, null)); + assertNull(reader.get(244, null)); + assertNull(reader.get(246, null)); + assertNull(reader.get(1245, null)); + } + } + + @Test + public void testKeyLengthGap() { + Integer key1 = 1; + Integer key2 = 2450; + + //Test key length + testKeyLength(key1, 1); + testKeyLength(key2, 3); + + //Write + writeStore(storeFile, new Integer[]{key1, key2}, new Integer[]{1, 6}); + + //Read + try 
(StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + assertEquals(reader.get(key1).intValue(), 1); + assertEquals(reader.get(key2).intValue(), 6); + assertNull(reader.get(0, null)); + assertNull(reader.get(6, null)); + assertNull(reader.get(244, null)); + assertNull(reader.get(267, null)); + assertNull(reader.get(2449, null)); + assertNull(reader.get(2451, null)); + assertNull(reader.get(2454441, null)); + } + } + + @Test + public void testKeyLengthStartTwo() { + Integer key1 = 245; + Integer key2 = 2450; + + //Test key length + testKeyLength(key1, 2); + testKeyLength(key2, 3); + + //Write + writeStore(storeFile, new Integer[]{key1, key2}, new Integer[]{1, 6}); + + //Read + try (StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + assertEquals(reader.get(key1).intValue(), 1); + assertEquals(reader.get(key2).intValue(), 6); + assertNull(reader.get(6, null)); + assertNull(reader.get(244, null)); + assertNull(reader.get(267, null)); + assertNull(reader.get(2449, null)); + assertNull(reader.get(2451, null)); + assertNull(reader.get(2454441, null)); + } + } + + @Test + public void testDuplicateKeys() { + StoreWriter writer = PalDB.createWriter(storeFile, new Configuration()); + writer.put(0, "ABC"); + writer.put(0, "DGE"); + assertThrows(DuplicateKeyException.class, writer::close); + } + + @Test + public void testDataOnTwoBuffers() throws IOException { + Object[] keys = new Object[]{1, 2, 3}; + Object[] values = new Object[]{GenerateTestData.generateStringData(100), GenerateTestData + .generateStringData(10000), GenerateTestData.generateStringData(100)}; + + StorageSerialization serialization = new StorageSerialization(new Configuration()); + int byteSize = serialization.serialize(values[0]).length + serialization.serialize(values[1]).length; + + //Write + writeStore(storeFile, keys, values); + + //Read + Configuration configuration = new Configuration(); + configuration.set(Configuration.MMAP_SEGMENT_SIZE, String.valueOf(byteSize - 100)); + try (StoreReader reader = PalDB.createReader(storeFile, configuration)) { + for (int i = 0; i < keys.length; i++) { + assertEquals(reader.get((Integer) keys[i], null), values[i]); + } + } + } + + @Test + public void testDataSizeOnTwoBuffers() throws IOException { + Integer[] keys = new Integer[]{1, 2, 3}; + String[] values = new String[]{GenerateTestData.generateStringData(100), GenerateTestData + .generateStringData(10000), GenerateTestData.generateStringData(100)}; + + StorageSerialization serialization = new StorageSerialization(new Configuration()); + byte[] b1 = serialization.serialize(values[0]); + byte[] b2 = serialization.serialize(values[1]); + int byteSize = b1.length + b2.length; + int sizeSize = + LongPacker.packInt(new DataInputOutput(), b1.length) + LongPacker.packInt(new DataInputOutput(), b2.length); + + //Write + writeStore(storeFile, keys, values); + + //Read + Configuration configuration = new Configuration(); + configuration.set(Configuration.MMAP_SEGMENT_SIZE, String.valueOf(byteSize + sizeSize + 3)); + try (StoreReader reader = PalDB.createReader(storeFile, configuration)) { + for (int i = 0; i < keys.length; i++) { + assertEquals(reader.get(keys[i], null), values[i]); + } + } + } + + @Test + public void testReadStringToString() { + testReadKeyToString(GenerateTestData.generateStringKeys(100)); + } + + @Test + public void testReadIntToString() { + testReadKeyToString(GenerateTestData.generateIntKeys(100)); + } + + @Test + public void testReadDoubleToString() { + 
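+    // Delegates to the shared testReadKeyToString round-trip helper defined below.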
testReadKeyToString(GenerateTestData.generateDoubleKeys(100)); + } + + @Test + public void testReadLongToString() { + testReadKeyToString(GenerateTestData.generateLongKeys(100)); + } + + @Test + public void testReadStringToInt() { + testReadKeyToInt(GenerateTestData.generateStringKeys(100)); + } + + @Test + public void testReadByteToInt() { + testReadKeyToInt(GenerateTestData.generateByteKeys(100)); + } + + @Test + public void testReadIntToInt() { + testReadKeyToInt(GenerateTestData.generateIntKeys(100)); + } + + @Test + public void testReadIntToIntArray() { + testReadKeyToIntArray(GenerateTestData.generateIntKeys(100)); + } + + @Test + public void testReadCompoundToString() { + testReadKeyToString(GenerateTestData.generateCompoundKeys(100)); + } + + @Test + public void testReadCompoundByteToString() { + testReadKeyToString(new Object[]{GenerateTestData.generateCompoundByteKey()}); + } + + @Test + public void testReadIntToNull() { + testReadKeyToNull(GenerateTestData.generateIntKeys(100)); + } + + @Test + public void testReadDisk() { + Integer[] keys = GenerateTestData.generateIntKeys(10000); + Configuration configuration = new Configuration(); + + //Write + String[] values = GenerateTestData.generateStringData(keys.length, 1000); + try (StoreWriter writer = PalDB.createWriter(storeFile, configuration)) { + writer.putAll(keys, values); + } + + //Read + configuration.set(Configuration.MMAP_DATA_ENABLED, "false"); + try (StoreReader reader = PalDB.createReader(storeFile, configuration)) { + assertEquals(reader.size(), keys.length); + + for (int i = 0; i < keys.length; i++) { + Integer key = keys[i]; + String val = reader.get(key, null); + assertNotNull(val); + assertEquals(val, values[i]); + } + } + } + + @Test + public void testIterate() { + Integer[] keys = GenerateTestData.generateIntKeys(100); + String[] values = GenerateTestData.generateStringData(keys.length, 12); + + //Write + writeStore(storeFile, keys, values); + + //Sets + Set keysSet = new HashSet<>(Arrays.asList(keys)); + Set valuesSet = new HashSet<>(Arrays.asList(values)); + + //Read + try (StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + var itr = reader.iterable().iterator(); + for (int i = 0; i < keys.length; i++) { + assertTrue(itr.hasNext()); + var entry = itr.next(); + assertNotNull(entry); + assertTrue(keysSet.remove(entry.getKey())); + assertTrue(valuesSet.remove(entry.getValue())); + + Object valSearch = reader.get(entry.getKey(), null); + assertNotNull(valSearch); + assertEquals(valSearch, entry.getValue()); + } + assertFalse(itr.hasNext()); + } + + assertTrue(keysSet.isEmpty()); + assertTrue(valuesSet.isEmpty()); + } + + // UTILITY + + private void testReadKeyToString(K[] keys) { + // Write + String[] values = GenerateTestData.generateStringData(keys.length, 10); + try (StoreWriter writer = PalDB.createWriter(storeFile, new Configuration())) { + writer.putAll(keys, values); + } + // Read + try (StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + assertEquals(reader.size(), keys.length); + + for (int i = 0; i < keys.length; i++) { + K key = keys[i]; + String val = reader.get(key, null); + assertNotNull(val); + assertEquals(val, values[i]); + } + } + } + + private void testReadKeyToInt(K[] keys) { + // Write + Integer[] values = GenerateTestData.generateIntData(keys.length); + try (StoreWriter writer = PalDB.createWriter(storeFile, new Configuration())) { + writer.putAll(keys, values); + } + + try (StoreReader reader = PalDB.createReader(storeFile, new 
Configuration())) { + assertEquals(reader.size(), keys.length); + + for (int i = 0; i < keys.length; i++) { + K key = keys[i]; + Object val = reader.get(key, 0); + assertNotNull(val); + assertEquals(val, values[i]); + } + } + } + + private void testReadKeyToNull(K[] keys) { + //Write + try (StoreWriter writer = PalDB.createWriter(storeFile, new Configuration())) { + Object[] values = new Object[keys.length]; + writer.putAll(keys, values); + } + + try (StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + assertEquals(reader.size(), keys.length); + + for (K key : keys) { + Object val = reader.get(key, null); + assertNull(val); + } + + for (K key : keys) { + Object val = reader.get(key, null); + assertNull(val); + } + } + } + + private void testReadKeyToIntArray(K[] keys) { + //Write + int[][] values = GenerateTestData.generateIntArrayData(keys.length, 100); + try (StoreWriter writer = PalDB.createWriter(storeFile, new Configuration())) { + writer.putAll(keys, values); + } + + //Read + try (StoreReader reader = PalDB.createReader(storeFile, new Configuration())) { + assertEquals(reader.size(), keys.length); + + for (int i = 0; i < keys.length; i++) { + K key = keys[i]; + int[] val = reader.get(key, null); + assertNotNull(val); + assertArrayEquals(val, values[i]); + } + } + } + + private void writeStore(File location, K[] keys, V[] values) { + try (StoreWriter writer = PalDB.createWriter(location, new Configuration())) { + writer.putAll(keys, values); + } + } + + private void testKeyLength(Object key, int expectedLength) { + StorageSerialization serializationImpl = new StorageSerialization(new Configuration()); + int keyLength = 0; + try { + keyLength = serializationImpl.serializeKey(key).length; + } catch (IOException e) { + throw new RuntimeException(e); + } + assertEquals(keyLength, expectedLength); + } +} diff --git a/src/test/java/com/linkedin/paldb/impl/TestStoreReader.java b/src/test/java/com/linkedin/paldb/impl/TestStoreReader.java new file mode 100644 index 0000000..363a5c4 --- /dev/null +++ b/src/test/java/com/linkedin/paldb/impl/TestStoreReader.java @@ -0,0 +1,502 @@ +/* +* Copyright 2015 LinkedIn Corp. All rights reserved. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +*/ + +package com.linkedin.paldb.impl; + +import com.linkedin.paldb.api.*; +import org.junit.jupiter.api.*; + +import java.awt.*; +import java.io.*; +import java.nio.file.*; +import java.util.List; +import java.util.*; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicBoolean; + +import static com.linkedin.paldb.utils.TestTempUtils.deleteDirectory; +import static org.junit.jupiter.api.Assertions.*; + +public class TestStoreReader { + + private Path tempDir; + private File storeFile; + + @BeforeEach + public void setUp() throws IOException { + tempDir = Files.createTempDirectory("tmp"); + storeFile = Files.createTempFile(tempDir, "paldb", ".dat").toFile(); + } + + @AfterEach + public void cleanUp() { + deleteDirectory(tempDir.toFile()); + } + + @SafeVarargs + private StoreReader readerForMany(V... 
values) { + var configuration = new Configuration(); + configuration.registerSerializer(new PointSerializer()); + try (StoreWriter writer = PalDB.createWriter(storeFile, configuration)) { + for (int i = 0; i < values.length; i++) { + writer.put(i, values[i]); + } + } + return PalDB.createReader(storeFile, configuration); + } + + private StoreReader readerFor(V value) { + return readerForMany(value); + } + + @Test + public void testFile() { + try (var reader = readerFor(true)) { + assertEquals(reader.getFile(), storeFile); + } + } + + @Test + public void testSize() { + try (var reader = readerFor(true)) { + assertEquals(reader.size(), 1); + } + } + + @Test + public void testStoreClosed() { + var reader = readerFor(true); + reader.close(); + assertThrows(IllegalStateException.class, () -> reader.get(0)); + } + + @Test + public void testGetBoolean() { + try (var reader = readerFor(true)) { + assertTrue(reader.get(0)); + assertTrue(reader.get(0, false)); + assertFalse(reader.get(-1, false)); + } + } + + @Test + public void testGetBooleanMissing() { + try (var reader = readerFor(true)) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetByte() { + try (var reader = readerFor((byte)1)) { + assertEquals(reader.get(0).byteValue(), (byte) 1); + assertEquals(reader.get(0, (byte) 5).byteValue(), (byte) 1); + assertEquals(reader.get(-1, (byte) 5).byteValue(), (byte) 5); + } + } + + @Test + public void testGetByteMissing() { + try (var reader = readerFor((byte)1)) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetChar() { + try (var reader = readerFor('a')) { + assertEquals(reader.get(0).charValue(), 'a'); + assertEquals(reader.get(0, 'b').charValue(), 'a'); + assertEquals(reader.get(-1, 'b').charValue(), 'b'); + } + } + + @Test + public void testGetCharMissing() { + try (var reader = readerFor('a')) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetDouble() { + try (var reader = readerFor(1.0)) { + assertEquals(reader.get(0).doubleValue(), 1.0); + assertEquals(reader.get(0, 2.0).doubleValue(), 1.0); + assertEquals(reader.get(-1, 2.0).doubleValue(), 2.0); + } + } + + @Test + public void testGetDoubleMissing() { + try (var reader = readerFor(1.0)) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetFloat() { + try (var reader = readerFor(1f)) { + assertEquals(reader.get(0).floatValue(), 1f); + assertEquals(reader.get(0, 2f).floatValue(), 1f); + assertEquals(reader.get(-1, 2f).floatValue(), 2f); + } + } + + @Test + public void testGetFloatMissing() { + try (var reader = readerFor(1.0)) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetShort() { + try (var reader = readerFor((short) 1)) { + assertEquals(reader.get(0).shortValue(), (short) 1); + assertEquals(reader.get(0, (short) 2).shortValue(), (short) 1); + assertEquals(reader.get(-1, (short) 2).shortValue(), (short) 2); + } + } + + @Test + public void testGetShortMissing() { + try (var reader = readerFor((short) 1)) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetInt() { + try (var reader = readerFor(1)) { + assertEquals(reader.get(0).intValue(), 1); + assertEquals(reader.get(0, 2).intValue(), 1); + assertEquals(reader.get(-1, 2).intValue(), 2); + } + } + + @Test + public void testGetIntMissing() { + try (var reader = readerFor(1)) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetLong() { + try (var reader = readerFor(1L)) { + assertEquals(reader.get(0).longValue(), 1L); + assertEquals(reader.get(0, 
2L).longValue(), 1L); + assertEquals(reader.get(-1, 2L).longValue(), 2L); + } + } + + @Test + public void testGetLongMissing() { + try (var reader = readerFor(1L)) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetString() { + try (var reader = readerFor("foo")) { + assertEquals(reader.get(0), "foo"); + assertEquals(reader.get(0, "bar"), "foo"); + assertEquals(reader.get(-1, "bar"), "bar"); + } + } + + @Test + public void testGetStringMissing() { + try (var reader = readerFor("foo")) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetBooleanArray() { + try (var reader = readerFor(new boolean[]{true})) { + assertArrayEquals(reader.get(0), new boolean[]{true}); + assertArrayEquals(reader.get(0, new boolean[]{false}), new boolean[]{true}); + assertArrayEquals(reader.get(-1, new boolean[]{false}), new boolean[]{false}); + } + } + + @Test + public void testGetBooleanArrayMissing() { + try (var reader = readerFor(new boolean[]{true})) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetByteArray() { + try (var reader = readerFor(new byte[]{1})) { + assertArrayEquals(reader.get(0), new byte[]{1}); + assertArrayEquals(reader.get(0, new byte[]{2}), new byte[]{1}); + assertArrayEquals(reader.get(-1, new byte[]{2}), new byte[]{2}); + } + } + + @Test + public void testGetByteArrayMissing() { + try (var reader = readerFor(new byte[]{1})) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetCharArray() { + try (var reader = readerFor(new char[]{'a'})) { + assertArrayEquals(reader.get(0), new char[]{'a'}); + assertArrayEquals(reader.get(0, new char[]{'b'}), new char[]{'a'}); + assertArrayEquals(reader.get(-1, new char[]{'b'}), new char[]{'b'}); + } + } + + @Test + public void testGetCharArrayMissing() { + try (var reader = readerFor(new char[]{'a'})) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetDoubleArray() { + try (var reader = readerFor(new double[]{1.0})) { + assertArrayEquals(reader.get(0), new double[]{1.0}); + assertArrayEquals(reader.get(0, new double[]{2.0}), new double[]{1.0}); + assertArrayEquals(reader.get(-1, new double[]{2.0}), new double[]{2.0}); + } + } + + @Test + public void testGetDoubleArrayMissing() { + try (var reader = readerFor(new double[]{1.0})) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetFloatArray() { + try (var reader = readerFor(new float[]{1f})) { + assertArrayEquals(reader.get(0), new float[]{1f}); + assertArrayEquals(reader.get(0, new float[]{2f}), new float[]{1f}); + assertArrayEquals(reader.get(-1, new float[]{2f}), new float[]{2f}); + } + } + + @Test + public void testGetFloatArrayMissing() { + try (var reader = readerFor(new float[]{1f})) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetShortArray() { + try (var reader = readerFor(new short[]{1})) { + assertArrayEquals(reader.get(0), new short[]{1}); + assertArrayEquals(reader.get(0, new short[]{2}), new short[]{1}); + assertArrayEquals(reader.get(-1, new short[]{2}), new short[]{2}); + } + } + + @Test + public void testGetShortArrayMissing() { + try (var reader = readerFor(new short[]{1})) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetIntArray() { + try (var reader = readerFor(new int[]{1})) { + assertArrayEquals(reader.get(0), new int[]{1}); + assertArrayEquals(reader.get(0, new int[]{2}), new int[]{1}); + assertArrayEquals(reader.get(-1, new int[]{2}), new int[]{2}); + } + } + + @Test + public void testGetIntArrayMissing() { + try (var 
reader = readerFor(new int[]{1})) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetLongArray() { + try (var reader = readerFor(new long[]{1L})) { + assertArrayEquals(reader.get(0), new long[]{1L}); + assertArrayEquals(reader.get(0, new long[]{2L}), new long[]{1L}); + assertArrayEquals(reader.get(-1, new long[]{2L}), new long[]{2L}); + } + } + + @Test + public void testGetLongArrayMissing() { + try (var reader = readerFor(new long[]{1L})) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetStringArray() { + try (var reader = readerFor(new String[]{"foo"})) { + assertArrayEquals(reader.get(0), new String[]{"foo"}); + assertArrayEquals(reader.get(0, new String[]{"bar"}), new String[]{"foo"}); + assertArrayEquals(reader.get(-1, new String[]{"bar"}), new String[]{"bar"}); + } + } + + @Test + public void testGetStringArrayMissing() { + try (var reader = readerFor(new String[]{"foo"})) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetMissing() { + try (var reader = readerFor("foo")) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetArray() { + try (var reader = readerFor(new Object[]{"foo"})) { + assertArrayEquals(reader.get(0), new Object[]{"foo"}); + assertArrayEquals(reader.get(0, new Object[]{"bar"}), new Object[]{"foo"}); + assertArrayEquals(reader.get(-1, new Object[]{"bar"}), new Object[]{"bar"}); + } + } + + @Test + public void testGetArrayMissing() { + try (var reader = readerFor(new Object[]{"foo"})) { + assertNull(reader.get(-1)); + } + } + + @Test + public void testGetPoint() { + try (var reader = readerFor(new Point(4, 56))) { + assertEquals(reader.get(0), new Point(4, 56)); + } + } + + @Test + public void testIterator() { + var values = List.of("foo", "bar"); + try (var reader = readerForMany(values.get(0), values.get(1))) { + var iter = reader.iterable(); + assertNotNull(iter); + var itr = iter.iterator(); + assertNotNull(itr); + + for (int i = 0; i < values.size(); i++) { + assertTrue(itr.hasNext()); + var v = itr.next(); + assertEquals(v.getValue(), values.get(v.getKey())); + } + } + } + + @Test + public void testIterate() { + var values = List.of("foo", "bar"); + try (var reader = readerForMany(values.get(0), values.get(1))) { + for (var entry: reader) { + var val = values.get(entry.getKey()); + assertEquals(entry.getValue(), val); + } + } + } + + @Test + public void testKeyIterator() { + var values = List.of("foo", "bar"); + try (var reader = readerForMany(values.get(0), values.get(1))) { + var iter = reader.keys(); + assertNotNull(iter); + var itr = iter.iterator(); + assertNotNull(itr); + + Set actual = new HashSet<>(); + Set expected = new HashSet<>(); + for (int i = 0; i < values.size(); i++) { + assertTrue(itr.hasNext()); + Integer k = itr.next(); + actual.add(k); + expected.add(i); + } + assertEquals(actual, expected); + } + } + + @Test + public void testMultiThreadRead() throws InterruptedException { + int threadCount = 50; + final CountDownLatch latch = new CountDownLatch(threadCount); + final AtomicBoolean success = new AtomicBoolean(true); + var values = List.of("foobar", "any", "any value"); + try (var reader = readerForMany(values.get(0), values.get(1))) { + for(int i = 0; i < threadCount; i++) { + new Thread(() -> { + try { + for(int c = 0; c < 100000; c++) { + if(!success.get())break; + assertEquals(reader.get(1), "any"); + assertEquals(reader.get(0), "foobar"); + } + } catch (Throwable error){ + error.printStackTrace(); + success.set(false); + } finally { + latch.countDown(); + } 
+ }).start(); + } + latch.await(); + assertTrue(success.get()); + } + } + + // UTILITY + + public static class PointSerializer implements Serializer { + + @Override + public Point read(DataInput input) + throws IOException { + return new Point(input.readInt(), input.readInt()); + } + + @Override + public Class serializedClass() { + return Point.class; + } + + @Override + public void write(DataOutput output, Point input) + throws IOException { + output.writeInt(input.x); + output.writeInt(input.y); + } + + } +} diff --git a/paldb/src/perfTest/java/com/linkedin/paldb/TestMemoryUsageHashMap.java b/src/test/java/com/linkedin/paldb/performance/TestMemoryUsageHashMap.java similarity index 88% rename from paldb/src/perfTest/java/com/linkedin/paldb/TestMemoryUsageHashMap.java rename to src/test/java/com/linkedin/paldb/performance/TestMemoryUsageHashMap.java index 7b5750f..cc9ba3e 100644 --- a/paldb/src/perfTest/java/com/linkedin/paldb/TestMemoryUsageHashMap.java +++ b/src/test/java/com/linkedin/paldb/performance/TestMemoryUsageHashMap.java @@ -12,15 +12,15 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.paldb; +package com.linkedin.paldb.performance; -import com.linkedin.paldb.utils.NanoBench; -import java.util.HashSet; -import java.util.Random; -import java.util.Set; -import org.testng.annotations.Test; +import com.linkedin.paldb.performance.utils.NanoBench; +import org.junit.jupiter.api.*; +import java.util.*; +@Disabled +@Tag("performance") public class TestMemoryUsageHashMap { private Set ref; diff --git a/src/test/java/com/linkedin/paldb/performance/TestReadThroughput.java b/src/test/java/com/linkedin/paldb/performance/TestReadThroughput.java new file mode 100644 index 0000000..8610fb8 --- /dev/null +++ b/src/test/java/com/linkedin/paldb/performance/TestReadThroughput.java @@ -0,0 +1,222 @@ +/* +* Copyright 2015 LinkedIn Corp. All rights reserved. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+*/ + +package com.linkedin.paldb.performance; + +import com.linkedin.paldb.api.*; +import com.linkedin.paldb.impl.GenerateTestData; +import com.linkedin.paldb.performance.utils.*; +import org.apache.commons.lang.RandomStringUtils; +import org.junit.jupiter.api.*; + +import java.io.*; +import java.nio.file.Files; +import java.util.*; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.IntStream; + +@Disabled +@Tag("performance") +public class TestReadThroughput { + + private File testFolder = createTempDir(); + private static final int READS = 500000; + + @BeforeEach + public void setUp() { + DirectoryUtils.deleteDirectory(testFolder); + testFolder.mkdir(); + } + + @AfterEach + public void cleanUp() { + DirectoryUtils.deleteDirectory(testFolder); + } + + private static File createTempDir() { + try { + return Files.createTempDirectory("testreadthroughput").toFile(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Test + public void testReadThroughput() { + + List measures = new ArrayList<>(); + int max = 10000000; + for (int i = 100; i <= max; i *= 10) { + Measure m = measure(i, 0, 0, false, 1); + + measures.add(m); + } + + report("READ THROUGHPUT (Set int -> boolean)", measures); + } + + @Test + public void testReadThroughputMultiThread() { + + List measures = new ArrayList<>(); + int max = 10000000; + for (int i = 100; i <= max; i *= 10) { + Measure m = measure(i, 0, 0, false, 4); + + measures.add(m); + } + + report("READ THROUGHPUT MULTI THREAD (Set int -> boolean)", measures); + } + + @Test + public void testReadThroughputWithCache() { + + List measures = new ArrayList<>(); + int max = 10000000; + for (int i = 100; i <= max; i *= 10) { + Measure m = measure(i, 0, 0.05, false, 1); + + measures.add(m); + } + + report("READ THROUGHPUT WITH BLOOM FILTER (Set int -> boolean)", measures); + } + + @Test + public void testReadThroughputWithCacheRandomFinds() { + + List measures = new ArrayList<>(); + int max = 10000000; + for (int i = 100; i <= max; i *= 10) { + Measure m = measure(i, 0, 0.01, true, 1); + + measures.add(m); + } + + report("READ THROUGHPUT WITH BLOOM FILTER RANDOM FINDS (Set int -> boolean)", measures); + } + + @Test + public void testReadThroughputWithCacheRandomFindsMultipleThreads() { + + List measures = new ArrayList<>(); + int max = 10000000; + for (int i = 100; i <= max; i *= 10) { + Measure m = measure(i, 0, 0.01, true, 4); + + measures.add(m); + } + + report("READ THROUGHPUT WITH BLOOM FILTER RANDOM FINDS MULTITHREADED (Set int -> boolean)", measures); + } + + // UTILITY + + private Measure measure(int keysCount, int valueLength, double errorRate, boolean randomReads, int noOfThreads) { + // Write store + File storeFile = new File(testFolder, "paldb" + keysCount + "-" + valueLength + ".store"); + // Generate keys + long seed = 4242; + final Integer[] keys = GenerateTestData.generateRandomIntKeys(keysCount, Integer.MAX_VALUE, seed); + + var configBuilder = PalDBConfigBuilder.create(); + if (errorRate > 0) { + configBuilder.withEnableBloomFilter(true) + .withBloomFilterErrorFactor(errorRate); + } + + var config = configBuilder + .build(); + + try (StoreWriter writer = PalDB.createWriter(storeFile, config)) { + for (Integer key : keys) { + if (valueLength == 0) { + writer.put(key.toString(), Boolean.TRUE.toString()); + } else { + writer.put(key.toString(), RandomStringUtils.randomAlphabetic(valueLength)); + } + } + } + + var totalCount = new AtomicInteger(0); + var findCount = 
new AtomicInteger(0); + try (StoreReader reader = PalDB.createReader(storeFile, config)) { + // Measure + NanoBench nanoBench = NanoBench.create(); + nanoBench.cpuOnly().warmUps(5).measurements(20).measure("Measure %d reads for %d keys with cache", () -> { + if (noOfThreads < 2) { + doWork(randomReads, keys, totalCount, findCount, reader); + } else { + var forkJoinPool = new ForkJoinPool(noOfThreads); + try { + forkJoinPool.submit(() -> IntStream.range(0, noOfThreads).parallel() + .forEach(i -> doWork(randomReads, keys, totalCount, findCount, reader)) + ).join(); + } finally { + forkJoinPool.shutdown(); + } + } + }); + + // Return measure + double rps = READS * noOfThreads * nanoBench.getTps(); + return new Measure(storeFile.length(), rps, findCount.get(), totalCount.get(), keys.length); + } + } + + private void doWork(boolean randomReads, Integer[] keys, AtomicInteger totalCount, AtomicInteger findCount, StoreReader reader) { + Random r = new Random(42); + int length = keys.length; + for (int j = 0; j < READS; j++) { + totalCount.incrementAndGet(); + int key; + if (randomReads) { + key = r.nextInt(Integer.MAX_VALUE); + } else { + key = keys[r.nextInt(length)]; + } + var value = reader.get(Integer.toString(key)); + if (value != null) { + findCount.incrementAndGet(); + } + } + } + + private void report(String title, List measures) { + System.out.println(title); + System.out.println("FILE LENGTH;\tKEYS;\tRPS;\tVALUES FOUND;\tTOTAL READS"); + for (Measure m : measures) { + System.out.println(m.fileSize + ";\t" + m.keys + ";\t" + m.rps + ";\t" + m.valueLength + ";\t" + m.cacheSize); + } + } + + // Measurement class + private static class Measure { + private long fileSize; + private double rps; + private int valueLength; + private long cacheSize; + private int keys; + + private Measure(long fileSize, double rps, int valueLength, long cacheSize, int keys) { + this.fileSize = fileSize; + this.rps = rps; + this.valueLength = valueLength; + this.cacheSize = cacheSize; + this.keys = keys; + } + } +} diff --git a/paldb/src/perfTest/java/com/linkedin/paldb/TestReadThroughputLevelDB.java b/src/test/java/com/linkedin/paldb/performance/TestReadThroughputLevelDB.java similarity index 87% rename from paldb/src/perfTest/java/com/linkedin/paldb/TestReadThroughputLevelDB.java rename to src/test/java/com/linkedin/paldb/performance/TestReadThroughputLevelDB.java index 22265ff..3c0dda8 100644 --- a/paldb/src/perfTest/java/com/linkedin/paldb/TestReadThroughputLevelDB.java +++ b/src/test/java/com/linkedin/paldb/performance/TestReadThroughputLevelDB.java @@ -12,40 +12,33 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ -package com.linkedin.paldb; +package com.linkedin.paldb.performance; import com.linkedin.paldb.impl.GenerateTestData; -import com.linkedin.paldb.utils.DirectoryUtils; -import com.linkedin.paldb.utils.NanoBench; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; +import com.linkedin.paldb.performance.utils.*; import org.apache.commons.lang.RandomStringUtils; -import org.iq80.leveldb.CompressionType; -import org.iq80.leveldb.DB; -import org.iq80.leveldb.Options; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; +import org.iq80.leveldb.*; +import org.junit.jupiter.api.*; -import static org.fusesource.leveldbjni.JniDBFactory.bytes; -import static org.fusesource.leveldbjni.JniDBFactory.factory; +import java.io.*; +import java.util.*; +import static org.fusesource.leveldbjni.JniDBFactory.*; +@Disabled +@Tag("performance") public class TestReadThroughputLevelDB { private File TEST_FOLDER = new File("testreadthroughputleveldb"); private final int READS = 500000; - @BeforeMethod + @BeforeEach public void setUp() { DirectoryUtils.deleteDirectory(TEST_FOLDER); TEST_FOLDER.mkdir(); } - @AfterMethod + @AfterEach public void cleanUp() { DirectoryUtils.deleteDirectory(TEST_FOLDER); } diff --git a/paldb/src/perfTest/java/com/linkedin/paldb/TestReadThrouputHashMap.java b/src/test/java/com/linkedin/paldb/performance/TestReadThrouputHashMap.java similarity index 89% rename from paldb/src/perfTest/java/com/linkedin/paldb/TestReadThrouputHashMap.java rename to src/test/java/com/linkedin/paldb/performance/TestReadThrouputHashMap.java index b9f0e2b..615b10b 100644 --- a/paldb/src/perfTest/java/com/linkedin/paldb/TestReadThrouputHashMap.java +++ b/src/test/java/com/linkedin/paldb/performance/TestReadThrouputHashMap.java @@ -12,15 +12,15 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.paldb; +package com.linkedin.paldb.performance; -import com.linkedin.paldb.utils.NanoBench; -import java.util.HashSet; -import java.util.Random; -import java.util.Set; -import org.testng.annotations.Test; +import com.linkedin.paldb.performance.utils.NanoBench; +import org.junit.jupiter.api.*; +import java.util.*; +@Disabled +@Tag("performance") public class TestReadThrouputHashMap { private final int READS = 500000; diff --git a/paldb/src/perfTest/java/com/linkedin/paldb/TestReadThrouputRocksDB.java b/src/test/java/com/linkedin/paldb/performance/TestReadThrouputRocksDB.java similarity index 88% rename from paldb/src/perfTest/java/com/linkedin/paldb/TestReadThrouputRocksDB.java rename to src/test/java/com/linkedin/paldb/performance/TestReadThrouputRocksDB.java index fe9c2fc..0f01ca1 100644 --- a/paldb/src/perfTest/java/com/linkedin/paldb/TestReadThrouputRocksDB.java +++ b/src/test/java/com/linkedin/paldb/performance/TestReadThrouputRocksDB.java @@ -12,47 +12,36 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ -package com.linkedin.paldb; +package com.linkedin.paldb.performance; import com.linkedin.paldb.impl.GenerateTestData; -import com.linkedin.paldb.utils.DirectoryUtils; -import com.linkedin.paldb.utils.NanoBench; -import java.io.File; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; +import com.linkedin.paldb.performance.utils.*; import org.apache.commons.lang.RandomStringUtils; -import org.rocksdb.CompactionStyle; -import org.rocksdb.CompressionType; -import org.rocksdb.Options; -import org.rocksdb.ReadOptions; -import org.rocksdb.RocksDB; -import org.rocksdb.RocksDBException; -import org.rocksdb.BlockBasedTableConfig; -import org.rocksdb.BloomFilter; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; +import org.junit.jupiter.api.*; +import org.rocksdb.*; +import java.io.File; +import java.util.*; +@Disabled +@Tag("performance") public class TestReadThrouputRocksDB { private File TEST_FOLDER = new File("testreadthroughputrocksdb"); private final int READS = 500000; - @BeforeClass + @BeforeEach public void loadLibrary() { RocksDB.loadLibrary(); } - @BeforeMethod + @BeforeEach public void setUp() { DirectoryUtils.deleteDirectory(TEST_FOLDER); TEST_FOLDER.mkdir(); } - @AfterMethod + @AfterEach public void cleanUp() { DirectoryUtils.deleteDirectory(TEST_FOLDER); } diff --git a/paldb/src/perfTest/java/com/linkedin/paldb/TestStoreSize.java b/src/test/java/com/linkedin/paldb/performance/TestStoreSize.java similarity index 81% rename from paldb/src/perfTest/java/com/linkedin/paldb/TestStoreSize.java rename to src/test/java/com/linkedin/paldb/performance/TestStoreSize.java index 38d559f..d2023d3 100644 --- a/paldb/src/perfTest/java/com/linkedin/paldb/TestStoreSize.java +++ b/src/test/java/com/linkedin/paldb/performance/TestStoreSize.java @@ -12,30 +12,27 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ -package com.linkedin.paldb; +package com.linkedin.paldb.performance; -import com.linkedin.paldb.api.Configuration; -import com.linkedin.paldb.api.PalDB; -import com.linkedin.paldb.api.StoreWriter; +import com.linkedin.paldb.api.*; import com.linkedin.paldb.impl.GenerateTestData; -import com.linkedin.paldb.utils.DirectoryUtils; -import java.io.File; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - +import com.linkedin.paldb.performance.utils.DirectoryUtils; +import org.junit.jupiter.api.*; +import java.io.File; +@Disabled +@Tag("performance") public class TestStoreSize { private File TEST_FOLDER = new File("teststoresize"); - @BeforeMethod + @BeforeEach public void setUp() { DirectoryUtils.deleteDirectory(TEST_FOLDER); TEST_FOLDER.mkdir(); } - @AfterMethod + @AfterEach public void cleanUp() { DirectoryUtils.deleteDirectory(TEST_FOLDER); } diff --git a/paldb/src/perfTest/java/com/linkedin/paldb/utils/DirectoryUtils.java b/src/test/java/com/linkedin/paldb/performance/utils/DirectoryUtils.java similarity index 94% rename from paldb/src/perfTest/java/com/linkedin/paldb/utils/DirectoryUtils.java rename to src/test/java/com/linkedin/paldb/performance/utils/DirectoryUtils.java index 7617c52..0b68317 100644 --- a/paldb/src/perfTest/java/com/linkedin/paldb/utils/DirectoryUtils.java +++ b/src/test/java/com/linkedin/paldb/performance/utils/DirectoryUtils.java @@ -1,4 +1,4 @@ -package com.linkedin.paldb.utils; +package com.linkedin.paldb.performance.utils; import java.io.File; diff --git a/paldb/src/perfTest/java/com/linkedin/paldb/utils/NanoBench.java b/src/test/java/com/linkedin/paldb/performance/utils/NanoBench.java similarity index 99% rename from paldb/src/perfTest/java/com/linkedin/paldb/utils/NanoBench.java rename to src/test/java/com/linkedin/paldb/performance/utils/NanoBench.java index 5b7eaba..43b5637 100644 --- a/paldb/src/perfTest/java/com/linkedin/paldb/utils/NanoBench.java +++ b/src/test/java/com/linkedin/paldb/performance/utils/NanoBench.java @@ -1,4 +1,4 @@ -package com.linkedin.paldb.utils; +package com.linkedin.paldb.performance.utils; import java.lang.management.ManagementFactory; import java.text.DecimalFormat; diff --git a/src/test/java/com/linkedin/paldb/utils/BloomFilterTest.java b/src/test/java/com/linkedin/paldb/utils/BloomFilterTest.java new file mode 100644 index 0000000..b35b771 --- /dev/null +++ b/src/test/java/com/linkedin/paldb/utils/BloomFilterTest.java @@ -0,0 +1,165 @@ +package com.linkedin.paldb.utils; + + +import org.junit.jupiter.api.*; + +import java.lang.management.*; +import java.util.*; +import java.util.concurrent.CompletableFuture; +import java.util.stream.*; + +import static org.junit.jupiter.api.Assertions.*; + + +public class BloomFilterTest { + + private int elements = 1_000_000; + private int bitsize = 10_000_000; + private BloomFilter filter; + private Random prng; + private ThreadMXBean bean; + + @BeforeEach + void setUp() { + bean = ManagementFactory.getThreadMXBean(); + prng = new Random(); + prng.setSeed(0); + filter = new BloomFilter(elements, bitsize); + } + + @Test + void should_calculate_bit_size_correctly() { + var sut = new BloomFilter(10_000, 0.01); + + assertEquals(95851 , sut.bitSize()); + } + + @Test + void should_calculate_hash_functions_correctly() { + var sut = new BloomFilter(10_000, 0.01); + + assertEquals(7 , sut.hashFunctions()); + } + + @Test + void should_provide_hashcode() { + var sut1 = new BloomFilter(10_000, 0.01); + sut1.add("test".getBytes()); 
+ var sut1_1 = new BloomFilter(10_000, 0.01); + sut1_1.add("test".getBytes()); + + assertEquals(sut1.hashCode(), sut1_1.hashCode()); + } + + @Test + void should_test_multi_thread_get() { + var sut1 = new BloomFilter(10_000, 0.01); + sut1.add("test".getBytes()); + + var futures = IntStream.range(0, 1000) + .mapToObj(i -> CompletableFuture.runAsync(() -> assertTrue(sut1.mightContain("test".getBytes())))) + .collect(Collectors.toList()); + + futures.forEach(CompletableFuture::join); + } + + @Test + void correctness() { + System.out.println("Testing correctness.\n"+ + "Creating a Set and filling it together with our filter..."); + filter.clear(); + Set inside = new HashSet<>((int)(elements / 0.75)); + var bytes = new byte[10]; + while(inside.size() < elements) { + var v = Math.abs(prng.nextLong()); + var pos = LongPacker.packLong(bytes, v); + var valueBytes = Arrays.copyOf(bytes, pos); + + inside.add(v); + filter.add(valueBytes); + assertTrue(filter.mightContain(valueBytes), "There should be no false negative: " + v); + } + + // testing + int found = 0, total = 0; + double rate = 0; + while (total < elements) { + var v = Math.abs(prng.nextLong()); + if (inside.contains(v)) continue; + var pos = LongPacker.packLong(bytes, v); + var valueBytes = Arrays.copyOf(bytes, pos); + total++; + found += filter.mightContain(valueBytes) ? 1 : 0; + + rate = (float) found / total; + if (total % 1000 == 0 || total == elements) { + System.out.format( + "\rElements incorrectly found to be inside: %8d/%-8d (%3.2f%%)", + found, total, 100*rate + ); + } + } + System.out.println("\n"); + + double ln2 = Math.log(2); + double expectedRate = Math.exp(-ln2*ln2 * bitsize / elements); + assertTrue(rate <= expectedRate * 1.10, "error rate p = e^(-ln2^2*m/n)"); + } + + @Test + void insertion() { + System.out.println("Testing insertion speed..."); + var bytes = new byte[10]; + filter.clear(); + long start = bean.getCurrentThreadCpuTime(); + for(int i=0; i LongPacker.packInt(dio.reset(), -42)); } @Test @@ -58,7 +59,7 @@ public void testPackLong() throws IOException { DataInputOutput dio = new DataInputOutput(); LongPacker.packLong(dio.reset(), 42l); - Assert.assertEquals(LongPacker.unpackLong(dio.reset(dio.toByteArray())), 42); + assertEquals(LongPacker.unpackLong(dio.reset(dio.toByteArray())), 42); } @Test @@ -66,7 +67,7 @@ public void testPackLongZero() throws IOException { DataInputOutput dio = new DataInputOutput(); LongPacker.packLong(dio.reset(), 0l); - Assert.assertEquals(LongPacker.unpackLong(dio.reset(dio.toByteArray())), 0l); + assertEquals(LongPacker.unpackLong(dio.reset(dio.toByteArray())), 0l); } @Test @@ -74,7 +75,7 @@ public void testPackLongBytes() throws IOException { byte[] buf = new byte[15]; LongPacker.packLong(buf, 42l); - Assert.assertEquals(LongPacker.unpackLong(buf), 42l); + assertEquals(LongPacker.unpackLong(buf), 42l); } @Test @@ -82,7 +83,7 @@ public void testPackLongMax() throws IOException { DataInputOutput dio = new DataInputOutput(); LongPacker.packLong(dio.reset(), Long.MAX_VALUE); - Assert.assertEquals(LongPacker.unpackLong(dio.reset(dio.toByteArray())), Long.MAX_VALUE); + assertEquals(LongPacker.unpackLong(dio.reset(dio.toByteArray())), Long.MAX_VALUE); } @Test @@ -90,28 +91,26 @@ public void testPackLongBytesMax() throws IOException { byte[] buf = new byte[15]; LongPacker.packLong(buf, Long.MAX_VALUE); - Assert.assertEquals(LongPacker.unpackLong(buf), Long.MAX_VALUE); + assertEquals(LongPacker.unpackLong(buf), Long.MAX_VALUE); } - @Test(expectedExceptions = IllegalArgumentException.class) + 
@Test public void testPackLongNeg() throws IOException { DataInputOutput dio = new DataInputOutput(); - LongPacker.packLong(dio.reset(), -42l); + assertThrows(IllegalArgumentException.class, () -> LongPacker.packLong(dio.reset(), -42L)); } - @Test(expectedExceptions = IllegalArgumentException.class) - public void testPackLongBytesNeg() - throws IOException { - LongPacker.packLong(new byte[15], -42l); + @Test + public void testPackLongBytesNeg() { + assertThrows(IllegalArgumentException.class, () -> LongPacker.packLong(new byte[15], -42L)); } @Test - public void test() - throws IOException { + public void test() throws IOException { DataInputOutput dio = new DataInputOutput(); LongPacker.packInt(dio.reset(), 5); ByteBuffer bb = ByteBuffer.wrap(dio.getBuf()); - Assert.assertEquals(LongPacker.unpackInt(bb), 5); + assertEquals(LongPacker.unpackInt(bb), 5); } } diff --git a/paldb/src/test/java/com/linkedin/paldb/utils/TestHashUtils.java b/src/test/java/com/linkedin/paldb/utils/TestMurmur3.java similarity index 57% rename from paldb/src/test/java/com/linkedin/paldb/utils/TestHashUtils.java rename to src/test/java/com/linkedin/paldb/utils/TestMurmur3.java index c881c0e..25faa75 100644 --- a/paldb/src/test/java/com/linkedin/paldb/utils/TestHashUtils.java +++ b/src/test/java/com/linkedin/paldb/utils/TestMurmur3.java @@ -14,20 +14,26 @@ package com.linkedin.paldb.utils; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.apache.commons.codec.digest.MurmurHash3; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; -public class TestHashUtils { - HashUtils hashUtils = new HashUtils(); +public class TestMurmur3 { @Test public void testHashEquals() { - Assert.assertEquals(hashUtils.hash("foo".getBytes()), hashUtils.hash("foo".getBytes())); + assertEquals(Murmur3.hash("foo".getBytes()), Murmur3.hash("foo".getBytes())); } @Test public void testEmpty() { - Assert.assertTrue(hashUtils.hash(new byte[0]) > 0); + assertTrue(Murmur3.hash(new byte[0]) > 0); + } + + @Test + public void testSameHash() { + var bytes = "foo".getBytes(); + assertEquals(Murmur3.hash(bytes, 42), MurmurHash3.hash32(bytes, bytes.length, 42)); } } diff --git a/paldb/src/test/java/com/linkedin/paldb/utils/TestTempUtils.java b/src/test/java/com/linkedin/paldb/utils/TestTempUtils.java similarity index 60% rename from paldb/src/test/java/com/linkedin/paldb/utils/TestTempUtils.java rename to src/test/java/com/linkedin/paldb/utils/TestTempUtils.java index 5e7fb64..53480cd 100644 --- a/paldb/src/test/java/com/linkedin/paldb/utils/TestTempUtils.java +++ b/src/test/java/com/linkedin/paldb/utils/TestTempUtils.java @@ -14,23 +14,33 @@ package com.linkedin.paldb.utils; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; + +import java.io.*; + +import static org.junit.jupiter.api.Assertions.*; public class TestTempUtils { + public static boolean deleteDirectory(File directoryToBeDeleted) { + if (directoryToBeDeleted.isDirectory()) { + File[] allContents = directoryToBeDeleted.listFiles(); + if (allContents != null) { + for (File file : allContents) { + deleteDirectory(file); + } + } + } + return directoryToBeDeleted.delete(); + } + @Test public void testTempDir() { File file = TempUtils.createTempDir("foo"); - Assert.assertTrue(file.exists()); - Assert.assertTrue(file.isDirectory()); - 
Assert.assertTrue(file.getName().contains("foo")); + assertTrue(file.exists()); + assertTrue(file.isDirectory()); + assertTrue(file.getName().contains("foo")); file.delete(); } @@ -39,10 +49,10 @@ public void testCopyIntoTempFile() throws IOException { ByteArrayInputStream bis = new ByteArrayInputStream("foo".getBytes()); File file = TempUtils.copyIntoTempFile("bar", bis); - Assert.assertTrue(file.exists()); - Assert.assertTrue(file.isFile()); - Assert.assertTrue(file.getName().contains("bar")); - Assert.assertEquals(bis.available(), 0); + assertTrue(file.exists()); + assertTrue(file.isFile()); + assertTrue(file.getName().contains("bar")); + assertEquals(bis.available(), 0); ByteArrayOutputStream bos = new ByteArrayOutputStream(); FileInputStream fis = new FileInputStream(file); @@ -53,7 +63,7 @@ public void testCopyIntoTempFile() } fis.close(); bos.close(); - Assert.assertEquals(bos.toByteArray(), "foo".getBytes()); + assertArrayEquals(bos.toByteArray(), "foo".getBytes()); } } diff --git a/src/test/resources/logback-test.xml b/src/test/resources/logback-test.xml new file mode 100644 index 0000000..cbb400f --- /dev/null +++ b/src/test/resources/logback-test.xml @@ -0,0 +1,15 @@ + + + + + + [%d{yyyy-MM-dd HH:mm:ss.SSS}] %-5level [%thread] %class{1}.%M:%L - %msg %n + + + + + + + + + \ No newline at end of file