Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support UTF-8 in RPSL Objects #1576

Draft
wants to merge 34 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
ed7ccfa
Parse UTF-8 characters in RPSL objects.
eshryane Aug 23, 2013
46b785a
Switch to UTF-8 within the application (including the database).
eshryane Aug 23, 2013
bbe1b42
Encode test client query response as UTF-8.
eshryane Aug 26, 2013
3947248
Export database with UTF-8 encoding.
eshryane Aug 26, 2013
8e7580b
Merge branch 'master' into utf8
eshryane Aug 26, 2013
780f695
Tested that single-byte latin1 characters > ASCII in the object BLOB …
eshryane Sep 9, 2013
438f25f
Merge branch 'master' into utf8
eshryane Dec 12, 2014
6eb4812
Fixed UTF8 tests (no conversion to latin1 on this branch).
eshryane Dec 17, 2014
9bae706
Fixed UTF8 in test
eshryane Dec 17, 2014
9e8c5b9
Merge branch 'master' into utf8
eshryane Feb 19, 2015
b970151
Merge branch 'master' into utf8
eshryane Oct 29, 2015
8b22bc4
Merge branch 'master' into utf8
eshryane Nov 16, 2015
90aea06
Merge branch 'master' into utf8
eshryane May 16, 2017
a4146e6
Merge branch 'master' into utf8
eshryane May 16, 2017
7f0ea25
Merge branch 'master' into utf8
eshryane Jul 8, 2020
801865e
Deleted obsolete files
eshryane Jul 12, 2020
31ec4e3
Use utf8mb4 character set instead of utf8.
eshryane Jul 12, 2020
a18632f
Merge branch 'master' into utf8
eshryane Aug 11, 2020
681a9b3
Created script to convert Whois schema to UTF8.
eshryane Aug 11, 2020
511c6ea
Added conversion script for versions database
eshryane Aug 12, 2020
6d70e0d
Merge branch 'master' into utf8
eshryane Mar 15, 2021
213cc95
Fixed tests following merge
eshryane Mar 15, 2021
1889d3d
Merge branch 'master' into utf8
eshryane Jan 22, 2024
5397a57
Merge branch 'master' into utf8
eshryane Jan 22, 2024
5553089
Merge branch 'master' into utf8
eshryane Mar 27, 2024
44be90b
Add patch files for remaining DB schemas
eshryane Mar 27, 2024
2e4e2ce
Specify UTF8 in all schemas (not latin1)
eshryane Mar 27, 2024
201eba5
Corrected NRTM schema (put the drop table just before the create tabl…
eshryane Mar 27, 2024
bc22104
Removed redundant character set and collation from Whois auth table V…
eshryane Mar 27, 2024
90c516d
schema charset is utf8mb4 not latin1
eshryane Mar 27, 2024
1ad1c33
Merge branch 'master' into utf8
eshryane Oct 29, 2024
1ed092a
remove obsolete patch files (database schema is already converted to …
eshryane Oct 29, 2024
84dc093
remove obsolete patch files
eshryane Oct 29, 2024
1b194f3
fixed test
eshryane Oct 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import net.ripe.db.whois.common.Latin1Conversion;
import net.ripe.db.whois.common.Latin1ConversionResult;
import net.ripe.db.whois.common.PunycodeConversion;
import net.ripe.db.whois.update.domain.Operation;
import net.ripe.db.whois.update.domain.Paragraph;
import net.ripe.db.whois.update.domain.Update;
Expand All @@ -19,21 +18,14 @@ public static Update createUpdate(final Paragraph paragraph,
final String rpslObject,
final UpdateContext updateContext) {

final String punycodeResult = PunycodeConversion.convert(rpslObject);
final Latin1ConversionResult conversionResult = Latin1Conversion.convert(rpslObject);
final Update update = new Update(paragraph, operation, deleteReasons, conversionResult.getRpslObject());

final Latin1ConversionResult latin1ConversionResult = Latin1Conversion.convert(punycodeResult);

final Update update = new Update(paragraph, operation, deleteReasons, latin1ConversionResult.getRpslObject());

if (!punycodeResult.equals(rpslObject)) {
updateContext.addMessage(update, UpdateMessages.valueChangedDueToPunycodeConversion());
}

if (latin1ConversionResult.isGlobalSubstitution()) {
if (conversionResult.isGlobalSubstitution()) {
updateContext.addMessage(update, UpdateMessages.valueChangedDueToLatin1Conversion());
}

latin1ConversionResult.getSubstitutedAttributes().forEach(attr -> updateContext.addMessage(update, attr, UpdateMessages.valueChangedDueToLatin1Conversion(attr.getKey())));
conversionResult.getSubstitutedAttributes().forEach(attr -> updateContext.addMessage(update, attr, UpdateMessages.valueChangedDueToLatin1Conversion(attr.getKey())));

return update;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ Charset getCharset(final ContentType contentType) {
}
}

return StandardCharsets.ISO_8859_1;
return StandardCharsets.UTF_8;
}

String getHeaders(final Part part) throws MessagingException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,6 @@ private Charset getCharset(final String contentType) {
}
}
}

// application/x-www-form-urlencoded is UTF-8 by default
return StandardCharsets.UTF_8;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ public void parse_smime_multipart_alternative() throws Exception {

@Test
public void illegal_charset() throws Exception {
assertThat(subject.getCharset(new ContentType("text/plain;\n\tcharset=\"_iso-2022-jp$ESC\"")), is(StandardCharsets.ISO_8859_1));
assertThat(subject.getCharset(new ContentType("text/plain;\n\tcharset=\"_iso-2022-jp$ESC\"")), is(StandardCharsets.UTF_8));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,7 @@ public void post_url_encoded_data() {
}

@Test
public void post_url_encoded_data_with_latin1_charset_error() {
public void post_url_encoded_data_with_latin1_charset_failure() {
rpslObjectUpdateDao.createObject(RpslObject.parse(PERSON_ANY1_TEST));
rpslObjectUpdateDao.createObject(RpslObject.parse(MNTNER_TEST_MNTNER));

Expand All @@ -744,7 +744,7 @@ public void post_url_encoded_data_with_latin1_charset_error() {
}

@Test
public void post_url_encoded_data_with_latin1_charset() {
public void post_url_encoded_data_with_non_latin1_address_success() {
rpslObjectUpdateDao.createObject(RpslObject.parse(PERSON_ANY1_TEST));
rpslObjectUpdateDao.createObject(RpslObject.parse(MNTNER_TEST_MNTNER));

Expand Down Expand Up @@ -805,7 +805,7 @@ public void post_url_encoded_data_with_non_latin1_address() {
MediaType.valueOf("application/x-www-form-urlencoded; charset=UTF-8")), String.class);

assertThat(databaseHelper.lookupObject(ObjectType.PERSON, "TP2-TEST").toString(),
containsString("address: ???????? ?????,??????"));
containsString("address: Тверская улица,москва"));
}

@Test
Expand Down Expand Up @@ -899,12 +899,14 @@ public void post_multipart_data_with_non_latin1_address() {
"source: TEST\n" +
"password: emptypassword")
.field("NEW", "yes");
RestTest.target(getPort(), "whois/syncupdates/test")

final String response = RestTest.target(getPort(), "whois/syncupdates/test")
.request()
.post(Entity.entity(multipart, multipart.getMediaType()), String.class);

assertThat(response, containsString("Create SUCCEEDED: [person] TP2-TEST Test Person"));
assertThat(databaseHelper.lookupObject(ObjectType.PERSON, "TP2-TEST").toString(),
containsString("address: ???????? ?????,??????"));
containsString("address: Тверская улица,москва"));
}

@Test
Expand All @@ -922,15 +924,15 @@ public void post_multipart_data_with_control_characters_address() {
"source: TEST\n" +
"password: emptypassword")
.field("NEW", "yes");
RestTest.target(getPort(), "whois/syncupdates/test")
final String response = RestTest.target(getPort(), "whois/syncupdates/test")
.request()
.post(Entity.entity(multipart, multipart.getMediaType()), String.class);

assertThat(response, containsString("Create SUCCEEDED: [person] TP2-TEST Test Person"));
assertThat(databaseHelper.lookupObject(ObjectType.PERSON, "TP2-TEST").toString(),
containsString("address: Test???? Address"));
containsString("address: Тверская улица,москва"));
}


@Test
public void post_multipart_data_with_latin1_non_ascii_address() {
databaseHelper.addObject(PERSON_ANY1_TEST);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3085,8 +3085,9 @@ public void create_non_latin1_characters_are_substituted() {
.request()
.post(Entity.entity(map(person), MediaType.APPLICATION_XML), WhoisResources.class);

// UTF-8 characters are NOT mapped to latin1, but left as-is
final WhoisObject responseObject = whoisResources.getWhoisObjects().get(0);
assertThat(responseObject.getAttributes().get(1).getValue(), is("???????? ?????,??????"));
assertThat(responseObject.getAttributes().get(1).getValue(), is("test \u03A3 and \u00DF characters"));
}

@Test
Expand Down Expand Up @@ -3293,19 +3294,18 @@ public void update_person_non_latin1_characters_are_substituted() {
.put(Entity.entity(whoisObjectMapper.mapRpslObjects(FormattedClientAttributeMapper.class, update), MediaType.APPLICATION_XML),
WhoisResources.class);

RestTest.assertWarningCount(response, 1);
RestTest.assertErrorMessage(response, 0, "Warning", "Value changed due to conversion into the ISO-8859-1 (Latin-1) character set");
RestTest.assertErrorCount(response, 0);

final RpslObject lookupObject = databaseHelper.lookupObject(ObjectType.PERSON, "TP1-TEST");
assertThat(lookupObject.findAttribute(AttributeType.ADDRESS).getValue(), is(" ???????? ?????,??????"));
assertThat(lookupObject.findAttribute(AttributeType.ADDRESS).getValue(), is(" address: Тверская улица,москва"));
}
{
final WhoisResources response =
RestTest.target(getPort(), "whois/test/person/TP1-TEST?password=test")
.request()
.get(WhoisResources.class);

assertThat(response.getWhoisObjects().get(0).getAttributes(), hasItem(new Attribute("address", "???????? ?????,??????")));
assertThat(response.getWhoisObjects().get(0).getAttributes(), hasItem(new Attribute("address", "address: Тверская улица,москва")));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ private void handleNext() throws SQLException {
row.add("<null>");
} else if (object instanceof Blob) {
final Blob blob = (Blob) object;
row.add(new String(blob.getBytes(0, (int) blob.length()), StandardCharsets.ISO_8859_1));
row.add(new String(blob.getBytes(0, (int) blob.length()), StandardCharsets.UTF_8));
} else if (object instanceof byte[]) {
row.add(new String((byte[]) object, StandardCharsets.ISO_8859_1));
row.add(new String((byte[]) object, StandardCharsets.UTF_8));
} else {
row.add(object.toString());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
import java.nio.charset.StandardCharsets;

public final class ChannelUtil {

private static final Logger LOGGER = LoggerFactory.getLogger(ChannelUtil.class);
public static final Charset BYTE_ENCODING = StandardCharsets.ISO_8859_1;

public static final Charset BYTE_ENCODING = StandardCharsets.UTF_8;

public static final AttributeKey<InetAddress> CLIENT_IP = AttributeKey.newInstance("client-ip");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public class TelnetWhoisClient {

public static final int DEFAULT_PORT = 43;
public static final String DEFAULT_HOST = "localhost";
public static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
public static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
private static final int DEFAULT_TIMEOUT = (int)TimeUnit.MINUTES.toMillis(5);
private final String host;
private final int port;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ public static X509CertificateWrapper parse(final RpslObject rpslObject) {
throw new IllegalArgumentException("The supplied object has no key");
}

return parse(RpslObjectFilter.getCertificateFromKeyCert(rpslObject).getBytes(StandardCharsets.ISO_8859_1));
try {
return parse(RpslObjectFilter.getCertificateFromKeyCert(rpslObject).getBytes(StandardCharsets.UTF_8));
} catch (Exception e) {
throw new IllegalArgumentException("Error parsing X509 certificate from key-cert object", e);
}
}

public static X509CertificateWrapper parse(final String certificate) {
Expand Down
16 changes: 8 additions & 8 deletions whois-commons/src/main/resources/acl_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;
/*!40101 SET NAMES utf8mb4 */;
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
/*!40103 SET TIME_ZONE='+00:00' */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
Expand All @@ -21,7 +21,7 @@

DROP TABLE IF EXISTS `acl_denied`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
/*!40101 SET character_set_client = utf8mb4 */;
CREATE TABLE `acl_denied` (
`prefix` varchar(50) NOT NULL,
`comment` text,
Expand All @@ -36,7 +36,7 @@ CREATE TABLE `acl_denied` (

DROP TABLE IF EXISTS `acl_event`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
/*!40101 SET character_set_client = utf8mb4 */;
CREATE TABLE `acl_event` (
`prefix` varchar(50) NOT NULL,
`event_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
Expand Down Expand Up @@ -79,7 +79,7 @@ CREATE TABLE `acl_sso_event` (

DROP TABLE IF EXISTS `acl_limit`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
/*!40101 SET character_set_client = utf8mb4 */;
CREATE TABLE `acl_limit` (
`prefix` varchar(50) NOT NULL,
`daily_limit` int(11) NOT NULL DEFAULT '-1',
Expand All @@ -95,7 +95,7 @@ CREATE TABLE `acl_limit` (

DROP TABLE IF EXISTS `acl_proxy`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
/*!40101 SET character_set_client = utf8mb4 */;
CREATE TABLE `acl_proxy` (
`prefix` varchar(50) NOT NULL,
`comment` text,
Expand All @@ -109,7 +109,7 @@ CREATE TABLE `acl_proxy` (

DROP TABLE IF EXISTS `override_users`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
/*!40101 SET character_set_client = utf8mb4 */;
CREATE TABLE `override_users` (
`username` varchar(255) NOT NULL,
`password` varchar(64) NOT NULL,
Expand All @@ -125,7 +125,7 @@ CREATE TABLE `override_users` (

DROP TABLE IF EXISTS `apikeys`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
/*!40101 SET character_set_client = utf8mb4 */;
CREATE TABLE `apikeys` (
`apikey` varchar(128) NOT NULL,
`uri_prefix` varchar(128) NOT NULL,
Expand All @@ -140,7 +140,7 @@ CREATE TABLE `apikeys` (

DROP TABLE IF EXISTS `version`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
/*!40101 SET character_set_client = utf8mb4 */;
CREATE TABLE `version` (
`version` varchar(80) DEFAULT NULL,
PRIMARY KEY (`version`)
Expand Down
6 changes: 3 additions & 3 deletions whois-commons/src/main/resources/mailupdates_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;
/*!40101 SET NAMES utf8mb4 */;
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
/*!40103 SET TIME_ZONE='+00:00' */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
Expand All @@ -21,7 +21,7 @@

DROP TABLE IF EXISTS `mailupdates`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
/*!40101 SET character_set_client = utf8mb4 */;
CREATE TABLE `mailupdates` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`message` longblob NOT NULL,
Expand All @@ -40,7 +40,7 @@ CREATE TABLE `mailupdates` (

DROP TABLE IF EXISTS `version`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
/*!40101 SET character_set_client = utf8mb4 */;
CREATE TABLE `version` (
`version` varchar(80) DEFAULT NULL,
PRIMARY KEY (`version`)
Expand Down
13 changes: 12 additions & 1 deletion whois-commons/src/main/resources/nrtm_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ CREATE TABLE `source`
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4;

DROP TABLE IF EXISTS `version_info`;
CREATE TABLE `version_info`
(
`id` int unsigned NOT NULL AUTO_INCREMENT,
Expand All @@ -47,6 +48,7 @@ CREATE TABLE `version_info`
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4;

DROP TABLE IF EXISTS `snapshot_file`;
CREATE TABLE `snapshot_file`
(
`id` int unsigned NOT NULL AUTO_INCREMENT,
Expand All @@ -61,6 +63,7 @@ CREATE TABLE `snapshot_file`
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4;

DROP TABLE IF EXISTS `delta_file`;
CREATE TABLE `delta_file`
(
`id` int unsigned NOT NULL AUTO_INCREMENT,
Expand All @@ -75,6 +78,7 @@ CREATE TABLE `delta_file`
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4;

DROP TABLE IF EXISTS `notification_file`;
CREATE TABLE `notification_file`
(
`id` int unsigned NOT NULL AUTO_INCREMENT,
Expand All @@ -86,7 +90,7 @@ CREATE TABLE `notification_file`
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4;


DROP TABLE IF EXISTS `key_pair`;
CREATE TABLE `key_pair`
(
`id` int unsigned NOT NULL AUTO_INCREMENT,
Expand All @@ -101,6 +105,13 @@ CREATE TABLE `key_pair`
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4;

-- Recreate the `version` table from scratch: any existing table is dropped first,
-- so the CREATE TABLE below cannot fail when this script is run again.
-- The table has a single nullable varchar(80) column and no primary or unique key,
-- so nothing at the schema level prevents multiple rows.
-- NOTE(review): presumably stores the applied schema version label - confirm
-- against the code that reads this table.
DROP TABLE IF EXISTS `version`;
CREATE TABLE `version`
(
`version` varchar(80)
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4;

/*!40103 SET TIME_ZONE = @OLD_TIME_ZONE */;

/*!40101 SET SQL_MODE = @OLD_SQL_MODE */;
Expand Down
Loading