Skip to content

Commit

Permalink
Fix file encoding detection (fixes #34)
Browse files — browse the repository at this point in the history
  • Loading branch information
krzemin committed Nov 19, 2015
1 parent b2ab353 commit abb02b6
Showing 1 changed file with 20 additions and 13 deletions.
33 changes: 20 additions & 13 deletions src/encodingutils.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "encodingutils.h"

#include <QTextStream>
#include <QTextCodec>

EncodingUtils::EncodingUtils()
{
Expand All @@ -24,8 +25,13 @@ EncodingUtils::EncodingUtils()
<<"I"<<"I"<<"I"<<"I"<<"D"<<"N"<<"O"<<"O"<<"O"<<"O"<<"O"<<"O"<<"U"<<"U"<<"U"<<"U"<<"Y"<<"s"<<"a"
<<"a"<<"a"<<"a"<<"a"<<"a"<<"ae"<<"c"<<"e"<<"e"<<"e"<<"e"<<"i"<<"i"<<"i"<<"i"<<"o"<<"n"<<"o"<<"o"
<<"o"<<"o"<<"o"<<"o"<<"u"<<"u"<<"u"<<"u"<<"y"<<"y";
codecs << "windows-1250" << "windows-1257" << "ISO-8859-2"
<< "ISO-8859-13" << "ISO-8859-16" << "UTF-8";

codecs << "windows-1257"
<< "ISO-8859-13"
<< "ISO-8859-16"
<< "ISO-8859-2"
<< "windows-1250"
<< "UTF-8";
}

QString EncodingUtils::replaceDiacriticsWithASCII(const QString & str) {
Expand All @@ -50,21 +56,22 @@ QString EncodingUtils::detectBufferEncoding(const QByteArray & buffer) {

foreach(QString codec, codecs)
{
QTextStream ts(buffer);
ts.setCodec(qPrintable(codec));
QString encodedData = ts.readAll();
QTextCodec *tc = QTextCodec::codecForName(qPrintable(codec));
const QString text = tc->toUnicode(buffer.constData(), buffer.size());

QStringList chars = QString::fromUtf8("ą/ś/ż/ć/ń/ł/ó/ę").split("/");

int i;
for (i = 0; i < chars.count(); i++)
int found = 0;

foreach (QString c, chars)
{
if(!encodedData.contains(chars[i], Qt::CaseInsensitive))
break;
if(text.contains(c, Qt::CaseInsensitive))
++found;
}

if(i + 1 > bestMatch) {
bestMatch = i + 1;
from = codec;
}
if(found >= bestMatch) {
bestMatch = found;
from = codec;
}
}

Expand Down

0 comments on commit abb02b6

Please sign in to comment.