Skip to content

Commit

Permalink
Merge pull request #1699 from brave/incorrect-attention
Browse files Browse the repository at this point in the history
Fixes youtube channel/user detection
  • Loading branch information
NejcZdovc committed Feb 18, 2019
1 parent 422cb8f commit 2882554
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 25 deletions.
54 changes: 44 additions & 10 deletions vendor/bat-native-ledger/src/bat_get_media.cc
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ void BatGetMedia::processYoutubeChannelPath(uint64_t windowId,
const ledger::VisitData& visit_data,
const std::string& providerType) {
std::string publisher_key = "youtube#channel:";
std::string key = getYoutubePublisherKeyFromUrl(visit_data);
std::string key = getYoutubePublisherKeyFromUrl(visit_data.path);
if (!key.empty()) {
publisher_key += key;
fetchPublisherDataFromDB(windowId,
Expand Down Expand Up @@ -582,7 +582,7 @@ void BatGetMedia::processYoutubeUserPath(uint64_t windowId,
const ledger::VisitData& visit_data,
const std::string& providerType) {

std::string user = getYoutubeUserFromUrl(visit_data);
std::string user = getYoutubeUserFromUrl(visit_data.path);

if (user.empty()) {
onMediaActivityError(visit_data, providerType, windowId);
Expand Down Expand Up @@ -699,7 +699,7 @@ void BatGetMedia::onGetChannelHeadlineVideo(uint64_t windowId,
if (visit_data.path.find("/channel/") != std::string::npos) {
std::string title = getNameFromChannel(response);
std::string favicon = parseFavIconUrl(response);
std::string channelId = getYoutubePublisherKeyFromUrl(visit_data);
std::string channelId = getYoutubePublisherKeyFromUrl(visit_data.path);

savePublisherInfo(0,
"",
Expand Down Expand Up @@ -928,21 +928,54 @@ std::string BatGetMedia::getYoutubeMediaKeyFromUrl(
return std::string();
}

std::string BatGetMedia::getYoutubePublisherKeyFromUrl(const ledger::VisitData& visit_data) {
return extractData(visit_data.path + "/", "/channel/", "/");
// static
// Extracts the channel id from a YouTube channel path.
//
// |path| is searched for the "/channel/" marker and the id is taken as the
// text between that marker and the following "/". A trailing "/" is appended
// before searching so an id at the very end of |path| is still terminated.
// Anything from the first '?' onwards (a query string) is dropped.
//
// Returns an empty string when |path| is empty or no id can be extracted.
std::string BatGetMedia::getYoutubePublisherKeyFromUrl(const std::string& path) {
  if (path.empty()) {
    return std::string();
  }

  const std::string id = extractData(path + "/", "/channel/", "/");
  if (id.empty()) {
    return std::string();
  }

  // Keep only the part in front of the query string. split() lives in another
  // file, so do not assume it always yields at least one element.
  const std::vector<std::string> params =
      braveledger_bat_helper::split(id, '?');
  return params.empty() ? std::string() : params.front();
}

std::string BatGetMedia::getYoutubeUserFromUrl(const ledger::VisitData& visit_data) {
return extractData(visit_data.path + "/", "/user/", "/");
// static
// Extracts the user name from a YouTube user path.
//
// |path| is searched for the "/user/" marker and the name is taken as the
// text between that marker and the following "/". A trailing "/" is appended
// before searching so a name at the very end of |path| is still terminated.
// Anything from the first '?' onwards (a query string) is dropped.
//
// Returns an empty string when |path| is empty or no name can be extracted.
std::string BatGetMedia::getYoutubeUserFromUrl(const std::string& path) {
  if (path.empty()) {
    return std::string();
  }

  const std::string id = extractData(path + "/", "/user/", "/");
  if (id.empty()) {
    return std::string();
  }

  // Keep only the part in front of the query string. split() lives in another
  // file, so do not assume it always yields at least one element.
  const std::vector<std::string> params =
      braveledger_bat_helper::split(id, '?');
  return params.empty() ? std::string() : params.front();
}

// static
std::string BatGetMedia::extractData(const std::string& data,
const std::string& matchAfter, const std::string& matchUntil) const {
const std::string& matchAfter, const std::string& matchUntil) {
std::string match;
size_t matchAfterSize = matchAfter.size();
size_t match_after_size = matchAfter.size();
size_t data_size = data.size();

if (data_size < match_after_size) {
return match;
}

size_t startPos = data.find(matchAfter);
if (startPos != std::string::npos) {
startPos += matchAfterSize;
startPos += match_after_size;
size_t endPos = data.find(matchUntil, startPos);
if (endPos != startPos) {
if (endPos != std::string::npos && endPos > startPos) {
Expand All @@ -954,6 +987,7 @@ std::string BatGetMedia::extractData(const std::string& data,
}
}
}

return match;
}

Expand Down
28 changes: 18 additions & 10 deletions vendor/bat-native-ledger/src/bat_get_media.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "bat/ledger/ledger.h"
#include "bat_helper.h"
#include "base/gtest_prod_util.h"

namespace bat_ledger {
class LedgerImpl;
Expand Down Expand Up @@ -46,8 +47,6 @@ class BatGetMedia {
const std::string& providerType,
const std::string& publisher_blob);

static std::string getYoutubeMediaIdFromUrl(const ledger::VisitData& visit_data);

private:
std::string getMediaURL(const std::string& mediaId, const std::string& providerName);
void getPublisherFromMediaPropsCallback(const uint64_t& duration,
Expand Down Expand Up @@ -206,14 +205,6 @@ class BatGetMedia {

std::string getYoutubeMediaKeyFromUrl(const std::string& provider_type, const std::string& media_id);

std::string getYoutubePublisherKeyFromUrl(const ledger::VisitData& visit_data);

std::string getYoutubeUserFromUrl(const ledger::VisitData& visit_data);

std::string extractData(const std::string& data,
const std::string& matchAfter,
const std::string& matchUntil) const;

std::string getPublisherUrl(const std::string& publisher_key, const std::string& providerName);

void fetchPublisherDataFromDB(
Expand All @@ -225,11 +216,28 @@ class BatGetMedia {

void fetchDataFromUrl(const std::string& url, FetchDataFromUrlCallback callback);

static std::string getYoutubeMediaIdFromUrl(
const ledger::VisitData& visit_data);

static std::string getYoutubePublisherKeyFromUrl(const std::string& path);

static std::string getYoutubeUserFromUrl(const std::string& path);

static std::string extractData(const std::string& data,
const std::string& matchAfter,
const std::string& matchUntil);

std::string getNameFromChannel(const std::string& data);

bat_ledger::LedgerImpl* ledger_; // NOT OWNED

std::map<std::string, ledger::TwitchEventInfo> twitchEvents;

// For testing purposes
friend class BatGetMediaTest;
FRIEND_TEST_ALL_PREFIXES(BatGetMediaTest, GetYoutubeMediaIdFromUrl);
FRIEND_TEST_ALL_PREFIXES(BatGetMediaTest, GetYoutubePublisherKeyFromUrl);
FRIEND_TEST_ALL_PREFIXES(BatGetMediaTest, GetYoutubeUserFromUrl);
};

} // namespace braveledger_bat_get_media
Expand Down
140 changes: 135 additions & 5 deletions vendor/bat-native-ledger/src/bat_get_media_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "brave/vendor/bat-native-ledger/include/bat/ledger/ledger.h"
#include "brave/vendor/bat-native-ledger/src/bat_get_media.h"
#include "bat/ledger/ledger.h"
#include "bat_get_media.h"
#include "testing/gtest/include/gtest/gtest.h"

TEST(BatGetMediaTest, GetYoutubeMediaIdFromUrl) {
//BatGetMedia media = new BatGetMedia(nullptr);
// npm run test -- brave_unit_tests --filter=BatGetMediaTest.*

namespace braveledger_bat_get_media {

// Empty gtest-derived class. BatGetMedia declares a class of this name as a
// friend for testing purposes (see bat_get_media.h).
class BatGetMediaTest : public testing::Test {};

TEST(BatGetMediaTest, GetYoutubeMediaIdFromUrl) {
// missing video id
ledger::VisitData data;
data.url = "https://www.youtube.com/watch";
Expand Down Expand Up @@ -49,4 +54,129 @@ TEST(BatGetMediaTest, GetYoutubeMediaIdFromUrl) {
braveledger_bat_get_media::BatGetMedia::getYoutubeMediaIdFromUrl(data);

ASSERT_EQ(media, "44444444");
}
}

// Checks that getYoutubePublisherKeyFromUrl() returns the channel id for
// channel paths (ignoring query strings and trailing path segments) and an
// empty string for anything else.
TEST(BatGetMediaTest, GetYoutubePublisherKeyFromUrl) {
  struct TestCase {
    const char* description;
    const char* path;
    const char* expected_key;
  };

  const TestCase kCases[] = {
      {"path is empty", "", ""},
      {"path is just slash", "/", ""},
      {"wrong path", "/test", ""},
      {"single element in the url",
       "https://www.youtube.com/channel/"
       "UCRkcacarvLbUfygxUAAAAAA",
       "UCRkcacarvLbUfygxUAAAAAA"},
      {"multiple elements in the url",
       "https://www.youtube.com/channel/"
       "UCRkcacarvLbUfygxUAAAAAA?view_as=subscriber",
       "UCRkcacarvLbUfygxUAAAAAA"},
      {"multiple paths in the url",
       "https://www.youtube.com/channel/"
       "UCRkcacarvLbUfygxUAAAAAA/playlist",
       "UCRkcacarvLbUfygxUAAAAAA"},
      {"multiple paths + elements in the url",
       "https://www.youtube.com/channel/"
       "UCRkcacarvLbUfygxUAAAAAA/playlist?view_as=subscriber",
       "UCRkcacarvLbUfygxUAAAAAA"},
  };

  for (const TestCase& test_case : kCases) {
    const std::string path = test_case.path;
    const std::string key = braveledger_bat_get_media::BatGetMedia::
        getYoutubePublisherKeyFromUrl(path);

    // Stream the case description so a failure identifies the input.
    ASSERT_EQ(key, test_case.expected_key) << test_case.description;
  }
}

// Checks that getYoutubeUserFromUrl() returns the user name for user paths
// (ignoring query strings and trailing path segments) and an empty string for
// anything else.
TEST(BatGetMediaTest, GetYoutubeUserFromUrl) {
  struct TestCase {
    const char* description;
    const char* path;
    const char* expected_user;
  };

  const TestCase kCases[] = {
      // The empty-path case must really pass "" so the early-return branch
      // for empty input is exercised; "/" is covered by the next case.
      {"path is empty", "", ""},
      {"path is just slash", "/", ""},
      {"wrong url", "https://www.youtube.com/test", ""},
      {"single element in the url",
       "https://www.youtube.com/user/brave",
       "brave"},
      {"multiple elements in the url",
       "https://www.youtube.com/user/"
       "brave?view_as=subscriber",
       "brave"},
      {"multiple paths in the url",
       "https://www.youtube.com/user/"
       "brave/playlist",
       "brave"},
      {"multiple paths + elements in the url",
       "https://www.youtube.com/user/"
       "brave/playlist?view_as=subscriber",
       "brave"},
  };

  for (const TestCase& test_case : kCases) {
    const std::string path = test_case.path;
    const std::string user =
        braveledger_bat_get_media::BatGetMedia::getYoutubeUserFromUrl(path);

    // Stream the case description so a failure identifies the input.
    ASSERT_EQ(user, test_case.expected_user) << test_case.description;
  }
}

}  // namespace braveledger_bat_get_media

0 comments on commit 2882554

Please sign in to comment.