-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #92 from klockla/geturlstatus
Add method to get URL Status (returns a URLItem)
- Loading branch information
Showing
15 changed files
with
20,914 additions
and
20,790 deletions.
There are no files selected for viewing
3,232 changes: 1,384 additions & 1,848 deletions
3,232
API/src/main/java/crawlercommons/urlfrontier/URLFrontierGrpc.java
Large diffs are not rendered by default.
Oops, something went wrong.
37,535 changes: 18,596 additions & 18,939 deletions
37,535
API/src/main/java/crawlercommons/urlfrontier/Urlfrontier.java
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
106 changes: 106 additions & 0 deletions
106
client/src/main/java/crawlercommons/urlfrontier/client/GetURLStatus.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
// SPDX-FileCopyrightText: 2020 Crawler-commons | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package crawlercommons.urlfrontier.client; | ||
|
||
import crawlercommons.urlfrontier.CrawlID; | ||
import crawlercommons.urlfrontier.URLFrontierGrpc; | ||
import crawlercommons.urlfrontier.URLFrontierGrpc.URLFrontierBlockingStub; | ||
import crawlercommons.urlfrontier.Urlfrontier.URLItem; | ||
import crawlercommons.urlfrontier.Urlfrontier.URLStatusRequest; | ||
import crawlercommons.urlfrontier.Urlfrontier.URLStatusRequest.Builder; | ||
import io.grpc.ManagedChannel; | ||
import io.grpc.ManagedChannelBuilder; | ||
import io.grpc.StatusRuntimeException; | ||
import java.time.Instant; | ||
import java.time.LocalDateTime; | ||
import java.time.ZoneId; | ||
import picocli.CommandLine.Command; | ||
import picocli.CommandLine.Option; | ||
import picocli.CommandLine.ParentCommand; | ||
|
||
@Command(name = "GetURLStatus", description = "Get the status of an URL", sortOptions = false) | ||
public class GetURLStatus implements Runnable { | ||
|
||
@ParentCommand private Client parent; | ||
|
||
@Option( | ||
names = {"-c", "--crawlID"}, | ||
required = false, | ||
defaultValue = CrawlID.DEFAULT, | ||
paramLabel = "STRING", | ||
description = "crawl of the url to be checked") | ||
private String crawl; | ||
|
||
@Option( | ||
names = {"-k", "--key"}, | ||
required = false, | ||
paramLabel = "STRING", | ||
description = "key to use to target a specific queue") | ||
private String key; | ||
|
||
@Option( | ||
names = {"-u", "--url"}, | ||
required = true, | ||
paramLabel = "STRING", | ||
description = "url to check for") | ||
private String url; | ||
|
||
@Option( | ||
names = {"-p", "--parsedate"}, | ||
description = { | ||
"Print the refetch date in local time zone", | ||
"By default, time is in UTC seconds since the Unix epoch" | ||
}) | ||
private boolean parse = false; | ||
|
||
// Use the system default time zone | ||
private ZoneId zoneId = ZoneId.systemDefault(); | ||
|
||
@Override | ||
public void run() { | ||
ManagedChannel channel = | ||
ManagedChannelBuilder.forAddress(parent.hostname, parent.port) | ||
.usePlaintext() | ||
.build(); | ||
URLFrontierBlockingStub blockingFrontier = URLFrontierGrpc.newBlockingStub(channel); | ||
|
||
Builder builder = URLStatusRequest.newBuilder().setUrl(url).setCrawlID(crawl); | ||
|
||
String s1 = String.format("Checking status of URL %s (crawlId = %s)", url, crawl); | ||
if (key != null) { | ||
s1 += String.format(" (key = %s)", key); | ||
builder.setKey(key); | ||
} | ||
System.out.println(s1); | ||
|
||
URLStatusRequest request = builder.build(); | ||
|
||
try { | ||
URLItem item = blockingFrontier.getURLStatus(request); | ||
String fetchDate; | ||
|
||
if (parse) { | ||
Instant instant = Instant.ofEpochSecond(item.getKnown().getRefetchableFromDate()); | ||
LocalDateTime localDate = instant.atZone(zoneId).toLocalDateTime(); | ||
fetchDate = localDate.toString(); | ||
} else { | ||
fetchDate = String.valueOf(item.getKnown().getRefetchableFromDate()); | ||
} | ||
System.out.println(item.getKnown().getInfo().getUrl() + ";" + fetchDate); | ||
|
||
} catch (StatusRuntimeException sre) { | ||
if (sre.getStatus().getCode() == io.grpc.Status.Code.NOT_FOUND) { | ||
System.out.println("URL is not in frontier: " + url); | ||
} else { | ||
// Handle other errors | ||
System.err.println(sre.getMessage()); | ||
} | ||
} catch (Exception t) { | ||
// Handle other errors | ||
System.err.println(t.getMessage()); | ||
} | ||
|
||
channel.shutdownNow(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.