Skip to content

Commit

Permalink
[SPARK-34789][TEST] Introduce Jetty based construct for integration t…
Browse files Browse the repository at this point in the history
…ests where HTTP server is used

### What changes were proposed in this pull request?

Introducing a new test construct:
```
  withHttpServer() { baseURL =>
    ...
  }
```
Which starts and stops a Jetty server to serve files via HTTP.

Moreover this PR uses this new construct in the test `Run SparkRemoteFileTest using a remote data file`.

### Why are the changes needed?

Before this PR github URLs was used like "https://raw.githubusercontent.com/apache/spark/master/data/mllib/pagerank_data.txt".
This connects two Spark version in an unhealthy way like connecting the "master" branch which is moving part with the committed test code which is a non-moving (as it might be even released).
So this way a test running for an earlier version of Spark expects something (filename, content, path) from a the latter release and what is worse when the moving version is changed the earlier test will break.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing unit test.

Closes #31935 from attilapiros/SPARK-34789.

Authored-by: “attilapiros” <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
  • Loading branch information
attilapiros authored and dongjoon-hyun committed Apr 15, 2021
1 parent 7ff9d2e commit 8a3815f
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 6 deletions.
23 changes: 22 additions & 1 deletion core/src/main/scala/org/apache/spark/TestUtils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package org.apache.spark

import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream}
import java.net.{HttpURLConnection, URI, URL}
import java.net.{HttpURLConnection, InetSocketAddress, URI, URL}
import java.nio.charset.StandardCharsets
import java.nio.file.{Files => JavaFiles, Paths}
import java.nio.file.attribute.PosixFilePermission.{OWNER_EXECUTE, OWNER_READ, OWNER_WRITE}
Expand All @@ -41,6 +41,11 @@ import scala.util.Try
import com.google.common.io.{ByteStreams, Files}
import org.apache.commons.lang3.StringUtils
import org.apache.log4j.PropertyConfigurator
import org.eclipse.jetty.server.Handler
import org.eclipse.jetty.server.Server
import org.eclipse.jetty.server.handler.DefaultHandler
import org.eclipse.jetty.server.handler.HandlerList
import org.eclipse.jetty.server.handler.ResourceHandler
import org.json4s.JsonAST.JValue
import org.json4s.jackson.JsonMethods.{compact, render}

Expand Down Expand Up @@ -364,6 +369,22 @@ private[spark] object TestUtils {
}
}

def withHttpServer(resBaseDir: String = ".")(body: URL => Unit): Unit = {
// 0 as port means choosing randomly from the available ports
val server = new Server(new InetSocketAddress(Utils.localCanonicalHostName, 0))
val resHandler = new ResourceHandler()
resHandler.setResourceBase(resBaseDir)
val handlers = new HandlerList()
handlers.setHandlers(Array[Handler](resHandler, new DefaultHandler()))
server.setHandler(handlers)
server.start()
try {
body(server.getURI.toURL)
} finally {
server.stop()
}
}

/**
* Wait until at least `numExecutors` executors are up, or throw `TimeoutException` if the waiting
* time elapsed before `numExecutors` executors up. Exposed for testing.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package org.apache.spark.deploy.k8s.integrationtest

import io.fabric8.kubernetes.api.model.Pod

import org.apache.spark.TestUtils
import org.apache.spark.launcher.SparkLauncher

private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite =>
Expand Down Expand Up @@ -99,9 +100,12 @@ private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite =>
}

test("Run SparkRemoteFileTest using a remote data file", k8sTestTag) {
sparkAppConf
.set("spark.files", REMOTE_PAGE_RANK_DATA_FILE)
runSparkRemoteCheckAndVerifyCompletion(appArgs = Array(REMOTE_PAGE_RANK_FILE_NAME))
assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!")
TestUtils.withHttpServer(sys.props("spark.test.home")) { baseURL =>
sparkAppConf
.set("spark.files", baseURL.toString + REMOTE_PAGE_RANK_DATA_FILE)
runSparkRemoteCheckAndVerifyCompletion(appArgs = Array(REMOTE_PAGE_RANK_FILE_NAME))
}
}
}

Expand All @@ -110,7 +114,6 @@ private[spark] object BasicTestsSuite {
val CONTAINER_LOCAL_FILE_DOWNLOAD_PATH = "/var/spark-data/spark-files"
val CONTAINER_LOCAL_DOWNLOADED_PAGE_RANK_DATA_FILE =
s"$CONTAINER_LOCAL_FILE_DOWNLOAD_PATH/pagerank_data.txt"
val REMOTE_PAGE_RANK_DATA_FILE =
"https://raw.githubusercontent.com/apache/spark/master/data/mllib/pagerank_data.txt"
val REMOTE_PAGE_RANK_DATA_FILE = "data/mllib/pagerank_data.txt"
val REMOTE_PAGE_RANK_FILE_NAME = "pagerank_data.txt"
}

0 comments on commit 8a3815f

Please sign in to comment.