Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fs ls command for dbfs #429

Merged
merged 17 commits into from
Jun 5, 2023
Merged
7 changes: 3 additions & 4 deletions cmd/fs/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@ import (

// fsCmd is the cobra command for "fs", the entry point for filesystem
// (DBFS) related commands.
//
// NOTE(review): this span is a rendered diff, so the removed and the added
// versions of the fields appear together below — confirm against the real file.
var fsCmd = &cobra.Command{
Use: "fs",
Short: "Filesystem related commands",
Long: `Commands to do DBFS operations.`,
Hidden: true,
Use: "fs",
Short: "Filesystem related commands",
Long: `Commands to do DBFS operations.`,
}

func init() {
Expand Down
14 changes: 14 additions & 0 deletions cmd/fs/helpers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package fs

import (
"fmt"
"strings"
)

// resolveDbfsPath validates that path carries the "dbfs:/" prefix and returns
// the path with the "dbfs:" scheme removed (the leading "/" is kept).
//
// The remainder is returned verbatim: "." and ".." segments are not
// normalized. Any input without the "dbfs:/" prefix yields an empty string
// and an error that echoes the offending path.
func resolveDbfsPath(path string) (string, error) {
	const scheme = "dbfs:"

	// Require the slash right after the scheme so that inputs such as
	// "dbfs:a/b" are rejected rather than silently treated as relative.
	if strings.HasPrefix(path, scheme+"/") {
		return path[len(scheme):], nil
	}

	return "", fmt.Errorf("expected dbfs path (with the dbfs:/ prefix): %s", path)
}
38 changes: 38 additions & 0 deletions cmd/fs/helpers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package fs

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestResolveDbfsPath checks that resolveDbfsPath strips the "dbfs:" scheme
// from well-formed paths without normalizing them, and that inputs lacking
// the "dbfs:/" prefix are rejected with a descriptive error.
func TestResolveDbfsPath(t *testing.T) {
	// Inputs that must resolve successfully, in the order they are checked.
	accepted := []struct {
		input string
		want  string
	}{
		{input: "dbfs:/", want: "/"},
		{input: "dbfs:/abc", want: "/abc"},
		{input: "dbfs:/a/b/c", want: "/a/b/c"},
		{input: "dbfs:/a/b/.", want: "/a/b/."},
		{input: "dbfs:/a/../c", want: "/a/../c"},
	}
	for _, tc := range accepted {
		resolved, err := resolveDbfsPath(tc.input)
		assert.NoError(t, err)
		assert.Equal(t, tc.want, resolved)
	}

	// Inputs with a wrong scheme, no scheme, or a scheme without the slash.
	rejected := []struct {
		input   string
		wantErr string
	}{
		{input: "dbf:/a/b/c", wantErr: "expected dbfs path (with the dbfs:/ prefix): dbf:/a/b/c"},
		{input: "/a/b/c", wantErr: "expected dbfs path (with the dbfs:/ prefix): /a/b/c"},
		{input: "dbfs:a/b/c", wantErr: "expected dbfs path (with the dbfs:/ prefix): dbfs:a/b/c"},
	}
	for _, tc := range rejected {
		_, err := resolveDbfsPath(tc.input)
		assert.ErrorContains(t, err, tc.wantErr)
	}
}
82 changes: 76 additions & 6 deletions cmd/fs/ls.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,93 @@
package fs

import (
"fmt"
"io/fs"
"sort"
"time"

"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/filer"
"github.com/spf13/cobra"
)

// jsonDirEntry is the serializable form of a single directory entry. It is
// the element type rendered by the ls command, both as JSON and via text
// templates.
type jsonDirEntry struct {
	// Name is the entry's name as reported by the fs.DirEntry.
	Name string `json:"name"`
	// IsDir reports whether the entry is a directory.
	IsDir bool `json:"is_directory"`
	// Size is the size reported by the entry's fs.FileInfo.
	Size int64 `json:"size"`
	// ModTime is the modification time reported by the entry's fs.FileInfo.
	ModTime time.Time `json:"last_modified"`
}

// toJsonDirEntry converts f into a jsonDirEntry.
//
// Name and IsDir are taken from f itself, while Size and ModTime come from
// f.Info(). If f.Info() fails, that error is returned unchanged together with
// a nil entry.
func toJsonDirEntry(f fs.DirEntry) (*jsonDirEntry, error) {
	stat, err := f.Info()
	if err != nil {
		return nil, err
	}

	var entry jsonDirEntry
	entry.Name = f.Name()
	entry.IsDir = f.IsDir()
	entry.Size = stat.Size()
	entry.ModTime = stat.ModTime()
	return &entry, nil
}

// lsCmd is the cobra command for "fs ls": it lists the entries of a single
// DBFS directory given as a "dbfs:/..." path.
//
// The listing is sorted by entry name. With the long flag set, each line
// shows the entry type, size, modification time and name; otherwise only the
// name is printed.
//
// NOTE(review): this span is a rendered diff, so removed lines (the old
// Use/Short/Long/Hidden fields and the "TODO" return) appear next to the
// added ones — confirm against the real file.
var lsCmd = &cobra.Command{
Use: "ls <dir-name>",
Short: "Lists files",
Long: `Lists files`,
Hidden: true,
Use: "ls DIR_PATH",
Short: "Lists files",
Long: `Lists files`,
Args: cobra.ExactArgs(1),
PreRunE: root.MustWorkspaceClient,

RunE: func(cmd *cobra.Command, args []string) error {
return fmt.Errorf("TODO")
ctx := cmd.Context()
w := root.WorkspaceClient(ctx)

// Reject arguments without the dbfs:/ prefix and strip the scheme.
path, err := resolveDbfsPath(args[0])
if err != nil {
return err
}

// The filer is created with "/" as its root argument; path is then passed
// to ReadDir as-is.
f, err := filer.NewDbfsClient(w, "/")
if err != nil {
return err
}

entries, err := f.ReadDir(ctx, path)
if err != nil {
return err
}

// Convert every entry to its serializable form; the first conversion
// error aborts the whole listing.
jsonDirEntries := make([]jsonDirEntry, len(entries))
for i, entry := range entries {
jsonDirEntry, err := toJsonDirEntry(entry)
if err != nil {
return err
}
jsonDirEntries[i] = *jsonDirEntry
}
// Order the output by entry name.
sort.Slice(jsonDirEntries, func(i, j int) bool {
return jsonDirEntries[i].Name < jsonDirEntries[j].Name
})

// Use template for long mode if the flag is set
if longMode {
return cmdio.RenderWithTemplate(ctx, jsonDirEntries, cmdio.Heredoc(`
{{range .}}{{if .IsDir}}DIRECTORY {{else}}FILE {{end}}{{.Size}} {{.ModTime|pretty_date}} {{.Name}}
{{end}}
`))
}
// Default template: print only the entry names, one per line.
return cmdio.RenderWithTemplate(ctx, jsonDirEntries, cmdio.Heredoc(`
{{range .}}{{.Name}}
{{end}}
`))
},
}

// longMode is bound to the --long/-l flag of the ls command. When set, ls
// renders the long template (entry type, size, modification time and name)
// instead of just the names.
var longMode bool

// init registers the --long/-l flag on lsCmd and attaches lsCmd to fsCmd.
func init() {
	// The help text previously promised the modification time "since Epoch
	// in milliseconds", but the long template pipes ModTime through
	// pretty_date, which prints a formatted date string. Describe the output
	// without claiming a representation it does not use.
	lsCmd.Flags().BoolVarP(&longMode, "long", "l", false, "Displays full information including size, file type and modification time.")
	fsCmd.AddCommand(lsCmd)
}
2 changes: 1 addition & 1 deletion internal/filer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ func TestAccFilerWorkspaceFilesReadDir(t *testing.T) {

func temporaryDbfsDir(t *testing.T, w *databricks.WorkspaceClient) string {
ctx := context.Background()
path := fmt.Sprintf("/tmp/%s", RandomName("integration-test-filer-dbfs-"))
path := fmt.Sprintf("/tmp/%s", RandomName("integration-test-dbfs-"))

// This call fails if the path already exists.
t.Logf("mkdir dbfs:%s", path)
Expand Down
104 changes: 104 additions & 0 deletions internal/fs_ls_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package internal

import (
"context"
"encoding/json"
"io/fs"
"path"
"regexp"
"strings"
"testing"

_ "github.com/databricks/cli/cmd/fs"
"github.com/databricks/cli/libs/filer"
"github.com/databricks/databricks-sdk-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestFsLsForDbfs creates a temporary DBFS directory containing one
// sub-directory ("a", holding hello.txt) and one file ("bye.txt"), runs
// `fs ls` on it with JSON output, and checks that exactly those two top-level
// entries are reported in name order with the expected type and size.
//
// The test is skipped unless CLOUD_ENV is set.
func TestFsLsForDbfs(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	ctx := context.Background()
	w, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	tmpDir := temporaryDbfsDir(t, w)

	f, err := filer.NewDbfsClient(w, tmpDir)
	require.NoError(t, err)

	err = f.Mkdir(ctx, "a")
	require.NoError(t, err)
	err = f.Write(ctx, "a/hello.txt", strings.NewReader("abc"), filer.CreateParentDirectories)
	require.NoError(t, err)
	err = f.Write(ctx, "bye.txt", strings.NewReader("def"))
	require.NoError(t, err)

	stdout, stderr := RequireSuccessfulRun(t, "fs", "ls", "dbfs:"+tmpDir, "--output=json")
	assert.Equal(t, "", stderr.String())
	var parsedStdout []map[string]any
	err = json.Unmarshal(stdout.Bytes(), &parsedStdout)
	require.NoError(t, err)

	// Stop here with a clear failure if the listing does not have exactly two
	// entries; indexing below would otherwise panic with "index out of range".
	require.Len(t, parsedStdout, 2)

	// assert on ls output
	assert.Equal(t, "a", parsedStdout[0]["name"])
	assert.Equal(t, true, parsedStdout[0]["is_directory"])
	assert.Equal(t, float64(0), parsedStdout[0]["size"])
	assert.Equal(t, "bye.txt", parsedStdout[1]["name"])
	assert.Equal(t, false, parsedStdout[1]["is_directory"])
	assert.Equal(t, float64(3), parsedStdout[1]["size"])
}

// TestFsLsForDbfsOnFile verifies that running `fs ls` on a path that points
// at a file (rather than a directory) fails with a "not a directory" error
// mentioning that file.
//
// The test is skipped unless CLOUD_ENV is set.
func TestFsLsForDbfsOnFile(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	ctx := context.Background()
	client, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	baseDir := temporaryDbfsDir(t, client)

	dbfs, err := filer.NewDbfsClient(client, baseDir)
	require.NoError(t, err)

	// Lay out <baseDir>/a/hello.txt.
	require.NoError(t, dbfs.Mkdir(ctx, "a"))
	require.NoError(t, dbfs.Write(ctx, "a/hello.txt", strings.NewReader("abc"), filer.CreateParentDirectories))

	// List the file itself instead of its parent directory.
	target := "dbfs:" + path.Join(baseDir, "a", "hello.txt")
	_, _, err = RequireErrorRun(t, "fs", "ls", target, "--output=json")

	wantErr := regexp.MustCompile("not a directory: .*/a/hello.txt")
	assert.Regexp(t, wantErr, err.Error())
}

// TestFsLsForDbfsOnEmptyDir verifies that `fs ls` on a freshly created,
// empty DBFS directory succeeds, writes nothing to stderr, and produces a
// JSON listing with no entries.
//
// The test is skipped unless CLOUD_ENV is set.
func TestFsLsForDbfsOnEmptyDir(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	client, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	emptyDir := temporaryDbfsDir(t, client)

	stdout, stderr := RequireSuccessfulRun(t, "fs", "ls", "dbfs:"+emptyDir, "--output=json")
	assert.Equal(t, "", stderr.String())

	var listing []map[string]any
	require.NoError(t, json.Unmarshal(stdout.Bytes(), &listing))

	// assert on ls output
	assert.Equal(t, 0, len(listing))
}

// TestFsLsForDbfsForNonexistingDir verifies that `fs ls` on a DBFS path that
// does not exist fails with an error matching fs.ErrNotExist.
//
// The test is skipped unless CLOUD_ENV is set.
func TestFsLsForDbfsForNonexistingDir(t *testing.T) {
	cloudEnv := GetEnvOrSkipTest(t, "CLOUD_ENV")
	t.Log(cloudEnv)

	_, _, runErr := RequireErrorRun(t, "fs", "ls", "dbfs:/john-cena", "--output=json")
	assert.ErrorIs(t, runErr, fs.ErrNotExist)
}

// TestFsLsWithoutScheme verifies that `fs ls` rejects a path that lacks the
// "dbfs:/" prefix and reports the offending path in the error.
//
// The test is skipped unless CLOUD_ENV is set.
func TestFsLsWithoutScheme(t *testing.T) {
	cloudEnv := GetEnvOrSkipTest(t, "CLOUD_ENV")
	t.Log(cloudEnv)

	_, _, runErr := RequireErrorRun(t, "fs", "ls", "/ray-mysterio", "--output=json")
	assert.ErrorContains(t, runErr, "expected dbfs path (with the dbfs:/ prefix): /ray-mysterio")
}
17 changes: 9 additions & 8 deletions libs/cmdio/io.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,26 +66,27 @@ func (c *cmdIO) IsTTY() bool {
return isatty.IsTerminal(fd) || isatty.IsCygwinTerminal(fd)
}

// NOTE(review): this span is a rendered diff of libs/cmdio/io.go; removed and
// added lines are interleaved, so some declarations and branches appear
// twice — confirm against the real file.
func (c *cmdIO) Render(v any) error {
// Render renders v using the template stored on the cmdIO obtained from ctx.
func Render(ctx context.Context, v any) error {
c := fromContext(ctx)
return RenderWithTemplate(ctx, v, c.template)
}

// RenderWithTemplate writes v to the output of the cmdIO obtained from ctx,
// picking the renderer from the configured output format:
//   - JSON output always renders v as JSON and ignores template;
//   - text output renders v through template when it is non-empty, and
//     falls back to JSON otherwise;
//   - any other output format yields an "invalid output format" error.
func RenderWithTemplate(ctx context.Context, v any, template string) error {
// TODO: add terminal width & white/dark theme detection
c := fromContext(ctx)
switch c.outputFormat {
case flags.OutputJSON:
return renderJson(c.out, v)
case flags.OutputText:
if c.template != "" {
return renderTemplate(c.out, c.template, v)
if template != "" {
return renderTemplate(c.out, template, v)
}
return renderJson(c.out, v)
default:
return fmt.Errorf("invalid output format: %s", c.outputFormat)
}
}

// Render looks up the cmdIO in ctx and delegates to its Render method.
func Render(ctx context.Context, v any) error {
c := fromContext(ctx)
return c.Render(v)
}

type tuple struct{ Name, Id string }

func (c *cmdIO) Select(names map[string]string, label string) (id string, err error) {
Expand Down
4 changes: 4 additions & 0 deletions libs/cmdio/render.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"strings"
"text/tabwriter"
"text/template"
"time"

"github.com/fatih/color"
"github.com/nwidger/jsoncolor"
Expand Down Expand Up @@ -85,6 +86,9 @@ func renderTemplate(w io.Writer, tmpl string, v any) error {
}
return string(b), nil
},
"pretty_date": func(t time.Time) string {
return t.Format("2006-01-02T15:04:05Z")
},
}).Parse(tmpl)
if err != nil {
return err
Expand Down