Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More extensive record-keeping during module loading/compilation #23898

Merged
merged 3 commits into from
Oct 7, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 57 additions & 18 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,20 +201,24 @@ const package_locks = Dict{Symbol,Condition}()
# Callbacks take the form (mod::Symbol) -> nothing.
# WARNING: This is an experimental feature and might change later, without deprecation.
const package_callbacks = Any[]
# to notify downstream consumers that a file has been included into a particular module
# Callbacks take the form (mod::Module, filename::String) -> nothing
# WARNING: This is an experimental feature and might change later, without deprecation.
const include_callbacks = Any[]

# used to optionally track dependencies when requiring a module:
const _concrete_dependencies = Any[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them
const _require_dependencies = Any[] # a list of (path, mtime) tuples that are the file dependencies of the module currently being precompiled
const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled
const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies
function _include_dependency(_path::AbstractString)
function _include_dependency(modstring::AbstractString, _path::AbstractString)
prev = source_path(nothing)
if prev === nothing
path = abspath(_path)
else
path = joinpath(dirname(prev), _path)
end
if _track_dependencies[]
push!(_require_dependencies, (path, mtime(path)))
push!(_require_dependencies, (modstring, path, mtime(path)))
end
return path, prev
end
Expand All @@ -230,7 +234,7 @@ This is only needed if your module depends on a file that is not used via `inclu
no effect outside of compilation.
"""
function include_dependency(path::AbstractString)
_include_dependency(path)
_include_dependency("#__external__", path)
return nothing
end

Expand Down Expand Up @@ -518,7 +522,10 @@ end

include_relative(mod::Module, path::AbstractString) = include_relative(mod, String(path))
function include_relative(mod::Module, _path::String)
path, prev = _include_dependency(_path)
path, prev = _include_dependency(string(mod), _path)
for callback in include_callbacks # to preserve order, must come before Core.include
invokelatest(callback, mod, path)
end
tls = task_local_storage()
tls[:SOURCE_PATH] = path
local result
Expand Down Expand Up @@ -671,15 +678,20 @@ function parse_cache_header(f::IO)
end
totbytes = ntoh(read(f, Int64)) # total bytes for file dependencies
# read the list of files
files = Tuple{String,Float64}[]
files = Tuple{String,String,Float64}[]
while true
n = ntoh(read(f, Int32))
n == 0 && break
totbytes -= 4 + n + 8
@assert n >= 0 "EOF while reading cache header" # probably means this wasn't a valid file to be read by Base.parse_cache_header
push!(files, (String(read(f, n)), ntoh(read(f, Float64))))
end
@assert totbytes == 4 "header of cache file appears to be corrupt"
n1 = ntoh(read(f, Int32))
n1 == 0 && break
@assert n1 >= 0 "EOF while reading cache header" # probably means this wasn't a valid file to be read by Base.parse_cache_header
modname = String(read(f, n1))
n2 = ntoh(read(f, Int32))
@assert n2 >= 0 "EOF while reading cache header" # probably means this wasn't a valid file to be read by Base.parse_cache_header
filename = String(read(f, n2))
push!(files, (modname, filename, ntoh(read(f, Float64))))
totbytes -= 8 + n1 + n2 + 8
end
@assert totbytes == 12 "header of cache file appears to be corrupt"
srctextpos = ntoh(read(f, Int64))
# read the list of modules that are required to be present during loading
required_modules = Dict{Symbol,UInt64}()
while true
Expand All @@ -689,7 +701,7 @@ function parse_cache_header(f::IO)
uuid = ntoh(read(f, UInt64)) # module UUID
required_modules[sym] = uuid
end
return modules, files, required_modules
return modules, files, required_modules, srctextpos
end

function parse_cache_header(cachefile::String)
Expand All @@ -704,7 +716,7 @@ end

function cache_dependencies(f::IO)
defs, files, modules = parse_cache_header(f)
return modules, files
return modules, map(mod_fl_mt -> (mod_fl_mt[2], mod_fl_mt[3]), files) # discard the module
end

function cache_dependencies(cachefile::String)
Expand All @@ -717,6 +729,33 @@ function cache_dependencies(cachefile::String)
end
end

function read_dependency_src(io::IO, filename::AbstractString)
modules, files, required_modules, srctextpos = parse_cache_header(io)
srctextpos == 0 && error("no source-text stored in cache file")
seek(io, srctextpos)
while !eof(io)
filenamelen = ntoh(read(io, Int32))
filenamelen == 0 && break
fn = String(read(io, filenamelen))
len = ntoh(read(io, UInt64))
if fn == filename
return String(read(io, len))
end
seek(io, position(io) + len)
end
error(filename, " is not stored in the source-text cache")
end

function read_dependency_src(cachefile::String, filename::AbstractString)
io = open(cachefile, "r")
try
!isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile."))
return read_dependency_src(io, filename)
finally
close(io)
end
end

function stale_cachefile(modpath::String, cachefile::String)
io = open(cachefile, "r")
try
Expand Down Expand Up @@ -756,11 +795,11 @@ function stale_cachefile(modpath::String, cachefile::String)
end

# now check if this file is fresh relative to its source files
if !samefile(files[1][1], modpath)
DEBUG_LOADING[] && info("JL_DEBUG_LOADING: Rejecting cache file $cachefile because it is for file $(files[1][1])) not file $modpath.")
if !samefile(files[1][2], modpath)
DEBUG_LOADING[] && info("JL_DEBUG_LOADING: Rejecting cache file $cachefile because it is for file $(files[1][2])) not file $modpath.")
return true # cache file was compiled from a different path
end
for (f, ftime_req) in files
for (_, f, ftime_req) in files
# Issue #13606: compensate for Docker images rounding mtimes
# Issue #20837: compensate for GlusterFS truncating mtimes to microseconds
ftime = mtime(f)
Expand Down
86 changes: 72 additions & 14 deletions src/dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ static uint64_t read_uint64(ios_t *s)
return b0 | (b1<<32);
}

static void write_int64(ios_t *s, int64_t i)
{
write_int32(s, (i>>32) & 0xffffffff);
write_int32(s, i & 0xffffffff);
}

static void write_uint16(ios_t *s, uint16_t i)
{
write_uint8(s, (i>> 8) & 0xff);
Expand Down Expand Up @@ -1030,7 +1036,7 @@ static void write_mod_list(ios_t *s, jl_array_t *a)
}

// "magic" string and version header of .ji file
static const int JI_FORMAT_VERSION = 3;
static const int JI_FORMAT_VERSION = 4;
static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature
static const uint16_t BOM = 0xFEFF; // byte-order marker
static void write_header(ios_t *s)
Expand Down Expand Up @@ -1065,9 +1071,10 @@ static void write_work_list(ios_t *s)

// serialize the global _require_dependencies array of pathnames that
// are include depenencies
static void write_dependency_list(ios_t *s)
static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp)
{
size_t total_size = 0;
int64_t pos = 0;
static jl_array_t *deps = NULL;
if (!deps)
deps = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("_require_dependencies"));
Expand All @@ -1080,18 +1087,19 @@ static void write_dependency_list(ios_t *s)
jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps};
size_t last_age = jl_get_ptls_states()->world_age;
jl_get_ptls_states()->world_age = jl_world_counter;
jl_array_t *udeps = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL;
jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL);
jl_get_ptls_states()->world_age = last_age;

JL_GC_PUSH1(&udeps);
if (udeps) {
size_t l = jl_array_len(udeps);
for (size_t i=0; i < l; i++) {
jl_value_t *dep = jl_fieldref(jl_array_ptr_ref(udeps, i), 0);
size_t slen = jl_string_len(dep);
total_size += 4 + slen + 8;
dep = jl_fieldref(jl_array_ptr_ref(udeps, i), 1);
slen += jl_string_len(dep);
total_size += 8 + slen + 8;
}
total_size += 4;
total_size += 4 + 8;
}
// write the total size so that we can quickly seek past all of the
// dependencies if we don't need them
Expand All @@ -1100,15 +1108,22 @@ static void write_dependency_list(ios_t *s)
size_t l = jl_array_len(udeps);
for (size_t i=0; i < l; i++) {
jl_value_t *deptuple = jl_array_ptr_ref(udeps, i);
jl_value_t *dep = jl_fieldref(deptuple, 0);
jl_value_t *dep = jl_fieldref(deptuple, 0); // evaluating module (as string)
size_t slen = jl_string_len(dep);
write_int32(s, slen);
ios_write(s, jl_string_data(dep), slen);
write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 1)));
dep = jl_fieldref(deptuple, 1); // file abspath
slen = jl_string_len(dep);
write_int32(s, slen);
ios_write(s, jl_string_data(dep), slen);
write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime
}
write_int32(s, 0); // terminator, for ease of reading
// write a dummy file position to indicate the beginning of the source-text
pos = ios_pos(s);
write_int64(s, 0);
}
JL_GC_POP();
return pos;
}

// --- deserialize ---
Expand Down Expand Up @@ -2280,18 +2295,18 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
{
char *tmpfname = strcat(strcpy((char *) alloca(strlen(fname)+8), fname), ".XXXXXX");
ios_t f;
jl_array_t *mod_array;
jl_array_t *mod_array, *udeps = NULL;
if (ios_mkstemp(&f, tmpfname) == NULL) {
jl_printf(JL_STDERR, "Cannot open cache file \"%s\" for writing.\n", tmpfname);
return 1;
}
JL_GC_PUSH1(&mod_array);
JL_GC_PUSH2(&mod_array, &udeps);
mod_array = jl_get_loaded_modules();

serializer_worklist = worklist;
write_header(&f);
write_work_list(&f);
write_dependency_list(&f);
int64_t srctextpos = write_dependency_list(&f, &udeps);
write_mod_list(&f, mod_array); // this can return errors during deserialize,
// best to keep it early (before any actual initialization)

Expand All @@ -2313,7 +2328,6 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
assert(jl_is_module(m));
jl_collect_lambdas_from_mod(lambdas, m);
}
JL_GC_POP();

jl_collect_backedges(edges);

Expand All @@ -2326,15 +2340,59 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
jl_serialize_value(&s, worklist);
jl_serialize_value(&s, lambdas);
jl_serialize_value(&s, edges);
jl_finalize_serializer(&s); // done with f
jl_finalize_serializer(&s);
serializer_worklist = NULL;

jl_gc_enable(en);
htable_reset(&edges_map, 0);
htable_reset(&backref_table, 0);
arraylist_free(&reinit_list);

// Write the source-text for the dependent files
if (udeps) {
// Go back and update the source-text position to point to the current position
int64_t posfile = ios_pos(&f);
ios_seek(&f, srctextpos);
write_int64(&f, posfile);
ios_seek_end(&f);
// Each source-text file is written as
// int32: length of abspath
// char*: abspath
// uint64: length of src text
// char*: src text
// At the end we write int32(0) as a terminal sentinel.
len = jl_array_len(udeps);
ios_t srctext;
for (i = 0; i < len; i++) {
jl_value_t *deptuple = jl_array_ptr_ref(udeps, i);
jl_value_t *dep = jl_fieldref(deptuple, 0); // module name
// Dependencies declared with `include_dependency` are excluded
// because these may not be Julia code (and could be huge)
if (strcmp(jl_string_data(dep), "#__external__") != 0) {
dep = jl_fieldref(deptuple, 1); // file abspath
ios_t *srctp = ios_file(&srctext, jl_string_data(dep), 1, 0, 0, 0);
if (!srctp) {
jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n",
jl_string_data(dep));
continue;
}
size_t slen = jl_string_len(dep);
write_int32(&f, slen);
ios_write(&f, jl_string_data(dep), slen);
posfile = ios_pos(&f);
write_uint64(&f, 0); // placeholder for length of this file in bytes
uint64_t filelen = (uint64_t) ios_copyall(&f, &srctext);
ios_close(&srctext);
ios_seek(&f, posfile);
write_uint64(&f, filelen);
ios_seek_end(&f);
}
}
}
write_int32(&f, 0); // mark the end of the source text
ios_close(&f);

JL_GC_POP();
if (jl_fs_rename(tmpfname, fname) < 0) {
jl_printf(JL_STDERR, "Cannot write cache file \"%s\".\n", fname);
return 1;
Expand Down
10 changes: 8 additions & 2 deletions test/compile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -204,16 +204,22 @@ try
@test stringmime("text/plain", Base.Docs.doc(Foo.Bar.bar)) == "bar function\n"

modules, deps, required_modules = Base.parse_cache_header(cachefile)
discard_module = mod_fl_mt -> (mod_fl_mt[2], mod_fl_mt[3])
@test modules == Dict(Foo_module => Base.module_uuid(Foo))
@test map(x -> x[1], sort(deps)) == [Foo_file, joinpath(dir, "bar.jl"), joinpath(dir, "foo.jl")]
@test map(x -> x[1], sort(discard_module.(deps))) == [Foo_file, joinpath(dir, "bar.jl"), joinpath(dir, "foo.jl")]
srctxt = Base.read_dependency_src(cachefile, Foo_file)
@test !isempty(srctxt) && srctxt == read(Foo_file, String)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a test for a manually included dependency. If I understand one could end up parsing the list of dependent files and end up with an ("__external__", "external.so", mtime) and `Base.read_dependency_src(cachefile, "external.so") should fail.

Copy link
Member Author

@timholy timholy Oct 3, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but IMO that's fine. This is an internal method and consumers can make sure they don't try to read "__external__" dependencies. (Reading the src text is not something that's done unless it is requested.) Revise doesn't intend to track external dependencies, because it's hard to know what you're supposed to do about them.

But if you know of packages that are using this feature, I would be happy to go look at how they're using external dependencies; that could end up changing my mind.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Everything that would lead to a stale cachefile should probably prompt Revise to to reload that file :).
In my use cases I have stateful information from that external dependency (if it exists or not, API version, ...)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would love a link to your use cases, if they are in public repos.

It would be fine to trigger revise(module) when an external dependency updates, but it seems like it can't be robust: what if the external dependency is a single file in another package? How is Revise supposed to know whether it has to revise that package first? (In some cases it wouldn't but in others it would be necessary.)

In either case, I don't think Revise will need access to the source text of external dependencies. If, as you say, it's an .so file, Revise can't do anything with it anyway. In fact it occurs to me that we probably shouldn't store external dependencies in the src-text cache; it would be crazy to cache, say, libkde.so in a *.ji file 😄.

I will change the format so that each source-text file has its name at the beginning, so that one doesn't have to "count" from the list of dependencies while skipping over #__external__ dependencies.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was wondering if that might happen ;) yeah that would be crazy.

@test_throws ErrorException Base.read_dependency_src(cachefile, "/tmp/nonexistent.txt")
# dependencies declared with `include_dependency` should not be stored
@test_throws ErrorException Base.read_dependency_src(cachefile, joinpath(dir, "foo.jl"))

modules, deps1 = Base.cache_dependencies(cachefile)
@test modules == merge(Dict(s => Base.module_uuid(getfield(Foo, s)) for s in
[:Base, :Core, Foo2_module, FooBase_module, :Main, :Test]),
# plus modules included in the system image
Dict(s => Base.module_uuid(Base.root_module(s)) for s in
[:DelimitedFiles,:Mmap]))
@test deps == deps1
@test discard_module.(deps) == deps1

@test current_task()(0x01, 0x4000, 0x30031234) == 2
@test nothing(0x01, 0x4000, 0x30031234) == 52
Expand Down
4 changes: 4 additions & 0 deletions test/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ end
@test @nested_LINE_expansion() == ((@__LINE__() - 4, @__LINE__() - 12), @__LINE__())
@test @nested_LINE_expansion2() == ((@__LINE__() - 5, @__LINE__() - 9), @__LINE__())

loaded_files = String[]
push!(Base.include_callbacks, (mod::Module, fn::String) -> push!(loaded_files, fn))
include("test_sourcepath.jl")
@test length(loaded_files) == 1 && endswith(loaded_files[1], "test_sourcepath.jl")
pop!(Base.include_callbacks)
thefname = "the fname!//\\&\1*"
include_string_test_func = include_string(@__MODULE__, "include_string_test() = @__FILE__", thefname)
@test include_string_test_func() == thefname
Expand Down