Skip to content

Commit

Permalink
Separated yk and yklua setup logic.
Browse files Browse the repository at this point in the history
  • Loading branch information
nmdis1999 committed Jan 29, 2024
1 parent c173f3e commit 6acd1bb
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 77 deletions.
71 changes: 71 additions & 0 deletions ykrt/pass_finder/setup_genetic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os, shutil, subprocess, sys, queue
import multiprocessing
from multiprocessing import Manager, Process, Queue

def _run_setup_command(cmd, cwd=None):
    """Run *cmd* (an argument list) in *cwd*, reporting but tolerating failures."""
    try:
        result = subprocess.run(cmd, cwd=cwd)
    except OSError as err:
        # E.g. the executable is missing or *cwd* does not exist.
        print(f"Failed to run {' '.join(cmd)}: {err}")
        return
    if result.returncode != 0:
        print(f"{' '.join(cmd)} exited with status {result.returncode}")


def _suffix_pass_env_vars(path, i):
    """Rename PRELINK_PASSES/POSTLINK_PASSES in *path* to their `_{i}` variants."""
    for var in ("PRELINK_PASSES", "POSTLINK_PASSES"):
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact.
        _run_setup_command(["sed", "-i", "-e", f"s/{var}/{var}_{i}/g", path])


def setup_worker(curr_dir, temp_directories, base_temp_dir, yk_path, yklua, tasks):
    """
    Drain `tasks` and prepare one private working directory per task index.

    For every index `i` taken from `tasks` the worker creates
    `<base_temp_dir>/tmp_<i>` containing a copy of `yk_path` (as `yk`) and of
    `yklua` (as `yklua`). In a fresh copy, the PRELINK_PASSES/POSTLINK_PASSES
    names in `yk/tests/src/lib.rs` and `yklua/src/Makefile` are renamed to
    `PRELINK_PASSES_<i>`/`POSTLINK_PASSES_<i>`. `cargo test` is then run in
    the yk copy; its result is reported but does not abort the setup.

    Parameters:
        curr_dir: kept for backward compatibility; the worker no longer
            changes the process working directory, so it is unused.
        temp_directories: list-like object (e.g. a `Manager().list()`); the
            path of every prepared directory is appended to it.
        base_temp_dir: directory under which the `tmp_<i>` directories live.
        yk_path: source tree copied to `tmp_<i>/yk`.
        yklua: source tree copied to `tmp_<i>/yklua`.
        tasks: queue of task indices; the worker returns when a
            non-blocking `get` raises `queue.Empty`.

    NOTE(review): the renaming is only applied to freshly copied trees;
    applying the same substitution to an already renamed file would yield
    `PRELINK_PASSES_<i>_<i>`.
    """
    while True:
        try:
            i = tasks.get(block=False)
        except queue.Empty:
            print("Closing setup worker")
            break

        temp_dir = os.path.join(base_temp_dir, f"tmp_{i}")
        git_repo_path = os.path.join(temp_dir, "yk")
        yklua_dest_path = os.path.join(temp_dir, "yklua")
        yk_test_src = os.path.join(git_repo_path, "tests", "src", "lib.rs")
        os.makedirs(temp_dir, exist_ok=True)

        if not os.path.exists(git_repo_path):
            shutil.copytree(yk_path, git_repo_path)
            if os.path.exists(yk_test_src):
                _suffix_pass_env_vars(yk_test_src, i)
                _run_setup_command(["cargo", "test"], cwd=git_repo_path)
        else:
            # The copy already exists (and was renamed earlier): just rebuild.
            _run_setup_command(["cargo", "test"], cwd=git_repo_path)
            print("..")

        # Only copy yklua if it doesn't exist in the temp directory.
        if not os.path.exists(yklua_dest_path):
            shutil.copytree(yklua, yklua_dest_path)
            _suffix_pass_env_vars(os.path.join(yklua_dest_path, "src", "Makefile"), i)

        # append, not extend: extending with a string would add the path
        # one character at a time.
        temp_directories.append(temp_dir)

def _pinned_setup_worker(core, *worker_args):
    """Pin the calling process to CPU *core*, then run `setup_worker`."""
    # Pinning from inside the child avoids using Process.pid before start()
    # (it is None until then) and means every command the worker spawns
    # inherits the affinity.
    if hasattr(os, "sched_setaffinity"):
        try:
            os.sched_setaffinity(0, {core})
        except OSError as err:
            print(f"Could not pin setup worker to core {core}: {err}")
    setup_worker(*worker_args)


def setup(curr_dir, base_temp_dir, yk_path, yklua):
    """
    Prepare the per-worker working directories in parallel.

    One task index and one worker process are created per usable core
    (`cpu_count() - 1`, but at least one). Each worker runs `setup_worker`,
    which creates `<base_temp_dir>/tmp_<i>` for the indices it picks up.

    Parameters:
        curr_dir: forwarded unchanged to `setup_worker`.
        base_temp_dir: directory under which the `tmp_<i>` directories live.
        yk_path: yk source tree to copy into every working directory.
        yklua: yklua source tree to copy into every working directory.

    Returns:
        The directories reported by the workers, ordered by task index so
        that element `i` is `tmp_<i>` regardless of which worker finished
        first. Entries that are not one of the expected paths keep their
        relative order at the end of the list.
    """
    num_cores = max(1, multiprocessing.cpu_count() - 1)

    with Manager() as manager:
        temp_directories = manager.list()
        # A manager queue completes put() before returning, so a worker's
        # non-blocking get() cannot see a spuriously empty queue the way it
        # can with multiprocessing.Queue's background feeder thread.
        tasks = manager.Queue()

        #TODO: on bencher9 change num_cores to num_cores * 2
        for i in range(num_cores):
            tasks.put(i)

        processes = []
        for core in range(num_cores):
            p = Process(
                target=_pinned_setup_worker,
                args=(core, curr_dir, temp_directories, base_temp_dir,
                      yk_path, yklua, tasks),
            )
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

        # Copy out of the proxy while the manager is still alive.
        reported = list(temp_directories)

    # Workers append in completion order; restore index order (sorted() is
    # stable, so unexpected entries stay in their original order at the end).
    slot = {os.path.join(base_temp_dir, f"tmp_{i}"): i for i in range(num_cores)}
    return sorted(reported, key=lambda path: slot.get(path, num_cores))
108 changes: 31 additions & 77 deletions ykrt/pass_finder/try_passes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from dataclasses import dataclass
from multiprocessing import Manager, Process, Queue, Value
import cargo_run
import setup_genetic
import multiprocessing

RED = '\033[91m'
Expand Down Expand Up @@ -128,37 +129,37 @@ def get_all_passes(is_prelink):
return passes

def test_pipeline(logf, pl, id, yk_path, yklua_path):
    """
    Export the pass lists of pipeline `pl` through the per-id
    PRELINK_PASSES_<id> / POSTLINK_PASSES_<id> environment variables
    and run the test oracle.

    Returns `(True, time)` with the time reported by `cargo_run.run_test`
    when its return code is 0, and `(False, None)` otherwise.
    """
    label = str(pl)
    sys.stdout.write(label + "...")
    sys.stdout.flush()

    log(logf, "\n\n" + label + "\n")

    # An entirely empty pipeline is never a valid candidate.
    assert len(pl.pre_link) + len(pl.link_time) > 0, "Both prelink and postlink passes cannot be empty!!!"

    # Work on a copy so the parent's environment is left untouched.
    env = os.environ.copy()
    env[f"PRELINK_PASSES_{id}"] = ",".join(p.name for p in pl.pre_link)
    env[f"POSTLINK_PASSES_{id}"] = ",".join(p.name for p in pl.link_time)

    ret, elapsed = cargo_run.run_test(id, yk_path, yklua_path, env=env)
    print(f"{PURPLE}ret code for cargo run is {ret}{RESET}")

    if ret != 0:
        log(logf, label + " : FAILED\n")
        print(" [FAIL]")
        return False, None

    print(" [OK]")
    log(logf, label + ": OK\n")
    return True, elapsed

def list_of_passes_to_str(passes):
    """Return the string forms of `passes` joined by commas."""
    return ",".join(map(str, passes))


def evaluate_fitness(glogf, is_prelink, tasks, passes, tmpdir, id, cwd, fitness_scores, processing, run):
def evaluate_fitness(glogf, is_prelink, tasks, passes, tmpdir, id, fitness_scores, processing, run):
"""
Returns fitness score based on whether the passes list
successfully builds the pipeline and runs all the tests.
Expand All @@ -182,7 +183,6 @@ def evaluate_fitness(glogf, is_prelink, tasks, passes, tmpdir, id, cwd, fitness_
processing.value += 1
print(f"{PURPLE}in else{RESET}")
try_passes = [passes[i] for (i, bit) in enumerate(entity[1]) if bit]
# log(glogf, f"\ncurrently evaluating {try_passes}\n")
config = get_pipeline_config(is_prelink, try_passes)
ret, exec_time = test_pipeline(glogf, config, id, yk_path, yklua_path)
if ret:
Expand All @@ -199,16 +199,21 @@ def evaluate_fitness(glogf, is_prelink, tasks, passes, tmpdir, id, cwd, fitness_
return True

def tournament(population, fitness, sp):
"""
Selects a parent from the given population using the tournament selection method.
This function implements tournament selection for genetic algorithms.
Two individuals are randomly selected from the population. The one with the better fitness
(lower fitness value) is chosen as the parent with a probability of `sp` (selection probability).
If not chosen based on `sp`, the other individual is selected as the parent.
"""
population_ = [(i, population[i]) for i in range(len(population))]
parent1 = random.choices(population_, weights=[1]*len(population), k=1)[0]
parent2 = random.choices(population_, weights=[1]*len(population), k=1)[0]

while parent1 == parent2:
parent2 = random.choices(population_, weights=[1]*len(population), k=1)[0]
# while fitness[parent2[0]] == float('inf'):
# parent2 = random.choices(population_, weights=[1]*len(population), k=1)[0]
# while fitness[parent1[0]] == float('inf'):
# parent1 = random.choices(population_, weights=[1]*len(population), k=1)[0]


if random.random() > sp:
parent = parent1[1] if fitness[parent1[0]] < fitness[parent2[0]] else parent2[1]
else:
Expand Down Expand Up @@ -242,7 +247,7 @@ def mutate(entity, mutation_rate):
return mutated_entity

def genetic_algorithm(glogf, is_prelink, population_size, mutation_rate,
generations, target_fitness, passes, tmpdirs, ncpu, cwd):
generations, target_fitness, passes, tmpdirs, ncpu):
"""
Executes a genetic algorithm for finding optimization passes. It randomly generates
a population, evolves it through crossover and mutation, and selects entities based on
Expand Down Expand Up @@ -274,35 +279,33 @@ def genetic_algorithm(glogf, is_prelink, population_size, mutation_rate,
# TODO: make tmpdir = tmpdirs[w/2] to assign 2 threads to every core (for bencher9)
print(f"{PURPLE}w is {w}{RESET}")
tmpdir = tmpdirs[w]
# def evaluate_fitness(glogf, is_prelink, tasks, passes, tmpdir, id, cwd, fitness_scores, processing, run):
id = w
p = Process(target=evaluate_fitness, args=(glogf, is_prelink,
tasks, passes,
tmpdir, id, cwd, fitness_scores, processing, run))
tmpdir, id, fitness_scores, processing, run))
print(f"{RED}process: {p}{RESET}")
processes.append(p)
# This command makes sure each process runs on a single core
os.system(f"taskset -p -c {w} {p.pid}") # TODO: change w to w/2 to pin 2 processes to 1 core on bencher9
p.start()

print(f"{GREEN}processes list: {processes}{RESET}")

# Give at least one worker time to start processing
time.sleep(5)
while processing.value > 0:
time.sleep(5)
print(f"{GREEN}processing.value: {processing.value}{RESET}")

# close workers
run.value = 0

fitness_scores.sort() # sort by entity id
print(f"{PURPLE}fitness_scores: {fitness_scores}{RESET}")
fitness = [score[1] for score in fitness_scores]

log(glogf, f"\nfitness score: {fitness}\n")
log(glogf, "=========================================================")

# A lower execution time is better
# wt = [(1/t) for t in fitness]
print(f"{PURPLE}{fitness}{RESET}")

if any(y <= target_fitness for y in fitness):
print(f"Target fitness reached in generation {generation + 1}!")
Expand All @@ -313,17 +316,8 @@ def genetic_algorithm(glogf, is_prelink, population_size, mutation_rate,
if fitness[i] < float('inf'):
elites.append(population[i])

# Select parents for reproduction (roulette wheel selection)
parents = []
# for _ in range(population_size // 2):
# parent1 = random.choices(population, weights=wt, k=1)[0]
# parent2 = random.choices(population, weights=wt, k=1)[0]
# while parent2 == parent1:
# parent2 = random.choices(population, weights=wt, k=1)[0]
# parents.append((parent1, parent2))

# ranked_population = [x for _, x in sorted(zip(fitness, population))]
sp = 0.1 # selection probability
sp = 0.1

for i in range((len(population) - len(elites)) // 2):
parent1 = tournament(population, fitness, sp)
Expand All @@ -350,58 +344,18 @@ def genetic_algorithm(glogf, is_prelink, population_size, mutation_rate,
print(f"{PURPLE}{best_entity}{RESET}")
return best_entity

def main(glogf, is_prelink, yk_path, yklua, cwd):
def main(glogf, is_prelink, yk_path, yklua, cwd, base_temp_dir):
# Sanity check, test script should work with no extra passes.
# assert(test_pipeline(logf, PipelineConfig([], [])))

# cargo_manifest_path = os.path.join(cwd, "Cargo.toml")
# subprocess.run(f"cargo test --manifest-path {cargo_manifest_path}", shell=True, env=os.environ)

# TODO: Make this remove -1 and double this for bencher9
num_cores = multiprocessing.cpu_count() - 1
temp_directories = []

base_temp_dir = "/home/shreei/tmp" # Set base directory to /tmp
curr_dir = os.getcwd()
temp_directories = setup_genetic.setup(curr_dir, base_temp_dir, yk_path, yklua)
passes = get_all_passes(is_prelink)

for i in range(num_cores):
# Create a directory with the custom name
temp_dir_name = f"tmp_{i}"
temp_dir = os.path.join(base_temp_dir, temp_dir_name)
git_repo_path = os.path.join(temp_dir, "yk")
yklua_dest_path = os.path.join(temp_dir, "yklua")
yk_test_src = os.path.join(git_repo_path, "tests", "src", "lib.rs")
os.makedirs(temp_dir, exist_ok=True)

if not os.path.exists(git_repo_path):
shutil.copytree(yk_path, git_repo_path)
os.chdir(git_repo_path)

if os.path.exists(yk_test_src):
subprocess.run(f"sed -i -e 's/PRELINK_PASSES/PRELINK_PASSES_{i}/g' {yk_test_src}", shell=True)

# subprocess.run("cargo build", shell=True, env=os.environ)
# subprocess.run("cargo test", shell=True, env=os.environ)
elif os.path.exists(git_repo_path):
# os.chdir(git_repo_path)
# subprocess.run("cargo test", shell=True, env=os.environ)
print(f"...")
else:
print(f"Directory {git_repo_path} does not exist.")
sys.exit()


# Only copy yklua if it doesn't exist in the temp directory
if not os.path.exists(yklua_dest_path):
shutil.copytree(yklua, yklua_dest_path)
yklua_src = os.path.join(yklua_dest_path, "src")
os.chdir(yklua_src)
subprocess.run(f"sed -i -e 's/PRELINK_PASSES/PRELINK_PASSES_{i}/g' Makefile", shell=True)

temp_directories.append(temp_dir)
os.chdir(curr_dir)

passes = get_all_passes(is_prelink)
#FIXME: choose a better value for target fitness
# currently choosing 0 secs, so the benchmark converges
target_fitness = 0.0
Expand All @@ -414,7 +368,6 @@ def main(glogf, is_prelink, yk_path, yklua, cwd):
passes = passes,
tmpdirs = temp_directories,
ncpu = num_cores,
cwd = cwd,
)

final_passes = [passes[i].name for (i, bit) in enumerate(best_entity) if bit]
Expand All @@ -435,10 +388,11 @@ def main(glogf, is_prelink, yk_path, yklua, cwd):
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-lto', action='store_true', help='Set flag for LTO.')
group.add_argument('-prelink', action='store_true', help='Set flag for Prelink.')

parser.add_argument("base_dir", type=str)
args = parser.parse_args()

is_prelink = args.prelink
base_dir = args.base_dir

if not is_prelink and not args.lto:
print("Flag invalid! Please provide a valid flag: -lto or -prelink")
Expand All @@ -458,4 +412,4 @@ def main(glogf, is_prelink, yk_path, yklua, cwd):
print(f"PATH to interpreter: {CWD}")

with open(genetic_log_path, "w+") as glogf:
main(glogf, is_prelink, yk_path, yklua_path, CWD)
main(glogf, is_prelink, yk_path, yklua_path, CWD, base_dir)

0 comments on commit 6acd1bb

Please sign in to comment.