-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy path1.download_dockerfiles.py
61 lines (46 loc) · 1.71 KB
/
1.download_dockerfiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
from sregistry.utils import write_file
from bs4 import BeautifulSoup
import html2text
import pickle
import requests
import os
# Download the Dockerfile shown on each container's Docker Hub page and save
# it to data/<first letter of collection>/<collection>/<repo>/Dockerfile.
# Containers that already have a Dockerfile on disk are skipped, so the
# script can be re-run to pick up where a previous run stopped.

# Seconds to wait on Docker Hub before giving up on a single request, so one
# stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 30

# Load in the yuuge list of Docker images (entries are used below as
# "collection/repo" strings).
# NOTE(review): pickle can execute arbitrary code while loading -- only run
# this against a containers_2.pkl that comes from a trusted source.
with open('containers_2.pkl', 'rb') as filey:
    containers = pickle.load(filey)

# Make a data folder (no shell needed; nothing to do if it already exists)
if not os.path.isdir('data'):
    os.makedirs('data')

# Index in containers to begin at; raise it to resume an interrupted run.
start = 0

# c is the absolute index of the container, kept so that a stopped run can
# be resumed by setting start to the last value of c.
for c, container in enumerate(containers[start:], start):

    # Only "collection/repo" names are handled; others are skipped
    if '/' not in container:
        continue

    collection, repo = container.split('/', 1)

    # A leading or trailing slash gives an empty part, which cannot be
    # mapped onto a letter folder (collection[0]) or an output folder.
    if not collection or not repo:
        continue

    letter_dir = os.path.join('data', collection[0])
    collection_dir = os.path.join(letter_dir, collection)
    output_dir = os.path.join(collection_dir, repo)
    docker_file = os.path.join(output_dir, 'Dockerfile')

    # Already downloaded on an earlier run
    if os.path.exists(docker_file):
        continue

    # Now look for the Dockerfile
    url = "https://hub.docker.com/r/%s/~/dockerfile/" % container
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as error:
        # Best effort: report and move on. Nothing was written, so the
        # container is retried the next time the script runs.
        print('Skipping %s: %s' % (container, error))
        continue

    if response.status_code != 200:
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Elements whose class attribute starts with "Dockerfile"
    matches = soup.select("span[class^=Dockerfile]")
    if not matches:
        continue

    # Newlines are swapped for <br> before the markup is handed to
    # html2text (presumably so that line breaks survive the conversion).
    text = str(matches[0]).replace('\n', '<br>')
    dockerfile = html2text.html2text(text)

    # Check for missing dockerfile (nothing but newlines came back)
    if not dockerfile.replace('\n', ''):
        continue

    # If we have something, write it!
    print('Result for %s!' % container)

    # Create the output directory, if doesn't exist. makedirs also creates
    # the letter and collection folders, plus any extra levels when repo
    # itself contains a slash.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Dockerfile is text file
    write_file(docker_file, dockerfile)