Skip to content

Commit

Permalink
Feature/support aml (#14)
Browse files Browse the repository at this point in the history
* add archieml dep

* archieml parsing and displaying

* fixed unbalanced tuple

* following python convention
  • Loading branch information
hwhong authored and hongee committed Apr 19, 2018
1 parent 63d74f9 commit b275dd5
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 7 deletions.
5 changes: 3 additions & 2 deletions assets/js/components/management/PackageView.vue
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
Loading...
</h3>
<div v-else>
<h5>Data</h5>
<pre><code>{{this.packageData.data}}</code></pre>

<h5>Metadata</h5>
<pre><code>{{fmMetaPretty}}</code></pre>

Expand Down Expand Up @@ -76,8 +79,6 @@ code {
</style>



<script>
import {axios, utils} from "../../util";
import Vue from "vue";
Expand Down
19 changes: 19 additions & 0 deletions packages/migrations/0003_package_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 2.0.2 on 2018-04-18 22:24

import django.contrib.postgres.fields.jsonb
from django.db import migrations


class Migration(migrations.Migration):
    """Schema migration adding a ``data`` JSON field to the ``package`` model."""

    # Must be applied after the previous migration of the ``packages`` app.
    dependencies = [('packages', '0002_auto_20180304_0020')]

    # The new field is optional: blank and NULL are both allowed, and the
    # default is the ``dict`` callable rather than a literal value.
    operations = [
        migrations.AddField(
            model_name='package',
            name='data',
            field=django.contrib.postgres.fields.jsonb.JSONField(
                blank=True,
                default=dict,
                null=True,
            ),
        ),
    ]
30 changes: 25 additions & 5 deletions packages/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import CloudFlare
import re
import requests
import archieml

S3_BUCKET = settings.S3_ASSETS_UPLOAD_BUCKET
s3 = boto3.client('s3', 'us-west-2', config=Config(s3={'addressing_style': 'path'}))
Expand All @@ -42,7 +43,8 @@ def save(self, *args, **kwargs):
def populate(self, user):
print("Starting populate for %s" % self.slug)
google = get_oauth2_session(user)
_, _, folders = list_folder(google, self)
# we don't care about the aml_data dict here
_, _, folders, _ = list_folder(google, self)
instances = []
for folder in folders:
try:
Expand Down Expand Up @@ -72,6 +74,7 @@ class Package(models.Model):
drive_folder_url = models.URLField()
metadata = JSONField(blank=True, default=dict, null=True)
images = JSONField(blank=True, default=dict, null=True)
data = JSONField(blank=True, default=dict, null=True)
processing = models.BooleanField(default=False)
cached_article_preview = models.TextField(blank=True)
publish_date = models.DateField()
Expand All @@ -90,6 +93,7 @@ def as_dict(self):
"description": self.description,
"gdrive_url": self.drive_folder_url,
"images": self.images,
"data": self.data,
"article": self.cached_article_preview,
"publish_date": self.publish_date,
"last_fetched_date": self.last_fetched_date
Expand Down Expand Up @@ -125,11 +129,12 @@ def fetch_from_gdrive(self, user):
self.save()
try:
google = get_oauth2_session(user)
text, images, _ = list_folder(google, self)
text, images, _, aml_data = list_folder(google, self)
self.cached_article_preview = text
if self.images is None:
self.images = {}
self.images["gdrive"] = images
self.data = aml_data
transfer_to_s3(google, self)
self.cached_article_preview = rewrite_image_url(self)
self.last_fetched_date = timezone.now()
Expand Down Expand Up @@ -205,11 +210,9 @@ def transfer_to_s3(session, package):
"hash": image_hash,
"s3_fields": response
}

return package



def add_to_repo_folder(session, package):
payload = {
"id": settings.REPOSITORY_FOLDER_ID
Expand Down Expand Up @@ -238,12 +241,27 @@ def list_folder(session, package):
res = session.get(PREFIX + "/v2/files", params=payload)
items = res.json()['items']
article = list(filter(lambda f: "article" in f['title'], items))
data_files = list(filter(lambda f: ".aml" in f['title'], items))
images = list(filter(img_check, items))
folders = list(filter(lambda f: f["mimeType"] == "application/vnd.google-apps.folder", items))
#print("RES:")
#print(article)
#print(images)

aml_data = {}

# Map each .aml file's title to its parsed ArchieML content and store the result in aml_data.

for aml in data_files:
if aml['mimeType'] != "application/vnd.google-apps.document":
req = get_file(session, aml['id'], download=True)
text = req.content.decode('utf-8')
else:
data = session.get(PREFIX + "/v2/files/" + aml['id'] + "/export", params={"mimeType": "text/plain"})
text = data.content.decode('utf-8')
#print("IN ARCHIEML ")
aml_data[aml['title']] = archieml.loads(text)

# only taking the first one - assuming there's only one article file
if len(article) >= 1:
if article[0]['mimeType'] != "application/vnd.google-apps.document":
Expand All @@ -255,7 +273,9 @@ def list_folder(session, package):
# fix indentation for yaml
text = text.replace("\t", " ")
# this will take REALLY long.
return text, images, folders

# return everything
return text, images, folders, aml_data

def create_package(session, package, existing=False):
payload = {
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
archieml==0.3.4
astroid==1.5.3
backports.functools-lru-cache==1.4
boto3==1.4.5
Expand Down

0 comments on commit b275dd5

Please sign in to comment.