Skip to content
This repository has been archived by the owner on Apr 17, 2021. It is now read-only.

Commit

Permalink
adding in linkedin mined data (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
kimmoylan authored Mar 24, 2018
1 parent ea37eda commit b2ac78a
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 0 deletions.
2 changes: 2 additions & 0 deletions data/linkedin.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[{"_id":"5ab64cebdb994b4fdc35ecda","about":"Final year Computer Science student at Queens University Belfast. Currently working on a data\n \n collection and analysis system for big data integration as my dissertation project. Interested in cyber ...\n \n\n...\n\n\n\n \n \n Show more\n \n \n Show more of Dermot’s summary","name":"Dermot McAteer","url":"https://www.linkedin.com/in/dermot-mcateer/","experience":[{"company_link":"/company/4850002/","description":"Mustard is a SaaS enabled hiring marketplace for elite software engineers.As part of the Mustard business intelligence team I am currently developing a large scale data collation and analysis system to extract information from multiple online resources. Technologies include Python, JavaScript, AWS and MongoDB.","title":"Growth Engineer","date_range":"May 2017 – Present","location":"County Dublin, Ireland","company_name":"Mustard.ie"},{"company_link":"/company/4850002/","description":"Mustard is an invite only web platform, where Ireland's top companies deliver next-level job offers directly to a real-time list of the country's leading software engineers, pre-screened for experience, technical ability and readiness to move. Worked as part of the product team developing the Mustard web app. Day to day technologies included: AngularJS, CoffeeScript, Ruby on Rails, Python, MongoDB, AWS and Linux","title":"Technical Intern (Placement)","date_range":"Mar 2017 – May 2017","location":"Dublin, Ireland","company_name":"Mustard.ie"},{"company_link":"/company/10620744/","description":"Webio is an artificially intelligent conversational interface used for streamlining inbound and outbound customer engagement over channels such as SMS, What's app, Facebook messenger and Viber. I mainly focused on the frontend of the Webio web app. Day to day duties included fixing bugs, developing new features based on live user feedback, making requests and handling responses from a RESTful API. 
Technologies used: ReactJS, JSX, Java and SQL.","title":"Software Developer (Placement)","date_range":"Sep 2016 – Mar 2017","location":"Dublin, Ireland","company_name":"Webio Ltd"},{"company_link":"/company/1283/","description":"Worked as a cyber security intern at Infosys HQ in Bangalore. Here I carried out white hat hacking research on spear phishing attacks within large enterprises.I built web scraping scripts to retrieve publicly accessible information on high level employees from multiple websites using selenium for python. From the gathered information I identified possible targets and developed possible spear phishing emails using the social engineering toolkit with Kali Linux.","title":"Cyber Security Researcher (InStep Global Internship)","date_range":"Jun 2016 – Aug 2016","location":"Bangalore, India","company_name":"Infosys"},{"company_link":"/company/926365/","description":"Was part of a small design team of four designers/developers working on designing printed and digital publications for use within the college.I developed HTML e-zines for Microsoft Outlook, developed mock up UI designs for the new Belfast Met website and designed printed and digital documents using Adobe Creative Suite.I assisted with social media campaigns and used a range of analytic tools such as Tweet Deck,Hoot Suite and Sprout Social to view and keep track and analyse the social media coverage.","title":"Graphic and Web Designer","date_range":"Jan 2014 – Jun 2014","location":"Belfast, United Kingdom","company_name":"Belfast Metropolitan College"}],"headline":"Growth Engineer @ Mustard.ie","location":"Ireland","skills":["Python","JavaScript","Java","Node.js","React.js","SQL","Git","Adobe Creative Suite","HTML","C#","Selenium","CSS","MongoDB","WordPress","regex","Web Scraping","Vagrant","Flask","NoSQL","RDBMS"],"education":[{"dates":"2014 – 2018","title":"Bachelor of Engineering (B.Eng.) 
Computer Science","institution_name":"Queen's University Belfast","degree":"Bachelor of Engineering (B.Eng.)","faculty":"Computer Science"},{"dates":"2012 – 2014","title":"Foundation Degree Interactive Media","institution_name":"Belfast Metropolitan College","degree":"Foundation Degree","faculty":"Interactive Media"},{"dates":"2005 – 2012","title":"Student","institution_name":"Lagan College Belfast","degree":"","faculty":"Secondary Education"}],"email":"[email protected]"},
{"_id":"3ab64cebdb994b4fdc35ecda","about":"Final year Computer Science student at Queens University Belfast. Currently working on a data\n \n collection and analysis system for big data integration as my dissertation project. Interested in cyber ...\n \n\n...\n\n\n\n \n \n Show more\n \n \n Show more of Dermot’s summary","name":"Dermot McAteer","url":"https://www.linkedin.com/in/dermot-mcateer/","experience":[{"company_link":"/company/4850002/","description":"Mustard is a SaaS enabled hiring marketplace for elite software engineers.As part of the Mustard business intelligence team I am currently developing a large scale data collation and analysis system to extract information from multiple online resources. Technologies include Python, JavaScript, AWS and MongoDB.","title":"Growth Engineer","date_range":"May 2017 – Present","location":"County Dublin, Ireland","company_name":"Mustard.ie"},{"company_link":"/company/4850002/","description":"Mustard is an invite only web platform, where Ireland's top companies deliver next-level job offers directly to a real-time list of the country's leading software engineers, pre-screened for experience, technical ability and readiness to move. Worked as part of the product team developing the Mustard web app. Day to day technologies included: AngularJS, CoffeeScript, Ruby on Rails, Python, MongoDB, AWS and Linux","title":"Technical Intern (Placement)","date_range":"Mar 2017 – May 2017","location":"Dublin, Ireland","company_name":"Mustard.ie"},{"company_link":"/company/10620744/","description":"Webio is an artificially intelligent conversational interface used for streamlining inbound and outbound customer engagement over channels such as SMS, What's app, Facebook messenger and Viber. I mainly focused on the frontend of the Webio web app. Day to day duties included fixing bugs, developing new features based on live user feedback, making requests and handling responses from a RESTful API. 
Technologies used: ReactJS, JSX, Java and SQL.","title":"Software Developer (Placement)","date_range":"Sep 2016 – Mar 2017","location":"Dublin, Ireland","company_name":"Webio Ltd"},{"company_link":"/company/1283/","description":"Worked as a cyber security intern at Infosys HQ in Bangalore. Here I carried out white hat hacking research on spear phishing attacks within large enterprises.I built web scraping scripts to retrieve publicly accessible information on high level employees from multiple websites using selenium for python. From the gathered information I identified possible targets and developed possible spear phishing emails using the social engineering toolkit with Kali Linux.","title":"Cyber Security Researcher (InStep Global Internship)","date_range":"Jun 2016 – Aug 2016","location":"Bangalore, India","company_name":"Infosys"},{"company_link":"/company/926365/","description":"Was part of a small design team of four designers/developers working on designing printed and digital publications for use within the college.I developed HTML e-zines for Microsoft Outlook, developed mock up UI designs for the new Belfast Met website and designed printed and digital documents using Adobe Creative Suite.I assisted with social media campaigns and used a range of analytic tools such as Tweet Deck,Hoot Suite and Sprout Social to view and keep track and analyse the social media coverage.","title":"Graphic and Web Designer","date_range":"Jan 2014 – Jun 2014","location":"Belfast, United Kingdom","company_name":"Belfast Metropolitan College"}],"headline":"Growth Engineer @ Mustard.ie","location":"Ireland","skills":["Python","JavaScript","Java","Node.js","React.js","SQL","Git","Adobe Creative Suite","HTML","C#","Selenium","CSS","MongoDB","WordPress","regex","Web Scraping","Vagrant","Flask","NoSQL","RDBMS"],"education":[{"dates":"2014 – 2018","title":"Bachelor of Engineering (B.Eng.) 
Computer Science","institution_name":"Queen's University Belfast","degree":"Bachelor of Engineering (B.Eng.)","faculty":"Computer Science"},{"dates":"2012 – 2014","title":"Foundation Degree Interactive Media","institution_name":"Belfast Metropolitan College","degree":"Foundation Degree","faculty":"Interactive Media"},{"dates":"2005 – 2012","title":"Student","institution_name":"Lagan College Belfast","degree":"","faculty":"Secondary Education"}],"email":"[email protected]"}]
151 changes: 151 additions & 0 deletions data/linkedin_ml.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"23\n",
"js 0.392232270276\n",
"adobe 0.196116135138\n",
"creative 0.196116135138\n",
"css 0.196116135138\n",
"flask 0.196116135138\n",
"git 0.196116135138\n",
"html 0.196116135138\n",
"java 0.196116135138\n",
"javascript 0.196116135138\n",
"mongodb 0.196116135138\n",
"node 0.196116135138\n",
"nosql 0.196116135138\n",
"python 0.196116135138\n",
"rdbms 0.196116135138\n",
"react 0.196116135138\n",
"regex 0.196116135138\n",
"scraping 0.196116135138\n",
"selenium 0.196116135138\n",
"sql 0.196116135138\n",
"suite 0.196116135138\n",
"Document 0\n",
"Document 1\n"
]
}
],
"source": [
"import json\n",
"import pandas as pd\n",
"from pprint import pprint\n",
"from collections import Counter\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"import csv\n",
"\n",
"# TF-IDF over the 'skills' field of the scraped LinkedIn profiles:\n",
"# prints the top-scoring words for the first profile, then writes every\n",
"# non-zero (document, word, score) triple to a CSV file.\n",
"\n",
"with open('linkedin.json') as data_file:\n",
"    data = json.load(data_file)\n",
"df = pd.DataFrame(data)\n",
"#df.head()\n",
"\n",
"#df['_id'] = df['_id'].astype('|S')\n",
"#df.dtypes\n",
"#pprint(data)\n",
"\n",
"# One string per profile, built from that profile's 'skills' list.\n",
"# Defined here so the cell runs on a fresh kernel; it was previously\n",
"# commented out, leaving 'skills' undefined below.\n",
"skills = df['skills'].astype(str)\n",
"\n",
"Counter(\" \".join(skills).split(\" \")).items()\n",
"#[('Someone', 1), ('ft.jgt', 1), ('My', 1), ('is', 2), ('to', 1), ('going', 1), ('place', 1), ('my', 1), ('nickname', 1)]\n",
"\n",
"# Initialise the vectoriser: single words only, English stop words removed.\n",
"tf = TfidfVectorizer(analyzer='word', ngram_range=(1,1), min_df = 0, stop_words = 'english')\n",
"\n",
"# Fit the vocabulary and build the TF-IDF matrix (one row per profile,\n",
"# one column per word), then report the vocabulary size.\n",
"tfidf_matrix = tf.fit_transform(skills)\n",
"feature_skills = tf.get_feature_names()\n",
"print(len(feature_skills))\n",
"\n",
"# Convert the sparse matrix to a dense one so rows can be turned into lists.\n",
"dense = tfidf_matrix.todense()\n",
"len(dense[0].tolist()[0])\n",
"\n",
"# (word_id, score) pairs for the first profile, keeping only words that occur.\n",
"linkedin_results = dense[0].tolist()[0]\n",
"skill_scores = [pair for pair in zip(range(0, len(linkedin_results)), linkedin_results) if pair[1] > 0]\n",
"\n",
"len(skill_scores)\n",
"\n",
"# Look up and print the 20 highest-scoring words, best first.\n",
"sorted_skill_scores = sorted(skill_scores, key=lambda t: t[1] * -1)\n",
"for skill, score in [(feature_skills[word_id], score) for (word_id, score) in sorted_skill_scores][:20]:\n",
"    print('{0: <20} {1}'.format(skill, score))\n",
"\n",
"# Write every non-zero score to CSV, one row per (document, word).\n",
"# NOTE(review): the output path is machine-specific - confirm before running elsewhere.\n",
"with open(\"C:/Users/Kim/Desktop/NISRA_to_share/Data/tfidf_scikit.csv\", \"w\") as file:\n",
"    writer = csv.writer(file, delimiter=\",\")\n",
"    # Header has three columns to match the three-value rows written below.\n",
"    writer.writerow([\"Document\", \"Skill\", \"Score\"])\n",
"\n",
"    doc_id = 0\n",
"    for doc in tfidf_matrix.todense():\n",
"        print(\"Document %d\" % doc_id)\n",
"        word_id = 0\n",
"        for score in doc.tolist()[0]:\n",
"            if score > 0:\n",
"                word = feature_skills[word_id]\n",
"                # Documents are numbered from 1 in the CSV.\n",
"                writer.writerow([doc_id+1, word.encode(\"utf-8\"), score])\n",
"            word_id +=1\n",
"        doc_id +=1\n",
"\n",
"#obj = TfidfVectorizer()\n",
"#X = obj.fit_transform(skills)\n",
"#print X\n",
"\n",
"#print X.__dict__\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

0 comments on commit b2ac78a

Please sign in to comment.