def downloadPathwayCSV(url, timeout=40, download_wait=20):
    """Download a CSV from a web page by driving Firefox through Selenium.

    Opens *url* in a fresh Firefox session, clicks the button whose id is
    ``Download``, then clicks the ``Save`` entry that appears in the page,
    and finally keeps the browser open long enough for the download to
    finish.

    Args:
        url: Address of the page that carries the ``Download`` button.
        timeout: Maximum number of seconds to wait for each of the two
            elements to become clickable. The default matches the total
            delay the previous fixed sleeps allowed before each click.
        download_wait: Seconds to keep the browser open after clicking
            ``Save`` so the file can be written.

    Returns:
        None. The file is saved by the browser, not by this function;
        presumably it lands wherever the Firefox profile is configured to
        store downloads (verify before relying on a path).

    Raises:
        selenium.common.exceptions.TimeoutException: If either element does
            not become clickable within *timeout* seconds.
    """
    # Imported locally so the function carries its own dependencies.
    import time

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    driver = webdriver.Firefox()
    try:
        driver.get(url)
        wait = WebDriverWait(driver, timeout)

        # Poll until each element is actually clickable instead of sleeping
        # a fixed 20 s before and after every lookup.
        download_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//button[@id='Download']"))
        )
        download_button.click()

        save_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//span[.='Save']"))
        )
        save_button.click()

        # Selenium exposes no "download finished" signal, so a plain delay
        # is still needed before the browser is torn down.
        time.sleep(download_wait)
    finally:
        # quit() ends the whole session (window and driver process) and runs
        # even when a lookup or click above raises.
        driver.quit()
compoundNum + "_pathway%22}" pathway = requests.get(pathwayURL).text + # Open the compound's Pathways page in a browser and download its pathway CSV + downloadPathwayURL = "https://pubchem.ncbi.nlm.nih.gov/compound/"+ compoundNum +"#section=Pathways&fullscreen=true" + pathwayCSV = downloadPathwayCSV(downloadPathwayURL) + pathwayData = pd.read_csv(pathwayURL) # drop duplicate names in list