Print Output In As A List
The following code runs fine. It gathers information for each listing on LinkedIn. (The account info is given and is free to use, as it is a test account.) However, the output joins the data instead of printing it as a list… (question text truncated)
Solution 1:
I can run your code.
Here is what I get, with help from the answer to "Efficient way to unnest (explode) multiple list columns in a pandas DataFrame":
import time
import pandas as pd
import numpy as np
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Log in to LinkedIn, scrape one page of people-search results and collect,
# for every visible result, the text stored below under "Location",
# "Company" and "Title". The result is `df`: a one-row DataFrame whose three
# cells each hold a list of strings.
#
# NOTE(review): the login credentials are hard-coded below. The original post
# describes this as a throwaway test account; do not reuse this pattern with
# real credentials -- read them from the environment or a secrets store.
# NOTE(review): passing the driver path positionally to `webdriver.Chrome`
# is the Selenium 3 / early Selenium 4 calling convention; newer releases
# presumably require a `Service` object instead -- confirm against the
# installed Selenium version.
test1 = []
options = Options()
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
url = "https://www.linkedin.com/uas/login?session_redirect=https%3A%2F%2Fwww%2Elinkedin%2Ecom%2Fsearch%2Fresults%2Fpeople%2F%3FcurrentCompany%3D%255B%25221252860%2522%255D%26geoUrn%3D%255B%2522103644278%2522%255D%26keywords%3Dsales%26origin%3DFACETED_SEARCH%26page%3D2&fromSignIn=true&trk=cold_join_sign_in"
try:
    driver.get(url)
    time.sleep(2)  # fixed pause before looking up the login form fields

    # `find_element(By.…)` replaces the deprecated `find_element_by_*`
    # helpers and is available in both Selenium 3 and Selenium 4.
    username = driver.find_element(By.ID, 'username')
    username.send_keys('kbradons04@gmail.com')
    password = driver.find_element(By.ID, 'password')
    password.send_keys('Applesauce1')
    password.submit()

    # Scroll to the bottom of the page, then pause before reading results.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)

    # Each wait blocks for up to 10 seconds until all matching elements are
    # visible, and raises if they are not.
    elementj = WebDriverWait(driver, 10).until(
        EC.visibility_of_all_elements_located(
            (By.CSS_SELECTOR, ".subline-level-2.t-12.t-black--light.t-normal.search-result__truncate")
        )
    )
    place1 = [j.text for j in elementj]
    elementk = WebDriverWait(driver, 10).until(
        EC.visibility_of_all_elements_located(
            (By.CSS_SELECTOR, ".subline-level-1.t-14.t-black.t-normal.search-result__truncate")
        )
    )
    compan = [c.text for c in elementk]
    element1 = driver.find_elements(By.CLASS_NAME, "actor-name")
    title = [t.text for t in element1]
finally:
    # All text has been copied into plain Python lists by now, so the browser
    # can be closed; this also releases it when a lookup or wait fails.
    driver.quit()

diction = {"Location": place1, "Company": compan, "Title": title}
test1.append(diction)
print(test1)
# One row, three columns; every cell is a list (one entry per search result).
df = pd.DataFrame(test1)
def explode(df, lst_cols, fill_value=''):
    """Unnest list-valued columns so that each list element gets its own row.

    Args:
        df: DataFrame in which every cell of the columns named in
            ``lst_cols`` holds a list-like value. Within one row, the lists
            of all ``lst_cols`` are expected to have the same length.
        lst_cols: Name of the column to unnest, or a list of such names.
        fill_value: Value written into the unnested columns for rows whose
            list (in the first of ``lst_cols``) is empty.

    Returns:
        A new DataFrame with the same columns, in the same order, as ``df``.
        Values of the remaining columns are repeated once per list element.
        Rows whose list is empty are placed after all unnested rows and keep
        their original index label.

    Raises:
        ValueError: If the unnested columns end up with different total
            lengths (raised by the DataFrame constructor).
    """
    # make sure `lst_cols` is a list
    if lst_cols and not isinstance(lst_cols, list):
        lst_cols = [lst_cols]
    # all columns except `lst_cols`
    idx_cols = df.columns.difference(lst_cols)
    # calculate lengths of lists; the first list column drives the repetition
    lens = df[lst_cols[0]].str.len()

    # Repeat every non-list value once per list element, and flatten each
    # list column into one long array. Building the frame from a single dict
    # also covers the case where *every* column is a list column.
    data = {col: np.repeat(df[col].values, lens.values) for col in idx_cols}
    data.update({col: np.concatenate(df[col].values) for col in lst_cols})
    exploded = pd.DataFrame(data)

    if not (lens > 0).all():
        # At least one list is empty: such rows contributed nothing above, so
        # add them back and fill their unnested cells with `fill_value`.
        # `pd.concat` replaces `DataFrame.append`, removed in pandas 2.0.
        exploded = pd.concat(
            [exploded, df.loc[lens == 0, idx_cols]], sort=False
        ).fillna(fill_value)

    # restore the original column order
    return exploded.loc[:, df.columns]
# Unnest the three list-valued columns of `df` so that every scraped search
# result becomes its own row.
list_columns = ['Location', 'Company', 'Title']
explode(df, list_columns)
And the result
   Location                       Company                                             Title
0  Dayton, Ohio Area              National Account Executive                          LinkedIn Member
1  Dayton, Ohio Area              Currently seeking permanent employment              LinkedIn Member
2  Dayton, Ohio Area              Account Manager at LexisNexis                       LinkedIn Member
3  Greater Denver Area            Currently seeking new opportunities in managem...   LinkedIn Member
4  Dayton, Ohio Area              Advertising Sales Representative at AMOS MEDIA      LinkedIn Member
5  Dayton, Ohio Area              Territory Manager at Huntington Outdoor, LLC        LinkedIn Member
6  Vandalia, Ohio, United States  Cintas                                              LinkedIn Member
7  Dayton, Ohio Area              Outside Sales Representative at Carter Lumber.      LinkedIn Member
8  Dayton, Ohio Area              Actively Searching                                  LinkedIn Member
9  Corpus Christi, Texas Area     Currently looking for sales position                LinkedIn Member
Post a Comment for "Print Output In As A List"