# strike.place
from scrapingant_client import ScrapingAntClient
from bs4 import BeautifulSoup
import pandas as pd
token = 'get-ur-own-4-free-at-scrapingant.com'
client = ScrapingAntClient(token=token)
urls = [
    'https://www.nba.com/stats/teams/four-factors',
    'https://www.nba.com/stats/teams/isolation?TypeGrouping=offensive',
    'https://www.nba.com/stats/teams/transition?TypeGrouping=offensive',
    'https://www.nba.com/stats/teams/roll-man?TypeGrouping=offensive',
    'https://www.nba.com/stats/teams/playtype-post-up?TypeGrouping=offensive',
    'https://www.nba.com/stats/teams/spot-up?TypeGrouping=offensive',
    'https://www.nba.com/stats/teams/hand-off?TypeGrouping=offensive',
    'https://www.nba.com/stats/teams/cut?TypeGrouping=offensive',
    'https://www.nba.com/stats/teams/off-screen?TypeGrouping=offensive',
    'https://www.nba.com/stats/teams/putbacks?TypeGrouping=offensive'
]
# Dictionary to hold data frames
data_frames = {}
for url in urls:
    response = client.general_request(url)
    content = response.content
    soup = BeautifulSoup(content, 'html.parser')
    table = soup.find('table', {'class': 'Crom_table__p1iZz'})
    df = pd.read_html(str(table))[0]
    data_frames[url] = df
# Now you have a dictionary where each URL is a key and the corresponding DataFrame is the value
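# A hedged note on the read_html calls throughout this file: pandas 2.1+ emits a
# FutureWarning when read_html is handed a literal HTML string and asks for a
# file-like object instead. Wrapping the markup in StringIO keeps the result
# identical:
#     from io import StringIO
#     df = pd.read_html(StringIO(str(table)))[0]
# The same substitution applies to every read_html(str(table)) call below.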
from scrapingant_client import ScrapingAntClient
from bs4 import BeautifulSoup
import pandas as pd
token = 'get-ur-own-4-free-at-scrapingant.com'
client = ScrapingAntClient(token=token)
urls = [
    'https://www.nba.com/stats/teams/isolation?TypeGrouping=defensive',
    'https://www.nba.com/stats/teams/transition?TypeGrouping=defensive',
    'https://www.nba.com/stats/teams/roll-man?TypeGrouping=defensive',
    'https://www.nba.com/stats/teams/playtype-post-up?TypeGrouping=defensive',
    'https://www.nba.com/stats/teams/spot-up?TypeGrouping=defensive',
    'https://www.nba.com/stats/teams/hand-off?TypeGrouping=defensive',
    'https://www.nba.com/stats/teams/cut?TypeGrouping=defensive',
    'https://www.nba.com/stats/teams/off-screen?TypeGrouping=defensive',
    'https://www.nba.com/stats/teams/putbacks?TypeGrouping=defensive'
]
# Dictionary to hold data frames
data_frames = {}
for url in urls:
    response = client.general_request(url)
    content = response.content
    soup = BeautifulSoup(content, 'html.parser')
    table = soup.find('table', {'class': 'Crom_table__p1iZz'})
    df = pd.read_html(str(table))[0]
    data_frames[url] = df
# Now you have a dictionary where each URL is a key and the corresponding DataFrame is the value
from scrapingant_client import ScrapingAntClient
from bs4 import BeautifulSoup
import pandas as pd
token = 'get-ur-own-4-free-at-scrapingant.com'
client = ScrapingAntClient(token=token)
urls = [
    'https://www.nba.com/stats/teams/drives',
    'https://www.nba.com/stats/teams/defensive-impact',
    'https://www.nba.com/stats/teams/catch-shoot',
    'https://www.nba.com/stats/teams/passing',
    'https://www.nba.com/stats/teams/touches',
    'https://www.nba.com/stats/teams/pullup',
    'https://www.nba.com/stats/teams/rebounding',
    'https://www.nba.com/stats/teams/offensive-rebounding',
    'https://www.nba.com/stats/teams/defensive-rebounding',
    'https://www.nba.com/stats/teams/shooting-efficiency'
]
# Dictionary to hold data frames
data_frames = {}
for url in urls:
    response = client.general_request(url)
    content = response.content
    soup = BeautifulSoup(content, 'html.parser')
    table = soup.find('table', {'class': 'Crom_table__p1iZz'})
    df = pd.read_html(str(table))[0]
    data_frames[url] = df
# Now you have a dictionary where each URL is a key and the corresponding DataFrame is the value
from scrapingant_client import ScrapingAntClient
from bs4 import BeautifulSoup
import pandas as pd
token = 'get-ur-own-4-free-at-scrapingant.com'
client = ScrapingAntClient(token=token)
urls = [
    'https://www.nba.com/stats/teams/speed-distance',
    'https://www.nba.com/stats/teams/elbow-touch',
    'https://www.nba.com/stats/teams/tracking-post-ups',
    'https://www.nba.com/stats/teams/paint-touch',
    'https://www.nba.com/stats/teams/boxscores-traditional',
    'https://www.nba.com/stats/teams/boxscores-advanced',
    'https://www.nba.com/stats/teams/boxscores-four-factors',
    'https://www.nba.com/stats/teams/opponent-shooting',
    'https://www.nba.com/stats/teams/opponent-shots-general'
]
# Dictionary to hold data frames
data_frames = {}
for url in urls:
    response = client.general_request(url)
    content = response.content
    soup = BeautifulSoup(content, 'html.parser')
    table = soup.find('table', {'class': 'Crom_table__p1iZz'})
    df = pd.read_html(str(table))[0]
    data_frames[url] = df
# Now you have a dictionary where each URL is a key and the corresponding DataFrame is the value
from scrapingant_client import ScrapingAntClient
from bs4 import BeautifulSoup
import pandas as pd
token = 'get-ur-own-4-free-at-scrapingant.com'
client = ScrapingAntClient(token=token)
urls = [
    'https://www.nba.com/stats/teams/opponent-shots-shotclock?ShotClockRange=24-22',
    'https://www.nba.com/stats/teams/opponent-shots-shotclock?ShotClockRange=22-18+Very+Early',
    'https://www.nba.com/stats/teams/opponent-shots-shotclock?ShotClockRange=18-15+Early',
    'https://www.nba.com/stats/teams/opponent-shots-shotclock?ShotClockRange=15-7+Average',
    'https://www.nba.com/stats/teams/opponent-shots-shotclock?ShotClockRange=7-4+Late',
    'https://www.nba.com/stats/teams/opponent-shots-shotclock?ShotClockRange=4-0+Very+Late',
    'https://www.nba.com/stats/teams/opponent-shots-dribbles?DribbleRange=0+Dribbles',
    'https://www.nba.com/stats/teams/opponent-shots-dribbles?DribbleRange=1+Dribbles',
    'https://www.nba.com/stats/teams/opponent-shots-dribbles?DribbleRange=2+Dribbles',
    'https://www.nba.com/stats/teams/opponent-shots-dribbles?DribbleRange=3-6+Dribbles',
    'https://www.nba.com/stats/teams/opponent-shots-dribbles?DribbleRange=7%2B+Dribbles',
]
# Dictionary to hold data frames
data_frames = {}
for url in urls:
    response = client.general_request(url)
    content = response.content
    soup = BeautifulSoup(content, 'html.parser')
    table = soup.find('table', {'class': 'Crom_table__p1iZz'})
    df = pd.read_html(str(table))[0]
    data_frames[url] = df
# Now you have a dictionary where each URL is a key and the corresponding DataFrame is the value
from scrapingant_client import ScrapingAntClient
from bs4 import BeautifulSoup
import pandas as pd
token = 'get-ur-own-4-free-at-scrapingant.com'
client = ScrapingAntClient(token=token)
urls = [
    'https://www.nba.com/stats/teams/opponent-shots-closest-defender?CloseDefDistRange=0-2+Feet+-+Very+Tight',
    'https://www.nba.com/stats/teams/opponent-shots-closest-defender?CloseDefDistRange=2-4+Feet+-+Tight',
    'https://www.nba.com/stats/teams/opponent-shots-closest-defender?CloseDefDistRange=4-6+Feet+-+Open',
    'https://www.nba.com/stats/teams/opponent-shots-closest-defender?CloseDefDistRange=6%2B+Feet+-+Wide+Open',
    'https://www.nba.com/stats/teams/shooting',
    'https://www.nba.com/stats/teams/shots-general',
    'https://www.nba.com/stats/teams/shots-shotclock?ShotClockRange=24-22',
    'https://www.nba.com/stats/teams/shots-shotclock?ShotClockRange=22-18+Very+Early',
    'https://www.nba.com/stats/teams/shots-shotclock?ShotClockRange=18-15+Early',
    'https://www.nba.com/stats/teams/shots-shotclock?ShotClockRange=15-7+Average',
    'https://www.nba.com/stats/teams/shots-shotclock?ShotClockRange=7-4+Late',
    'https://www.nba.com/stats/teams/shots-shotclock?ShotClockRange=4-0+Very+Late',
    'https://www.nba.com/stats/teams/shots-dribbles?DribbleRange=0+Dribbles',
    'https://www.nba.com/stats/teams/shots-dribbles?DribbleRange=1+Dribbles',
    'https://www.nba.com/stats/teams/shots-dribbles?DribbleRange=2+Dribbles',
    'https://www.nba.com/stats/teams/shots-dribbles?DribbleRange=3-6+Dribbles',
    'https://www.nba.com/stats/teams/shots-dribbles?DribbleRange=7%2B+Dribbles'
]
# Dictionary to hold data frames
data_frames = {}
for url in urls:
    response = client.general_request(url)
    content = response.content
    soup = BeautifulSoup(content, 'html.parser')
    table = soup.find('table', {'class': 'Crom_table__p1iZz'})
    df = pd.read_html(str(table))[0]
    data_frames[url] = df
# Now you have a dictionary where each URL is a key and the corresponding DataFrame is the value
from scrapingant_client import ScrapingAntClient
from bs4 import BeautifulSoup
import pandas as pd
import os
token = 'get-ur-own-4-free-at-scrapingant.com'
client = ScrapingAntClient(token=token)
urls = [
    'https://www.nba.com/stats/teams/shots-closest-defender?CloseDefDistRange=0-2+Feet+-+Very+Tight',
    'https://www.nba.com/stats/teams/shots-closest-defender?CloseDefDistRange=2-4+Feet+-+Tight',
    'https://www.nba.com/stats/teams/shots-closest-defender?CloseDefDistRange=4-6+Feet+-+Open',
    'https://www.nba.com/stats/teams/shots-closest-defender?CloseDefDistRange=6%2B+Feet+-+Wide+Open',
    'https://www.nba.com/stats/lineups/four-factors?slug=four-factors&GroupQuantity=5',
    'https://www.nba.com/stats/lineups/four-factors?slug=four-factors&GroupQuantity=4',
    'https://www.nba.com/stats/lineups/four-factors?slug=four-factors&GroupQuantity=3',
    'https://www.nba.com/stats/lineups/four-factors?slug=four-factors&GroupQuantity=2',
]
# Create directory if it doesn't exist
directory = '/users/lougoat/desktop/strike/nbabase/stats/'
if not os.path.exists(directory):
    os.makedirs(directory)
for url in urls:
    response = client.general_request(url)
    content = response.content
    soup = BeautifulSoup(content, 'html.parser')
    table = soup.find('table', {'class': 'Crom_table__p1iZz'})
    df = pd.read_html(str(table))[0]
    # Create a valid filename from the URL
    filename = url.split('/')[-1].replace('?', '&') + '.csv'
    filepath = os.path.join(directory, filename)
    # Save to CSV
    df.to_csv(filepath, index=False)
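# Note: the '?' is swapped for '&', but '=' and '&' survive into the filename,
# which is fine on macOS/Linux; sanitize further if these CSVs ever need to
# live on a Windows volume.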
### NBA Stake
import re
import csv
from bs4 import BeautifulSoup
from scrapingant_client import ScrapingAntClient
# Add your ScrapingAnt API Token
token = 'geturown'
# Initialize the client
client = ScrapingAntClient(token=token)
# The URL you want to scrape
url = 'https://stake.com/sports/basketball/usa/nba'
# Use the client to get the content of the page
result = client.general_request(
    url,
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
    },
    proxy_country='BR',  # Change this as needed
)
# Your HTML content
content = result.content
# We'll use regex to match the format of the URLs
pattern = re.compile(r'https://stake\.com/sports/basketball/usa/nba/\d{8}-[\w-]+-[\w-]+-[\w-]+-[\w-]+')
matches = re.findall(pattern, content)
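# findall can return the same event URL more than once; if duplicates matter
# downstream, an order-preserving dedup is a cheap safeguard (an addition, not
# part of the original flow):
#     matches = list(dict.fromkeys(matches))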
# Open the CSV file in write mode
with open('/Users/LouGoat/Desktop/stake.csv', 'w', newline='') as file:  # change path
    writer = csv.writer(file)
    # Loop over each matching event URL found in the content
    for match in matches:
        # We only need the slug after 'https://stake.com/sports/basketball/usa/nba/',
        # so split on '/' and keep the last segment
        labels = match.split('/')[-1]
        # Write the labels to the CSV file
        writer.writerow([labels])
import os
import requests
# Prepare CSV file to write
results_path = os.path.join('/Users/LouGoat/Desktop/', 'results.csv')
with open(results_path, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["URL", "Aria-label", "Odds"])
    # Visit each matching event URL collected above
    for url in matches:
        # Use the client to get the content of the page
        result = client.general_request(
            url,
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
            },
            proxy_country='BR',  # Change this as needed
        )
        # Your HTML content
        content = result.content
        # Initialize BeautifulSoup with the content you got
        soup = BeautifulSoup(content, 'html.parser')
        # Find all buttons and read each aria-label
        buttons = soup.find_all('button')
        for button in buttons:
            aria_label = button.get('aria-label')
            odds = None
            odds_div = button.find('div', {'class': 'odds'})
            if odds_div:
                odds_span = odds_div.find('span', {'class': 'weight-bold'})
                if odds_span:
                    odds = odds_span.text.strip()
            if aria_label is not None and odds is not None:
                writer.writerow([url, aria_label, odds])  # write data to CSV file
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import csv
import time
# Use the JSON key you downloaded when you set up the Google Sheets API
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('/users/lougoat/desktop/ontobb-efe3bad476ab.json', scope)
client = gspread.authorize(creds)
# Find the workbook by name and open the first sheet
# Make sure you use the right name here.
sheet = client.open('OntoBasketball').worksheet('Stake')
# Clear all existing content from the sheet
sheet.clear()
# Read the CSV file
with open('/users/lougoat/desktop/results.csv', 'r') as file:
    csv_data = csv.reader(file)
    for row in csv_data:
        sheet.append_row(row)  # append the row data to Google Sheet
        time.sleep(1)  # Add a delay of 1 second
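# A faster alternative sketch (an assumption, mirroring the batch approach used
# near the end of this file): push all rows in one API call instead of one
# request per row, which sidesteps the rate limit the sleep(1) works around.
#     with open('/users/lougoat/desktop/results.csv', 'r') as file:
#         rows = list(csv.reader(file))
#     sheet.append_rows(rows)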
### Webscraping to Local Database
# East and West Standings
import pandas as pd
from datetime import datetime
import requests
def fetch_and_save_standings(url):
    try:
        # Fetch data from the web page
        dfs = pd.read_html(url)
        # In this example, I assume the first table is for the Eastern Conference
        # and the second table is for the Western Conference.
        # You might need to adjust these indices based on the actual web page.
        df_east = dfs[0]
        df_west = dfs[1]
        # Remove asterisks from team names in both DataFrames
        df_east = df_east.applymap(lambda x: x.replace('*', '') if isinstance(x, str) else x)
        df_west = df_west.applymap(lambda x: x.replace('*', '') if isinstance(x, str) else x)
        # Save as CSV
        current_date = datetime.now().strftime("%m-%d")
        east_csv_path = f"/users/lougoat/desktop/strike/database/BasketballReference/east_standings_{current_date}.csv"
        west_csv_path = f"/users/lougoat/desktop/strike/database/BasketballReference/west_standings_{current_date}.csv"
        df_east.to_csv(east_csv_path, index=False)
        df_west.to_csv(west_csv_path, index=False)
        print(f"DataFrames created and CSV files saved as {east_csv_path} and {west_csv_path}.")
    except Exception as e:
        print(f"An error occurred: {e}")
if __name__ == '__main__':
    url = 'https://www.basketball-reference.com/leagues/NBA_2024_standings.html'
    fetch_and_save_standings(url)
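# Hedged note: basketball-reference throttles aggressive clients, so a
# once-a-day standings pull like this is fine, but add delays between requests
# if you extend this to many pages.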
# BR Per Possession
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
# Define the URL for 2023-24 season statistics per possession
url = "https://www.basketball-reference.com/leagues/NBA_2024_per_poss.html"
# Fetch the HTML data
html = urlopen(url)
# Create a BeautifulSoup object
soup = BeautifulSoup(html, features="lxml")
# Find the column headers and store them in a list
headers = [header.getText() for header in soup.findAll('th', limit=32)]
headers = headers[1:]
# Remove empty columns
if '' in headers:
    headers.remove('')
# Get the rows from the table
rows = soup.findAll('tr')[1:]
player_stats = [[td.getText() for td in rows[i].findAll('td')] for i in range(len(rows))]
# Remove empty rows
player_stats = [e for e in player_stats if e != []]
# Remove the extra column from each row (assuming it's at index 28)
player_stats = [row[:28] + row[29:] for row in player_stats]
# Create the DataFrame
nba_stats_2023 = pd.DataFrame(player_stats, columns=headers)
# Team ID Mapping
team_mapping = {
    'ATL': 1, 'BOS': 2, 'BRK': 3, 'CHA': 4, 'CHI': 5,
    'CLE': 6, 'DAL': 7, 'DEN': 8, 'DET': 9, 'GSW': 10,
    'HOU': 11, 'IND': 12, 'LAC': 13, 'LAL': 14, 'MEM': 15,
    'MIA': 16, 'MIL': 17, 'MIN': 18, 'NOP': 19, 'NYK': 20,
    'OKC': 21, 'ORL': 22, 'PHI': 23, 'PHO': 24, 'POR': 25,
    'SAC': 26, 'SAS': 27, 'TOR': 28, 'UTA': 29, 'WAS': 30, 'TOT': 31,
}
nba_stats_2023['Team_ID'] = nba_stats_2023['Tm'].map(team_mapping)
# Unique ID
nba_stats_2023['Unique_ID'] = nba_stats_2023.apply(lambda row: f"{row['Player']}_{row['Pos']}_{row['Age']}_{row['Tm']}", axis=1)
nba_stats_2023['Unique_ID'] = nba_stats_2023['Unique_ID'].astype('category').cat.codes.apply(lambda x: str(x).zfill(4))
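# Caveat: cat.codes numbers the categories by their sort order within this
# run's data, so a player's zero-padded Unique_ID is stable within a single
# scrape but not necessarily across days.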
# Reorder columns
nba_stats_2023 = nba_stats_2023[['Unique_ID', 'Team_ID'] + [col for col in nba_stats_2023.columns if col not in ['Unique_ID', 'Team_ID']]]
# Data manipulation
columns_to_divide = ['FG', 'FGA', '3P', '3PA', '2P', '2PA', 'FT', 'FTA', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']
nba_stats_2023[columns_to_divide] = nba_stats_2023[columns_to_divide].astype(float) / 100
nba_stats_2023['True Shooting'] = nba_stats_2023['PTS'] / (2 * (nba_stats_2023['FGA'] + 0.44 * nba_stats_2023['FTA']))
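# True Shooting counts 0.44 free-throw attempts as one shooting possession, so
# TS% = PTS / (2 * (FGA + 0.44 * FTA)) folds twos, threes, and free throws into
# a single efficiency number.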
# Get the current date
current_date = datetime.now().strftime('%m-%d')
# Save CSV
file_path = f'/users/lougoat/desktop/strike/database/BasketballReference/br_{current_date}.csv'
try:
    nba_stats_2023.to_csv(file_path, index=False)
    print(f"DataFrame created and CSV file saved as {file_path}.")
except Exception as e:
    print(f"An error occurred: {e}")
### Inpredictable NBA Rankings
import pandas as pd
import requests
from bs4 import BeautifulSoup
# The URL you want to scrape
url = "https://stats.inpredictable.com/rankings/nba.php"
# Fetch the page content using the requests library
response = requests.get(url)
# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
# Find the table you're interested in scraping
table = soup.find('table') # Adjust this line if there are multiple tables
# Convert the table to a DataFrame
df = pd.read_html(str(table))[0]
# Save the DataFrame as a CSV file
save_path = "/users/lougoat/desktop/strike/database/Inpredictable/nba_rankings.csv"
df.to_csv(save_path, index=False)
print(f"DataFrame created and CSV file saved as {save_path}")
### Inpredictable Team Poss
import pandas as pd
import requests
from bs4 import BeautifulSoup
# The URL you want to scrape
url = "https://stats.inpredictable.com/nba/ssnTeamPoss.php?season=2023&po=0&frdt=2023-10-24&todt=2024-04-16&view=off"
# Fetch the page content using the requests library
response = requests.get(url)
# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
# Find the table you're interested in scraping
table = soup.find('table') # Adjust this line if there are multiple tables
# Convert the table to a DataFrame
df = pd.read_html(str(table))[0]
# Save the DataFrame as a CSV file
save_path = "/users/lougoat/desktop/strike/database/Inpredictable/nba_perpossesion.csv"
df.to_csv(save_path, index=False)
print(f"DataFrame created and CSV file saved as {save_path}")
### Inpredictable On/Off
import pandas as pd
import requests
from bs4 import BeautifulSoup
# The URL you want to scrape
url = "https://stats.inpredictable.com/nba/onoff.php?season=2023&pos=ALL&team=BKN&po=0&frdt=2023-10-24&todt=2024-06-12&mpos=100&rate=tot&sort=son_wpa&order=DESC"
# Fetch the page content using the requests library
response = requests.get(url)
# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
# Find the table you're interested in scraping
table = soup.find('table') # Adjust this line if there are multiple tables
# Convert the table to a DataFrame
df = pd.read_html(str(table))[0]
# Save the DataFrame as a CSV file
save_path = "/users/lougoat/desktop/strike/database/Inpredictable/OnOff/BKN.csv"
df.to_csv(save_path, index=False)
print(f"DataFrame created and CSV file saved as {save_path}")
### Inpredictable Player
import pandas as pd
import requests
from bs4 import BeautifulSoup
# # The URL you want to scrape
# url = "https://stats.inpredictable.com/nba/ssnPlayer.php"
# # Fetch the page content using the requests library
# response = requests.get(url)
# # Parse the HTML content using BeautifulSoup
# soup = BeautifulSoup(response.content, 'html.parser')
# # Find the table you're interested in scraping
# table = soup.find('table') # Adjust this line if there are multiple tables
# # Convert the table to a DataFrame
# df = pd.read_html(str(table))[0]
# # Save the DataFrame as a CSV file
# save_path = "/users/lougoat/desktop/strike/database/Inpredictable/nba_wp.csv"
# df.to_csv(save_path, index=False)
# print(f"DataFrame created and CSV file saved as {save_path}")
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Base URL and query parameters
base_url = "https://stats.inpredictable.com/nba/ssnPlayer.php"
query_params = {
    'season': '2023',
    'team': 'ALL',
    'pos': 'ALL',
    'po': '0',
    'frdt': '2023-10-24',
    'todt': '2023-11-04',
    'rate': 'tot',
    'sort': 'sWPA',
    'order': 'DESC'
}
# Iterate from grp 1 to grp 11
for grp in range(1, 12):
    # Update the 'grp' parameter
    query_params['grp'] = grp
    # Send a GET request
    response = requests.get(base_url, params=query_params)
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')
        # Find the table you're interested in scraping
        table = soup.find('table')  # Adjust this if there are multiple tables
        # Convert the table to a DataFrame
        df = pd.read_html(str(table))[0]
        # Define the save path
        save_path = f"/users/lougoat/desktop/strike/database/Inpredictable/nba_wp_grp{grp}.csv"
        # Save the DataFrame as a CSV file
        df.to_csv(save_path, index=False)
        print(f"DataFrame for grp {grp} created and CSV file saved as {save_path}")
    else:
        print(f"Failed to retrieve data for grp {grp}. Status code: {response.status_code}")
### NBAStuffer Player Stats
import requests
from bs4 import BeautifulSoup
import csv
import os
# Define the URL
url = 'https://www.nbastuffer.com/2023-2024-nba-player-stats/'
# Fetch the page content
response = requests.get(url)
# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')
    # Locate tables in the HTML (assuming class or id attributes identify them)
    tables = soup.find_all('table')  # Replace with actual class or ID if needed
    # Create the output directory if it doesn't exist
    output_dir = '/Users/lougoat/Desktop/Strike/Database/NBAStuffer'
    os.makedirs(output_dir, exist_ok=True)
    # Open the CSV file for writing
    with open(os.path.join(output_dir, 'AdvancedStats.csv'), 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        # Assuming the first table is what you want
        table = tables[0]
        # Write each row to the CSV
        for row in table.find_all('tr'):
            columns = row.find_all(['td', 'th'])  # Both table data and header
            column_texts = [col.text for col in columns]
            csvwriter.writerow(column_texts)
    print(f"Data saved to {os.path.join(output_dir, 'AdvancedStats.csv')}")
else:
    print("Failed to retrieve the webpage.")
### NBAStuffer Rest Day Analysis
import requests
from bs4 import BeautifulSoup
import csv
import os
# Define the URL
url = 'https://www.nbastuffer.com/2023-2024-nba-schedule-rest-days-analysis/'
# Fetch the page content
response = requests.get(url)
# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')
    # Locate tables in the HTML (assuming class or id attributes identify them)
    tables = soup.find_all('table')  # Replace with actual class or ID if needed
    # Create the output directory if it doesn't exist
    output_dir = '/Users/lougoat/Desktop/Strike/Database/NBAStuffer'
    os.makedirs(output_dir, exist_ok=True)
    # Open the CSV file for writing
    with open(os.path.join(output_dir, 'RestStats.csv'), 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        # Assuming the first table is what you want
        table = tables[0]
        # Write each row to the CSV
        for row in table.find_all('tr'):
            columns = row.find_all(['td', 'th'])  # Both table data and header
            column_texts = [col.text for col in columns]
            csvwriter.writerow(column_texts)
    print(f"Data saved to {os.path.join(output_dir, 'RestStats.csv')}")
else:
    print("Failed to retrieve the webpage.")
import gspread
import csv
import datetime
import os
import time
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
# Define the path where your CSV files are stored
folder_path = '/Users/Lougoat/desktop/Strike/Database/Inpredictable'
# Generate the list of file names based on the pattern grp 1 through grp 11
# Adjust this pattern to match your actual file names
file_names = [f'nba_wp_grp{i}.csv' for i in range(1, 12)]
# Initialize an empty list to store DataFrames
df_list = []
# Loop through each file name and read the CSV into a DataFrame
for file_name in file_names:
    file_path = os.path.join(folder_path, file_name)
    if os.path.exists(file_path):
        df = pd.read_csv(file_path)
        df_list.append(df)
    else:
        print(f"File not found: {file_path}")
# Concatenate all the DataFrames into a single DataFrame
combined_df = pd.concat(df_list, ignore_index=True)
# Define the path for the combined CSV file
output_file = '/Users/Lougoat/desktop/Strike/Database/Inpredictable/nba_wp.csv'
# Save the combined DataFrame to a CSV file
combined_df.to_csv(output_file, index=False)
print(f"Combined CSV file created at {output_file}")
# Initialize Google Sheets API client
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('/users/lougoat/desktop/ontobb-efe3bad476ab.json', scope)
client = gspread.authorize(creds)
def archive_sheets():
    # Get yesterday's date for the archive directory
    yesterday = (datetime.datetime.today() - datetime.timedelta(days=1)).strftime('%m-%d')  # Format: MM-DD
    # Open the Google Sheet
    spreadsheet = client.open('OntoBasketball')
    # Iterate through each worksheet in the spreadsheet
    for sheet in spreadsheet.worksheets():
        sheet_name = sheet.title
        # Create the archive directory if it doesn't exist
        archive_dir = f'/users/lougoat/desktop/strike/database/archive/{yesterday}/{sheet_name}'
        os.makedirs(archive_dir, exist_ok=True)
        # Archive current sheet content
        rows = sheet.get_all_values()
        archive_file_path = os.path.join(archive_dir, f'{sheet_name}.csv')
        with open(archive_file_path, 'w', newline='', encoding='utf-8') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerows(rows)
def update_sheet(csv_path, sheet_name):
    # Open the Google Sheet
    sheet = client.open('OntoBasketball').worksheet(sheet_name)
    # Clear existing content in the sheet
    sheet.clear()
    # Read CSV file
    with open(csv_path, newline='', encoding='utf-8') as csvfile:
        csvreader = csv.reader(csvfile)
        # Collect rows to be added
        rows_to_add = [row for row in csvreader]
    # Add a time delay to avoid hitting API rate limits
    time.sleep(2)
    # Append all rows at once to Google Sheet
    sheet.append_rows(rows_to_add)
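# Hedged note: append_rows defaults to value_input_option='RAW', so numeric
# cells land in Sheets as strings; passing value_input_option='USER_ENTERED'
# lets Sheets parse numbers and dates as if they were typed by hand:
#     sheet.append_rows(rows_to_add, value_input_option='USER_ENTERED')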
# Archive all sheets
archive_sheets()
# Today's date for file paths
today = datetime.datetime.today().strftime('%m-%d') # Format: MM-DD
# Define file paths and sheet names
files_and_sheets = [
    (f'/Users/Lougoat/desktop/Strike/Database/BasketballReference/br_{today}.csv', 'BasketballReference'),
    (f'/Users/Lougoat/desktop/Strike/Database/BasketballReference/DailySchedule/{today[:2]}/{today[3:]}.csv', 'DailySchedule'),
    (f'/Users/Lougoat/desktop/Strike/Database/BasketballReference/east_standings_{today}.csv', 'East'),
    (f'/Users/Lougoat/desktop/Strike/Database/BasketballReference/west_standings_{today}.csv', 'West'),
    ('/Users/Lougoat/desktop/Strike/Database/Inpredictable/nba_perpossesion.csv', 'PerPossession'),
    ('/Users/Lougoat/desktop/Strike/Database/Inpredictable/nba_rankings.csv', 'Rankings'),
    ('/Users/Lougoat/desktop/Strike/Database/Inpredictable/nba_wp.csv', 'WP'),
    ('/Users/Lougoat/desktop/Strike/Database/NBAStuffer/AdvancedStats.csv', 'AdvancedStats'),
    ('/Users/Lougoat/desktop/Strike/Database/NBAStuffer/RestStats.csv', 'RestStats')
]
# Process each file
for csv_path, sheet_name in files_and_sheets:
    update_sheet(csv_path, sheet_name)