This project provides automated scripts to collect real-time snow report data from two websites: Colorado Ski and OnTheSnow. Leveraging Python's Selenium library and BeautifulSoup, the scrapers capture details such as snowfall, base depth, open runs, and open lifts, using explicit waits (WebDriverWait) to ensure dynamically loaded values are present before the page is parsed. The collected data are consolidated into CSV files for further analysis or integration into dashboards.
Below are abbreviated versions of the two scripts used to scrape data from coloradoski.com and onthesnow.com. (You can place them in separate Python files, e.g., colorado_ski_scraper.py and onthesnow_scraper.py.)
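Both scripts launch a visible Chrome window by default. If you want to run them unattended (for example, on a daily schedule), you can pass a headless configuration to webdriver.Chrome; the options below are a minimal sketch and an assumption on my part, not part of the original scripts.

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Hypothetical headless setup; adjust the driver path and flags to your environment
options = webdriver.ChromeOptions()
options.add_argument("--headless=new")  # run without opening a browser window
options.add_argument("--window-size=1920,1080")  # consistent layout for rendered pages
service = Service("C:/path/to/chromedriver.exe")
driver = webdriver.Chrome(service=service, options=options)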
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
driver_path = "C:/path/to/chromedriver.exe"
csv_file_path = "C:/path/to/snow_report_data_coloradoski.csv"
# Set up and launch Chrome
service = Service(driver_path)
driver = webdriver.Chrome(service=service)
driver.get("https://www.coloradoski.com/snow-report/")
# Wait until mid-mountain depth loads
# By.CLASS_NAME does not accept compound class names, so use a CSS selector
WebDriverWait(driver, 30).until(
    lambda d: any(
        mid.text.strip() != '0"'
        for mid in d.find_elements(By.CSS_SELECTOR, ".answer.mid-mtn")
    )
)
html = driver.page_source
soup = BeautifulSoup(html, "html.parser")
driver.quit()
snow_data = []
resorts = soup.find_all("div", class_="inner")
for resort in resorts:
    name_tag = resort.find("h3", class_="h5 text-left")
    if not name_tag:
        continue
    name = name_tag.text.strip()
    # Look up each field once and fall back to "N/A" when it is missing
    snow_24hr_tag = resort.find("span", class_="answer twentyfour")
    snow_24hr = snow_24hr_tag.text.strip() if snow_24hr_tag else "N/A"
    # ... repeat for other fields ...
    snow_data.append({
        "Date": datetime.now().strftime("%Y-%m-%d"),
        "Resort": name,
        "Snow (24hr)": snow_24hr,
        # ...
    })
new_data_df = pd.DataFrame(snow_data)
# Append or create CSV
if os.path.exists(csv_file_path):
    existing_data_df = pd.read_csv(csv_file_path)
    combined_df = pd.concat([existing_data_df, new_data_df], ignore_index=True)
else:
    combined_df = new_data_df
combined_df.to_csv(csv_file_path, index=False)
print(new_data_df)
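The "repeat for other fields" step applies the same find-or-default pattern to each remaining statistic. A small helper like the sketch below keeps the loop compact; the commented class names for the other fields are assumptions modeled on the "answer twentyfour" span shown above, not verified selectors.

def get_field(resort, class_name, default="N/A"):
    """Return the stripped text of the first matching <span>, or a default."""
    tag = resort.find("span", class_=class_name)
    return tag.text.strip() if tag else default

# Hypothetical usage inside the loop (class names are assumed):
# base_depth = get_field(resort, "answer mid-mtn")
# lifts_open = get_field(resort, "answer lifts")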
import os
import re
import logging
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
driver_path = r"C:\path\to\chromedriver.exe"
csv_file_path = r"C:\path\to\snow_report_data_onthesnow.csv"
log_file_path = r"C:\path\to\scraper_log.txt"
logging.basicConfig(filename=log_file_path, level=logging.INFO)
service = Service(driver_path)
driver = webdriver.Chrome(service=service)
driver.get("https://www.onthesnow.com/colorado/skireport")
WebDriverWait(driver, 40).until(
    EC.presence_of_element_located((By.CLASS_NAME, "styles_row__HA9Yq"))
)
html = driver.page_source
soup = BeautifulSoup(html, "html.parser")
driver.quit()
snow_data = []
resorts = soup.find_all("tr", class_="styles_row__HA9Yq")
for resort in resorts:
    data_tags = resort.find_all("span", class_="h4 styles_h4__x3zzi")
    # ... parse resort name, snowfall, base depth, open trails, etc. ...
new_data_df = pd.DataFrame(snow_data)
# Example of splitting base depth, computing % trails open, etc.
# ...
new_data_df.to_csv(csv_file_path, index=False)
logging.info("Data saved.")
print(new_data_df)
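The elided parsing and derived-column steps might look like the sketch below. The input formats (a base-depth range such as 30"-45" and an open-trails count such as 45/150) are assumptions about how OnTheSnow renders its table, so adjust the regular expressions to match the live markup.

import re

def split_base_depth(text):
    """Split an assumed range like '30"-45"' into low and high inches."""
    numbers = re.findall(r"\d+", text)
    if len(numbers) == 2:
        return int(numbers[0]), int(numbers[1])
    return None, None

def percent_trails_open(text):
    """Compute percent open from an assumed 'open/total' count like '45/150'."""
    match = re.search(r"(\d+)\s*/\s*(\d+)", text)
    if match and int(match.group(2)) > 0:
        return round(100 * int(match.group(1)) / int(match.group(2)), 1)
    return None

Either helper can then be applied column-wise with pandas (e.g., new_data_df["Base Depth"].apply(split_base_depth)) before the CSV is written.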
You can find the complete source code and instructions on how to run the web scrapers on my GitHub profile.
Below are examples of console output and CSV entries showing the updated data for the ski resorts: