Scraping

Dynamic class names

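If the class names change but keep a stable prefix such as subCategory___, match on that prefix with a regular expression instead of the full class name: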

import re

import requests
from bs4 import BeautifulSoup
 
base_url = "https://trustpilot.com"


def get_soup(url, timeout=10):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Without it a stalled connection would block
            the script indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the response body using the
        ``lxml`` parser.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were
    # the requested document.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'lxml')
 
# Maps each category name to a dict of {sub-category name: relative URI}.
data = {}

# The page's class names are matched by their prefix only (everything up to
# and including the "___" separator), so compile each pattern once up front
# instead of on every loop iteration.
category_class = re.compile('subCategory___')
header_class = re.compile('subCategoryHeader___')
list_class = re.compile('subCategoryList___')
item_class = re.compile('subCategoryItem___')
link_class = re.compile('subtle___')

soup = get_soup(base_url + '/categories')
for category in soup.find_all('div', {'class': category_class}):
    header = category.find('h3', {'class': header_class})
    if header is None:
        # A card without a heading cannot be keyed; skip it rather than
        # aborting the whole scrape with an AttributeError.
        continue
    name = header.text.strip()
    data[name] = {}

    sub_categories = category.find('div', {'class': list_class})
    if sub_categories is None:
        # Category present but with no sub-category list: keep the empty entry.
        continue

    for sub_category in sub_categories.find_all('div', {'class': item_class}):
        # Look the anchor up once; it supplies both the label and the link.
        link = sub_category.find('a', {'class': link_class})
        if link is None:
            continue
        sub_category_uri = link.get('href')
        if sub_category_uri is None:
            continue
        data[name][link.text] = sub_category_uri