gcpdiag.queries.web

Fetch the HTML content from the given page URL.
def fetch_and_extract_table(page_url: str, tag: str = None, tag_id: str = None, class_name: str = None)
import logging

from bs4 import BeautifulSoup


def fetch_and_extract_table(page_url: str,
                            tag: str = None,
                            tag_id: str = None,
                            class_name: str = None):
  """Fetch the table from the given page url and return it."""
  table = None
  # get() is this module's own wrapper around requests.get, defined below.
  response = get(url=page_url, timeout=10)
  # Raise an exception if the response is not successful.
  response.raise_for_status()
  soup = BeautifulSoup(response.content, 'html.parser')
  content_fetched = None
  if tag:
    if tag_id:
      content_fetched = soup.find(tag, id=tag_id)
    elif class_name:
      content_fetched = soup.find(tag, class_=class_name)
    else:
      content_fetched = soup.find(tag)

  if not content_fetched:
    logging.error('tag/id/class not found for %s with tag %s', page_url, tag)
    return table
  # If the matched element is itself the table, return it directly.
  if tag == 'table':
    return content_fetched
  # Otherwise, return the first table that follows the matched element.
  table = content_fetched.find_next('table')
  if not table:
    logging.error('Table not found for %s with tag %s', page_url, tag)
    return table

  return table

Fetch the table from the given page URL and return it. Returns None if the requested tag/id/class is not found on the page, or if no table follows the matched element.
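As a rough usage sketch, a caller might anchor on a heading and extract the table that follows it. The URL, tag, and id below are placeholders, not values taken from gcpdiag itself; any page with an HTML table works the same way:

# Illustrative call; the URL and tag_id are hypothetical.
table = fetch_and_extract_table(
    'https://example.com/docs/machine-types',
    tag='h2',
    tag_id='comparison',
)
if table is not None:
  # The result is a bs4 element; iterate over its rows.
  for row in table.find_all('tr'):
    cells = [cell.get_text(strip=True) for cell in row.find_all(['th', 'td'])]
    print(cells)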

def get(url, params=None, timeout=10, *, data=None, headers=None) -> requests.Response
import requests


def get(
    url,
    params=None,
    timeout=10,
    *,  # data and headers are keyword-only arguments.
    data=None,
    headers=None,
) -> requests.Response:
  """A wrapper around requests.get for HTTP calls which can't use the Google discovery API."""
  return requests.get(url=url,
                      params=params,
                      timeout=timeout,
                      data=data,
                      headers=headers)

A wrapper around requests.get for HTTP calls which can't use the Google discovery API.
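A minimal usage sketch; the URL and header below are placeholders, not endpoints used by gcpdiag:

# Illustrative call; any plain HTTP endpoint works the same way.
response = get('https://example.com/status.json',
               headers={'Accept': 'application/json'},
               timeout=10)
response.raise_for_status()
print(response.status_code)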