gcpdiag.queries.web
Fetch the HTML content from the given page URL.
def fetch_and_extract_table(page_url: str, tag: str = None, tag_id: str = None, class_name: str = None)

    # Module-level imports used by the listings on this page.
    import logging

    import requests
    from bs4 import BeautifulSoup


    def fetch_and_extract_table(page_url: str,
                                tag: str = None,
                                tag_id: str = None,
                                class_name: str = None):
      """Fetch the table from the given page URL and return it."""
      table = None
      response = get(url=page_url, timeout=10)
      # Raise an exception if the response is not successful.
      response.raise_for_status()
      soup = BeautifulSoup(response.content, 'html.parser')
      content_fetched = None
      if tag:
        if tag_id:
          content_fetched = soup.find(tag, id=tag_id)
        elif class_name:
          content_fetched = soup.find(tag, class_=class_name)
        else:
          content_fetched = soup.find(tag)

      if not content_fetched:
        logging.error('tag/id/class not found for %s with tag %s', page_url, tag)
        return table
      # If the matched element is itself the table, return it directly;
      # otherwise return the first table that follows the matched element.
      if tag == 'table':
        return content_fetched
      table = content_fetched.find_next('table')
      if not table:
        logging.error('Table not found for %s with tag %s', page_url, tag)
      return table
Fetch the table from the given page URL and return it.
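For illustration, a minimal usage sketch; the URL and the id anchor below are hypothetical placeholders, not pages gcpdiag actually fetches:

    # Hypothetical example: extract the table that follows an element
    # such as <h2 id="limits"> on a documentation page.
    table = fetch_and_extract_table('https://example.com/docs/quotas',
                                    tag='h2',
                                    tag_id='limits')
    if table is not None:
      for row in table.find_all('tr'):
        # Each row is a bs4 Tag; collect the text of its header/data cells.
        print([cell.get_text(strip=True) for cell in row.find_all(['th', 'td'])])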
def get(url, params=None, timeout=10, *, data=None, headers=None) -> requests.Response

    def get(
        url,
        params=None,
        timeout=10,
        *,
        data=None,
        headers=None,
    ) -> requests.Response:
      """A wrapper around requests.get for HTTP calls which can't use the Google discovery API."""
      return requests.get(url=url,
                          params=params,
                          timeout=timeout,
                          data=data,
                          headers=headers)
A wrapper around requests.get for HTTP calls which can't use the Google discovery API.
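A short sketch of the wrapper in use, with a placeholder URL; it forwards its arguments to requests.get and defaults to a 10-second timeout:

    # Hypothetical example: a plain HTTP GET outside the discovery API.
    response = get('https://example.com/api/status',
                   params={'format': 'json'},
                   headers={'Accept': 'application/json'})
    response.raise_for_status()
    print(response.status_code, response.headers.get('content-type'))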