# -*- coding: utf-8 -*-
"""Simple RSS to HTML converter."""

__version__ = "0.0.2"
__author__ = "Barbierosa"

import sys
import xml.etree.ElementTree as ET

import requests
from bs4 import BeautifulSoup

# Force UTF-8 on the standard streams regardless of the platform default.
sys.stdin.reconfigure(encoding='utf-8')
sys.stdout.reconfigure(encoding='utf-8')

# NOTE(review): not used in the visible part of the file; presumably consumed
# by the output code further down -- confirm before changing.
separator = ' '

# Seconds to wait for the feed server before giving up. Without a timeout
# ``requests.get`` can block forever on an unresponsive host.
_REQUEST_TIMEOUT = 30

# Three-letter English month abbreviation (lower case) -> month number.
_MONTH_NUMBERS = {
    'jan': 1,
    'feb': 2,
    'mar': 3,
    'apr': 4,
    'may': 5,
    'jun': 6,
    'jul': 7,
    'aug': 8,
    'sep': 9,
    'oct': 10,
    'nov': 11,
    'dec': 12,
}


def monthToNum(shortMonth):
    """Return the month number (1-12) for a three-letter month abbreviation.

    The lookup ignores case and surrounding whitespace, so both ``'jan'`` and
    the capitalised ``'Jan'`` are accepted.

    Raises:
        KeyError: if *shortMonth* is not a known abbreviation.
    """
    return _MONTH_NUMBERS[shortMonth.strip().lower()]


# scraping function
def get_rss(url, count):
    """Download the RSS feed at *url* and return its first *count* items.

    Each item is returned as a dict with the keys ``'title'``, ``'link'``,
    ``'published'`` and ``'description'``, holding the text of the matching
    child element.  An empty ``<link>`` is replaced by ``'https://senat.cz'``.

    Args:
        url: Address of the RSS feed.
        count: Maximum number of items to return (anything ``int()`` accepts).
            Zero or a negative value yields an empty list.

    Returns:
        A list of article dicts.  When anything goes wrong (network error,
        HTTP error status, malformed item, bad *count*) the problem is printed
        and an empty list is returned, so callers can always iterate over the
        result.
    """
    try:
        limit = max(int(count), 0)
        r = requests.get(url, timeout=_REQUEST_TIMEOUT)
        # Treat an HTTP error page as a failure instead of parsing it as an
        # (empty) feed.
        r.raise_for_status()
        soup = BeautifulSoup(r.content, features='xml')

        article_list = []
        for item in soup.find_all('item')[:limit]:
            article_list.append({
                'title': item.find('title').text,
                'link': item.find('link').text or 'https://senat.cz',
                'published': item.find('pubDate').text,
                'description': item.find('description').text,
            })
        return article_list
    except Exception as e:
        # Best-effort scraper: report the problem and hand back an empty
        # result rather than None, which would crash the caller's loop.
        print('The scraping job failed. See exception: ')
        print(e)
        return []


# Optional first command-line argument: how many feed items to import.
if len(sys.argv) > 1:
    num_votes_to_import = int(sys.argv[1])
else:
    num_votes_to_import = 999999

print(f'Starting scraping for {num_votes_to_import}')
xml_articles = get_rss(
    'https://www.senat.cz/senatori/hlasovani_rss.php?pid=343',
    num_votes_to_import,
)

for a in xml_articles:
    # print ('
    # NOTE(review): the original loop body is cut off at this point in the
    # visible source (it begins with the commented-out print above).  The
    # ``pass`` below only keeps the loop header syntactically complete; the
    # real body must be restored from the full file.
    pass