feed2toot-docker/feed2toot/confparse.py
2017-04-21 03:51:09 +02:00

235 lines
12 KiB
Python

# -*- coding: utf-8 -*-
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/
# Get values of the configuration file
'''Get values of the configuration file'''
# standard library imports
from configparser import SafeConfigParser, NoOptionError, NoSectionError
import logging
import os
import os.path
import socket
import sys
# 3rd party library imports
import feedparser
class ConfParse(object):
'''ConfParse class'''
def __init__(self, clioptions):
'''Constructor of the ConfParse class'''
self.clioptions = clioptions
self.tweetformat = ''
self.stringsep = ','
self.confs = []
self.main()
def main(self):
'''Main of the ConfParse class'''
for pathtoconfig in self.clioptions.configs:
options = {}
# read the configuration file
config = SafeConfigParser()
if not config.read(os.path.expanduser(pathtoconfig)):
sys.exit('Could not read config file')
###########################
#
# the rss section
#
###########################
section = 'rss'
if config.has_section(section):
############################
# tweet option
############################
oldconfoption = 'tweet'
confoption = 'toot'
if config.has_option(section, oldconfoption):
self.tweetformat = config.get(section, oldconfoption)
elif config.has_option(section, confoption):
self.tweetformat = config.get(section, confoption)
else:
sys.exit('You should define a format for your tweet with the parameter "{confoption}" in the [{section}] section'.format(confoption=confoption, section=section))
############################
# pattern format option
############################
options['patterns'] = {}
options['patternscasesensitive'] = {}
for pattern in ['summary_detail', 'published_parsed', 'guidislink', 'authors', 'links', 'title_detail', 'author', 'author_detail', 'comments', 'published', 'summary', 'tags', 'title', 'link', 'id']:
currentoption = '{}_pattern'.format(pattern)
if config.has_option(section, currentoption):
tmppattern = config.get(section, currentoption)
if self.stringsep in tmppattern:
options['patterns'][currentoption] = [i for i in tmppattern.split(self.stringsep) if i]
else:
options['patterns'][currentoption] = [tmppattern]
# pattern_case_sensitive format
currentoption = '{}_pattern_case_sensitive'.format(pattern)
if config.has_option(section, currentoption):
try:
options['patternscasesensitive'][currentoption] = config.getboolean(section, currentoption)
except ValueError as err:
print(err)
options['patternscasesensitive'][currentoption] = True
############################
# rsslist
############################
bozoexception = False
feeds = []
patterns = []
currentoption = 'uri_list'
if config.has_option(section, currentoption):
rssfile = config.get(section, currentoption)
rssfile = os.path.expanduser(rssfile)
if not os.path.exists(rssfile) or not os.path.isfile(rssfile):
sys.exit('The path to the uri_list parameter is not valid: {rssfile}'.format(rssfile=rssfile))
rsslist = open(rssfile, 'r').readlines()
for line in rsslist:
line = line.strip()
# split each line in two parts, rss link and a string with the different patterns to look for
confobjects = line.split('|')
if len(confobjects) > 3 or len(confobjects) == 2:
sys.exit('This line in the list of uri to parse is not formatted correctly: {line}'.format(line))
if len(confobjects) == 3:
rss, rssobject, patternstring = line.split('|')
if len(confobjects) == 1:
rss = confobjects[0]
rssobject = ''
patternstring = ''
# split different searched patterns
patterns = [i for i in patternstring.split(self.stringsep) if i]
# retrieve the content of the rss
feed = feedparser.parse(rss)
if 'bozo_exception' in feed:
bozoexception = True
logging.warning(feed['bozo_exception'])
continue
# check if the rss feed and the rss entry are valid ones
if 'entries' in feed:
if rssobject and rssobject not in feed['entries'][0].keys():
sys.exit('The rss object {rssobject} could not be found in the feed {rss}'.format(rssobject=rssobject, rss=rss))
else:
sys.exit('The rss feed {rss} does not seem to be valid'.format(rss=rss))
feeds.append({'feed': feed, 'patterns': patterns, 'rssobject': rssobject})
# test if all feeds in the list were unsuccessfully retrieved and if so, leave
if not feeds and bozoexception:
sys.exit('No feed could be retrieved. Leaving.')
############################
# uri
############################
if not feeds and not self.clioptions.rss_uri:
confoption = 'uri'
if config.has_option(section, confoption):
options['rss_uri'] = config.get('rss', 'uri')
else:
sys.exit('{confoption} parameter in the [{section}] section of the configuration file is mandatory. Exiting.'.format(section=section, confoption=confoption))
else:
options['rss_uri'] = self.clioptions.rss_uri
# get the rss feed for rss parameter of [rss] section
feed = feedparser.parse(options['rss_uri'])
if not feed:
sys.exit('Unable to parse the feed at the following url: {rss}'.format(rss=rss))
#########################################
# no_uri_pattern_no_global_pattern option
#########################################
currentoption = 'no_uri_pattern_no_global_pattern'
# default value
options['nopatternurinoglobalpattern'] = False
if config.has_option(section, currentoption):
options['nopatternurinoglobalpattern'] = config.getboolean(section, currentoption)
###########################
#
# the cache section
#
###########################
section = 'cache'
if not self.clioptions.cachefile:
confoption = 'cachefile'
if config.has_section(section):
options['cachefile'] = config.get(section, confoption)
else:
sys.exit('You should provide a {confoption} parameter in the [{section}] section'.format(section=section, confoption=confoption))
options['cachefile'] = os.path.expanduser(options['cachefile'])
cachefileparent = os.path.dirname(options['cachefile'])
if cachefileparent and not os.path.exists(cachefileparent):
sys.exit('The parent directory of the cache file does not exist: {cachefileparent}'.format(cachefileparent=cachefileparent))
else:
options['cachefile'] = self.clioptions.cachefile
### cache limit
if config.has_section(section):
confoption = 'cache_limit'
if config.has_option(section, confoption):
try:
options['cache_limit'] = int(config.get(section, confoption))
except ValueError as err:
sys.exit('Error in configuration with the {confoption} parameter in [{section}]: {err}'.format(confoption=confoption, section=section, err=err))
else:
options['cache_limit'] = 100
else:
options['cache_limit'] = 100
###########################
#
# the hashtag section
#
###########################
section = 'hashtaglist'
if not self.clioptions.hashtaglist:
confoption = 'several_words_hashtags_list'
if config.has_section(section):
options['hashtaglist'] = config.get(section, confoption)
options['hashtaglist'] = os.path.expanduser(options['hashtaglist'])
if not os.path.exists(options['hashtaglist']) or not os.path.isfile(options['hashtaglist']):
sys.exit('The path to the several_words_hashtags_list parameter is not valid: {hashtaglist}'.format(hashtaglist=options['hashtaglist']))
else:
options['hashtaglist'] = False
###########################
#
# the plugins section
#
###########################
plugins = {}
section = 'influxdb'
if config.has_section(section):
##########################################
# host, port, user, pass, database options
##########################################
plugins[section] = {}
for currentoption in ['host','port','user','pass','database']:
if config.has_option(section, currentoption):
plugins[section][currentoption] = config.get(section, currentoption)
if 'host' not in plugins[section]:
plugins[section]['host'] = '127.0.0.1'
if 'port' not in plugins[section]:
plugins[section]['port'] = 8086
if 'measurement' not in plugins[section]:
plugins[section]['measurement'] = 'tweets'
for field in ['user','pass','database']:
if field not in plugins[section]:
sys.exit('Parsing error for {field} in the [{section}] section: {field} is not defined'.format(field=field, section=section))
# create the returned object with previously parsed data
if feeds:
self.confs.append((options, config, self.tweetformat, feeds, plugins))
else:
self.confs.append((options, config, self.tweetformat, [{'feed': feed, 'patterns': [], 'rssobject': ''}], plugins))
@property
def confvalues(self):
'''Return the values of the different configuration files'''
return self.confs