forked from josifoski/SingleAuditRepo
-
Notifications
You must be signed in to change notification settings - Fork 8
/
get_UT.py
65 lines (55 loc) · 2.27 KB
/
get_UT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import argparse
import configparser
from utils import Crawler as CoreCrawler
# Entity categories offered by the Utah state auditor's search form.
# Each value is selected, in turn, in the form's .entityTypeSelect
# dropdown by the __main__ loop below.
ENTITY_TYPES = (
    'City',
    'County',
    'District Health',
    'Interlocal',
    'Local and Special Service District',
    'Mental Health',
    'School District or Charter School',
    'Town',
)
class Crawler(CoreCrawler):
    """Utah-specific crawler: maps a downloaded local filename to the
    remote (directory, filename) pair used when uploading to FTP."""

    abbr = 'UT'

    def _get_remote_filename(self, local_filename):
        """Derive the remote directory and filename from a local name.

        The local filename has the shape '<entity>|<type>|<year>.pdf'
        (built by the __main__ loop in this file).

        Returns:
            tuple: (directory, '<abbr> <name> <year>.pdf')
        """
        entity_name, entity_type, year = local_filename[:-4].split('|')
        # BUG FIX: the original condition was `in ('City' 'Town')` --
        # implicit string concatenation, i.e. a substring test against
        # 'CityTown' -- and a second, identical (hence unreachable)
        # elif branch followed it. A real tuple is intended here.
        if entity_type in ('City', 'Town'):
            directory = 'General Purpose'
            # Drop the entity-type suffix from the name for cities/towns.
            name = entity_name.replace(' Town', '').replace(' City', '')
        elif entity_type == 'School District or Charter School':
            directory = 'School District'
            name = entity_name
        else:
            # County, District Health, Interlocal, etc. all land here.
            directory = 'Special District'
            name = entity_name
        filename = '{} {} {}.pdf'.format(self.abbr, name, year)
        return directory, filename
if __name__ == '__main__':
    # CLI entry point: fetch Utah financial reports for the given year
    # and upload each one to FTP.
    argparser = argparse.ArgumentParser()
    argparser.add_argument("year")
    args = argparser.parse_args()

    config = configparser.ConfigParser()
    config.read('conf.ini')

    crawler = Crawler(config, 'utah')
    crawler.get(config.get('utah', 'url'))
    for entity_type in ENTITY_TYPES:
        crawler.select_option('form[method="post"] .entityTypeSelect', entity_type)
        for entity in crawler.get_text('form[method="post"] .entitySelect option', single=False):
            if entity.startswith('--'):
                # Skip placeholder options such as '-- Select --'.
                continue
            crawler.select_option('form[method="post"] .entitySelect', entity)
            try:
                crawler.select_option('form[method="post"] .yearSelect', args.year)
                crawler.select_option('form[method="post"] .documentSelect', 'Financial Report')
            except Exception:
                # Best-effort: no report exists for this entity/year
                # combination, so move on to the next entity.
                continue
            crawler.click('.btn.btnUploadDetails.btnSearch')
            url = crawler.get_attr('tbody.reportData a', 'href')
            # Build the local filename once (was duplicated verbatim in
            # the download and upload calls); '/' is replaced because it
            # is a path separator and would break the local file path.
            local_filename = '{}|{}|{}.pdf'.format(
                entity, entity_type, args.year).replace('/', ' ')
            crawler.download(url, local_filename)
            crawler.upload_to_ftp(local_filename)
    crawler.close()