forked from josifoski/SingleAuditRepo
-
Notifications
You must be signed in to change notification settings - Fork 8
/
get_UT.py
65 lines (55 loc) · 2.27 KB
/
get_UT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import argparse
import configparser
from utils import Crawler as CoreCrawler
# Entity categories offered by the Utah state auditor's search form.
# Each value is selected, in turn, in the form's .entityTypeSelect
# dropdown by the __main__ loop below.
ENTITY_TYPES = (
    'City',
    'County',
    'District Health',
    'Interlocal',
    'Local and Special Service District',
    'Mental Health',
    'School District or Charter School',
    'Town',
)
class Crawler(CoreCrawler):
    """Utah-specific crawler: maps a downloaded local filename to the
    remote (directory, filename) pair used when uploading to FTP."""

    abbr = 'UT'

    def _get_remote_filename(self, local_filename):
        """Derive the remote directory and filename from a local name.

        The local filename has the shape '<entity>|<type>|<year>.pdf'
        (built by the __main__ loop in this file).

        Returns:
            tuple: (directory, '<abbr> <name> <year>.pdf')
        """
        entity_name, entity_type, year = local_filename[:-4].split('|')
        # BUG FIX: the original condition was `in ('City' 'Town')` --
        # implicit string concatenation, i.e. a substring test against
        # 'CityTown' -- and a second, identical (hence unreachable)
        # elif branch followed it. A real tuple is intended here.
        if entity_type in ('City', 'Town'):
            directory = 'General Purpose'
            # Drop the entity-type suffix from the name for cities/towns.
            name = entity_name.replace(' Town', '').replace(' City', '')
        elif entity_type == 'School District or Charter School':
            directory = 'School District'
            name = entity_name
        else:
            # County, District Health, Interlocal, etc. all land here.
            directory = 'Special District'
            name = entity_name
        filename = '{} {} {}.pdf'.format(self.abbr, name, year)
        return directory, filename
if __name__ == '__main__':
    # CLI entry point: fetch Utah financial reports for the given year
    # and upload each one to FTP.
    argparser = argparse.ArgumentParser()
    argparser.add_argument("year")
    args = argparser.parse_args()

    config = configparser.ConfigParser()
    config.read('conf.ini')

    crawler = Crawler(config, 'utah')
    crawler.get(config.get('utah', 'url'))
    for entity_type in ENTITY_TYPES:
        crawler.select_option('form[method="post"] .entityTypeSelect', entity_type)
        for entity in crawler.get_text('form[method="post"] .entitySelect option', single=False):
            if entity.startswith('--'):
                # Skip placeholder options such as '-- Select --'.
                continue
            crawler.select_option('form[method="post"] .entitySelect', entity)
            try:
                crawler.select_option('form[method="post"] .yearSelect', args.year)
                crawler.select_option('form[method="post"] .documentSelect', 'Financial Report')
            except Exception:
                # Best-effort: no report exists for this entity/year
                # combination, so move on to the next entity.
                continue
            crawler.click('.btn.btnUploadDetails.btnSearch')
            url = crawler.get_attr('tbody.reportData a', 'href')
            # Build the local filename once (was duplicated verbatim in
            # the download and upload calls); '/' is replaced because it
            # is a path separator and would break the local file path.
            local_filename = '{}|{}|{}.pdf'.format(
                entity, entity_type, args.year).replace('/', ' ')
            crawler.download(url, local_filename)
            crawler.upload_to_ftp(local_filename)
    crawler.close()