"""Check if AddressBase TOIDs will match MasterMap
"""
import csv
import glob
import os
import sys

from multiprocessing import Pool

# Raise the csv module's per-field size limit to sys.maxsize; fields longer
# than the limit make the reader raise csv.Error.
# NOTE(review): presumably needed because the input CSVs carry very large
# fields (e.g. geometry text) -- confirm against the actual data.
csv.field_size_limit(sys.maxsize)

def main(ab_path, mm_path):
    """Compare AddressBase and MasterMap tile files pairwise, in parallel.

    Both directories are globbed, each list of matches is sorted, and the two
    sorted lists are paired by position. Every pair is passed to ``check`` in
    a worker process.

    Args:
        ab_path: Directory searched for ``*.gml.csv.filtered.csv`` files.
        mm_path: Directory searched for ``*.gml.csv`` files.
    """
    ab_paths = sorted(glob.glob(os.path.join(ab_path, "*.gml.csv.filtered.csv")))
    mm_paths = sorted(glob.glob(os.path.join(mm_path, "*.gml.csv")))

    # Explicit comparison instead of ``assert``: assert statements are removed
    # when Python runs with -O, which would silence this diagnostic entirely.
    if len(ab_paths) != len(mm_paths):
        print(
            "Warning: found {} AddressBase file(s) but {} MasterMap file(s); "
            "files are paired by sorted position and unpaired ones are "
            "skipped".format(len(ab_paths), len(mm_paths)),
            file=sys.stderr,
        )
        print(ab_paths)
        print(mm_paths)

    # zip() stops at the shorter list, so processing continues on a
    # best-effort basis even when the counts differ.
    zipped_paths = zip(ab_paths, mm_paths)

    # parallel map over tiles
    with Pool() as p:
        p.starmap(check, zipped_paths)

def _read_column(path, column):
    """Return the set of distinct values found under *column* in a CSV file."""
    # newline='' lets the csv module do its own line-ending handling, so line
    # breaks embedded in quoted fields are parsed correctly.
    with open(path, 'r', newline='') as fh:
        return {row[column] for row in csv.DictReader(fh)}


def _write_lines(path, values):
    """Write *values* to *path*, one per line, in sorted order."""
    # Sorting makes the output reproducible; plain set iteration order can
    # differ from one run to the next.
    with open(path, 'w') as fh:
        fh.writelines("{}\n".format(value) for value in sorted(values))


def check(ab_path, mm_path):
    """Report which AddressBase TOIDs of one tile are absent from MasterMap.

    Reads the ``toid`` column of the AddressBase CSV and the ``fid`` column of
    the MasterMap CSV, prints a one-line summary of the counts, and writes two
    text files next to the AddressBase file:

    * ``missing_toids_<tile>.txt`` -- TOIDs in AddressBase but not MasterMap
    * ``ab_toids_<tile>.txt`` -- every distinct AddressBase TOID

    ``<tile>`` is the AddressBase file name up to its first ``.``.

    Args:
        ab_path: Path to the AddressBase CSV (must have a ``toid`` column).
        mm_path: Path to the MasterMap CSV (must have a ``fid`` column).

    Raises:
        KeyError: If the expected column is missing from either file.
    """
    tile = os.path.basename(ab_path).split(".")[0]
    output_base = os.path.dirname(ab_path)

    ab_toids = _read_column(ab_path, 'toid')
    mm_toids = _read_column(mm_path, 'fid')

    missing = ab_toids - mm_toids
    print(tile, "MasterMap:", len(mm_toids), "Addressbase:", len(ab_toids), "AB but not MM:", len(missing))

    _write_lines(os.path.join(output_base, 'missing_toids_{}.txt'.format(tile)), missing)
    _write_lines(os.path.join(output_base, 'ab_toids_{}.txt'.format(tile)), ab_toids)


if __name__ == '__main__':
    # Exactly two positional arguments are required: the AddressBase
    # directory followed by the MasterMap directory.
    if len(sys.argv) != 3:
        print("Usage: check_ab_mm_match.py ./path/to/addressbase/dir ./path/to/mastermap/dir")
        # sys.exit instead of the bare exit() helper: exit() is injected by
        # the site module for interactive use and is not guaranteed to exist
        # (e.g. under ``python -S``). The exit status is unchanged.
        sys.exit(-1)
    main(sys.argv[1], sys.argv[2])