surveilling-surveillance/streetview/download.py
2021-05-20 13:22:04 -07:00

130 lines
4.2 KiB
Python

import time
import traceback
import sys
import random
import hashlib
import hmac
import base64
import fire
import os
import logging
import pandas as pd
import numpy as np
import multiprocessing as mp
from tqdm import tqdm
import requests as r
import urllib.parse as urlparse
from util import constants as C
def _init_downloader(*args):
    """Pool initializer: create the downloader owned by this process.

    ``args`` are forwarded unchanged to the ``SVImageDownloader``
    constructor. The resulting instance is stored in the module-level
    ``downloader`` global, which is where ``_download`` looks it up.
    """
    global downloader
    downloader = SVImageDownloader(*args)
def _download(key):
    """Run one download task with the module-level ``downloader``.

    ``key`` is passed straight through to ``downloader.download`` and that
    call's return value is returned as-is.
    """
    # Only reading the module-level name, so no ``global`` statement is needed.
    worker = downloader
    return worker.download(key)
class SVImageDownloader:
    """Download individual Street View images through signed request URLs.

    Args:
        key_to_sec: API credentials, either an iterable of ``(key, secret)``
            pairs or a mapping ``{key: secret}``. Rows are spread over the
            pairs by ``row index % number of pairs``.
        save_dir: Directory the ``<panoid>_<heading>.jpg`` files go to.
        sleep_time: Upper bound, in seconds, of the random pause taken
            before every download.
        timeout: Timeout in seconds handed to ``requests.get``. Pass
            ``None`` to wait indefinitely.
    """

    def __init__(self,
                 key_to_sec,
                 save_dir,
                 sleep_time=0.0,
                 timeout=30.0):
        self.key_to_sec = key_to_sec
        self.sleep_time = sleep_time
        self.save_dir = save_dir
        self.timeout = timeout

    def _key_pairs(self):
        """Return the configured credentials as a list of (key, secret)."""
        # Iterating a mapping yields only its keys, which cannot be unpacked
        # into (key, secret); use the items in that case.
        if isinstance(self.key_to_sec, dict):
            return list(self.key_to_sec.items())
        return list(self.key_to_sec)

    @staticmethod
    def _sign_url(url, secret):
        """Return ``url`` with an HMAC-SHA1 ``signature`` parameter appended.

        Args:
            url: Full request URL, including its query string.
            secret: URL-safe base64 encoded signing secret.
        """
        parts = urlparse.urlparse(url)
        # We only need to sign the path+query part of the string
        url_to_sign = parts.path + "?" + parts.query
        # Decode the URL-safe base64 secret into its binary form
        decoded_key = base64.urlsafe_b64decode(secret)
        # The HMAC-SHA1 digest is binary ...
        signature = hmac.new(decoded_key,
                             url_to_sign.encode(),
                             hashlib.sha1)
        # ... so encode it as URL-safe base64 for use within a URL
        encoded_signature = base64.urlsafe_b64encode(signature.digest())
        original_url = (f'{parts.scheme}://{parts.netloc}'
                        f'{parts.path}?{parts.query}')
        return original_url + "&signature=" + encoded_signature.decode()

    def get_url(self, panoid, head, keysec):
        """Build the signed image URL for one panorama and heading.

        Args:
            panoid: Panorama id, sent as the ``pano`` parameter.
            head: Value of the ``heading`` parameter.
            keysec: ``(key, secret)`` pair used for this request.

        Returns:
            The request URL including its ``signature`` parameter. Size,
            field of view and pitch are read from ``util.constants``.
        """
        key, secret = keysec
        url = (f"https://maps.googleapis.com/maps/api/streetview?"
               f"size={C.SV_SIZE}&pano={panoid}&fov={C.SV_FOV}&"
               f"heading={head}&pitch={C.SV_PITCH}&key={key}")
        return self._sign_url(url, secret)

    def download_image(self,
                       panoid,
                       head,
                       keysec,
                       save_path,
                       ):
        """Fetch one image and write it to ``save_path``.

        The file is named ``<panoid>_<head>.jpg``.

        Raises:
            requests.HTTPError: The server answered with a 4xx/5xx status.
            requests.RequestException: Connection problems or a timeout.
            OSError: The directory or file could not be written.
        """
        os.makedirs(save_path, exist_ok=True)
        url = self.get_url(panoid, head, keysec)
        resp = r.get(url, timeout=self.timeout)
        # Without this check an error response body would be stored as a
        # ".jpg" file and the download reported as successful.
        resp.raise_for_status()
        write_path = os.path.join(save_path, f'{panoid}_{head}.jpg')
        with open(write_path, "wb") as f:
            f.write(resp.content)

    def download(self, rtuple):
        """Download the image described by one ``(index, row)`` pair.

        Args:
            rtuple: ``(rid, row)`` where ``row`` supports ``row['panoid']``
                and ``row['heading']`` and ``rid`` is used to pick the
                credential pair.

        Returns:
            ``{"panoid": None}`` on success. On failure a dict with the
            row's ``panoid`` and ``heading`` plus the error text under
            ``exception``; the traceback is also printed.
        """
        rid, row = rtuple
        # Random pause of up to ``sleep_time`` seconds before each request.
        time.sleep(np.random.rand() * self.sleep_time)
        head = row['heading']
        try:
            keysecs = self._key_pairs()
            if not keysecs:
                raise ValueError("no API key/secret pairs configured")
            keysec = keysecs[rid % len(keysecs)]
            self.download_image(panoid=row['panoid'],
                                head=head,
                                keysec=keysec,
                                save_path=self.save_dir)
        # NOTE(review): BaseException also captures KeyboardInterrupt and
        # SystemExit. Presumably intentional so that every task yields a
        # result dict -- confirm before narrowing to Exception.
        except BaseException as e:
            traceback.print_exception(*sys.exc_info())
            return {"panoid": row['panoid'],
                    "heading": head,
                    "exception": str(e)}
        return {"panoid": None}
class ParallelSVImageDownloader:
    """Download many Street View images with a pool of worker processes.

    Args:
        key_to_sec: API credentials, forwarded to each worker's
            ``SVImageDownloader``.
        save_dir: Directory the images are written to. It is created when
            the object is constructed.
        sleep_time: Forwarded to each worker's ``SVImageDownloader``.
        nthread: Number of worker processes in the pool.
    """

    # Columns of the error table returned by ``download``.
    ERROR_COLUMNS = ("panoid", "heading", "exception")

    def __init__(self,
                 key_to_sec,
                 save_dir,
                 sleep_time=0.0,
                 nthread=10,
                 ):
        self.key_to_sec = key_to_sec
        self.save_dir = save_dir
        self.sleep_time = sleep_time
        self.nthread = nthread
        os.makedirs(self.save_dir, exist_ok=True)

    def download(self, df, sample_frac=1.0):
        """Download one image per row of ``df`` and report the failures.

        Args:
            df: DataFrame whose rows are handed to the workers as
                ``(index, row)`` pairs.
            sample_frac: Fraction of rows to download. Rows are drawn, and
                thereby shuffled, with ``DataFrame.sample``.

        Returns:
            DataFrame with the columns ``panoid``, ``heading`` and
            ``exception`` holding one row per failed download. It is empty
            when nothing failed or when there was nothing to download.
        """
        columns = list(self.ERROR_COLUMNS)
        rows = df if df.empty else df.sample(frac=sample_frac)
        if rows.empty:
            # Nothing to do. Returning here avoids starting a pool and the
            # KeyError that dropna(subset=['panoid']) raises on a frame
            # built from an empty result list.
            return pd.DataFrame(columns=columns)

        print("Start downloading ...")
        with mp.Pool(self.nthread,
                     initializer=_init_downloader,
                     initargs=(self.key_to_sec,
                               self.save_dir,
                               self.sleep_time)) as pool:
            results = list(tqdm(pool.imap(_download, rows.iterrows()),
                                total=len(rows),
                                smoothing=0.1))

        image_errors = pd.DataFrame(results, columns=columns)
        # Successful downloads report ``panoid=None``; keep failures only.
        image_errors.dropna(subset=['panoid'], inplace=True)
        return image_errors
def download_streetview_image(key,
                              sec,
                              meta_path="data/meta.csv",
                              save_dir="./data/image"):
    """Download a Street View image for every row of a metadata CSV.

    Args:
        key: API key.
        sec: Signing secret that belongs to ``key``.
        meta_path: CSV file listing the images to fetch. The downloader in
            this module reads the ``panoid`` and ``heading`` columns.
        save_dir: Directory the images are written to.

    Returns:
        None. The table of failed downloads produced by the downloader is
        not returned.
    """
    df = pd.read_csv(meta_path)
    parallel_downloader = ParallelSVImageDownloader(key_to_sec=[(key, sec)],
                                                    save_dir=save_dir)
    parallel_downloader.download(df)