Skip to content

DEM

Copyright (c) 2023-2024 Jianfeng Zhu. All rights reserved.

download_by_stream(url, file_name)

通过stream下载url链接文件

Parameters:

Name Type Description Default
url str

url链接

required
file_name str

文件的本地存储地址

required
Source code in hydro_opendata/downloader/downloader.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def download_by_stream(url, file_name):
    """
    Download the file at ``url`` to ``file_name`` with a streamed GET request.

    A HEAD request is sent first (following redirects). The body is fetched
    only when that request answers 200; a 404 or any other status is reported
    on stdout and nothing is written.

    Args:
        url (str): URL of the file to download
        file_name (str): local path the downloaded bytes are written to

    """

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE"
    }

    # A HEAD request only retrieves the response headers. requests.head() does
    # not follow redirects unless asked to, so without allow_redirects a
    # redirecting URL would answer 3xx and be skipped without any message.
    head = requests.head(url, headers=headers, allow_redirects=True)

    if head.status_code == 404:
        print("404 not found.")
    elif head.status_code == 200:
        # Using the response as a context manager releases the connection even
        # if opening or writing the output file fails.
        with requests.get(url, headers=headers, stream=True) as response:
            with open(file_name, mode="wb") as f:
                downloaded = 0
                # Write the body chunk by chunk so it never sits in memory whole.
                for chunk in response.iter_content(chunk_size=1024):
                    f.write(chunk)
                    # Count real bytes: a chunk may be shorter than chunk_size.
                    downloaded += len(chunk)
                    print(
                        "\r",
                        "已下载:%.2f MB" % (downloaded / (1024 * 1024)),
                        end="",
                        flush=True,
                    )

                # Terminate the in-place progress line before the final message.
                print()
                print(f"{file_name}下载完成。")
    else:
        # Previously any other status was ignored without feedback.
        print(f"Unexpected status code {head.status_code}; nothing downloaded.")

download_from_url(url, file_name)

通过url链接下载文件的一般方法

Parameters:

Name Type Description Default
url str

url链接

required
file_name str

文件的本地存储地址

required
Source code in hydro_opendata/downloader/downloader.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def download_from_url(url, file_name):
    """
    Download ``url`` with a plain GET request and save the body to ``file_name``.

    The whole response body is read into memory before it is written, and the
    HTTP status code is not checked.

    Args:
        url (str): URL of the file to download
        file_name (str): local path the downloaded bytes are written to

    """

    # The context manager closes the response even when the output file cannot
    # be opened or written; the previous trailing close() was skipped then.
    with requests.get(url) as response:
        with open(file_name, "wb") as out_file:
            out_file.write(response.content)

download_sigletasking(url, file_name)

单函数线程下载文件,显示进度条

Parameters:

Name Type Description Default
url str

文件链接

required
file_name str

文件名或文件路径

required
Source code in hydro_opendata/downloader/downloader.py
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
def download_sigletasking(url: str, file_name: str):
    """
    Download a file, choosing the download method from the HEAD response.

    A HEAD request is sent first (following redirects). On 200, the download is
    delegated to ``download_single_task_with_chunks`` when the server reports a
    ``Content-Length``, and to ``download_by_stream`` otherwise. A 404 or any
    other status is reported on stdout and nothing is downloaded.

    Args:
        url (str): direct link to the file
        file_name (str): file name or file path to save to

    """
    # Request headers sent with the HEAD request and handed to the chunked helper.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE"
    }

    # A HEAD request only retrieves the response headers. requests.head() does
    # not follow redirects unless asked to, so without allow_redirects a
    # redirecting URL would answer 3xx and be skipped without any message.
    head = requests.head(url, headers=headers, allow_redirects=True)

    if head.status_code == 404:
        print("404 not found.")
    elif head.status_code == 200:
        # File size in bytes, passed on exactly as received from the header.
        file_size = head.headers.get("Content-Length")
        if file_size is not None:
            download_single_task_with_chunks(file_size, url, headers, file_name)
        else:
            # Size unknown: fall back to the streaming download.
            download_by_stream(url, file_name)
    else:
        # Previously any other status was ignored without feedback.
        print(f"Unexpected status code {head.status_code}; nothing downloaded.")

unzip_file(zip_path, output_folder=None)

Unzips a ZIP file.

Parameters: - zip_path (str): The path to the ZIP file. - output_folder (str, optional): The folder where the ZIP file should be extracted to. Defaults to a folder named after the ZIP file in the ZIP file's directory.

Returns: - str: The path to the extracted folder.

Source code in hydro_opendata/downloader/downloader.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def unzip_file(zip_path, output_folder=None):
    """
    Extract a ZIP archive into a folder, unless that folder already exists.

    Parameters:
    - zip_path (str): Path of the ZIP archive to extract.
    - output_folder (str, optional): Destination folder. When omitted or empty, a folder
                                     named after the archive (extension removed) next to
                                     the archive is used.

    Returns:
    - str: The destination folder path, whether or not extraction ran.
    """
    if not output_folder:
        parent_dir = os.path.dirname(zip_path)
        archive_stem = os.path.splitext(os.path.basename(zip_path))[0]
        output_folder = os.path.join(parent_dir, archive_stem)

    # An existing destination is taken to mean the archive was already extracted.
    if os.path.exists(output_folder):
        print(f"Files already extracted to {output_folder}. Skipping extraction.")
        return output_folder

    with ZipFile(zip_path, "r") as archive:
        archive.extractall(output_folder)
    return output_folder

wget_download(url, save_path=None)

Downloads a file using wget.

Parameters: - url (str): The URL of the file to be downloaded. - save_path (str, optional): dir or file to save (default: current working directory).

Returns: - str: The path to the downloaded file.

Source code in hydro_opendata/downloader/downloader.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def wget_download(url, save_path=None):
    """
    Download a file with wget, skipping the download if the target already exists.

    Parameters:
    - url (str): The URL of the file to download.
    - save_path (str, optional): An existing directory (the file keeps the URL's base name),
                                 or a file path to save to. When omitted, the URL's base
                                 name is used relative to the current working directory.

    Returns:
    - str: The target path if it already exists, otherwise the value returned by
           wget.download.
    """
    if not save_path:
        target = os.path.basename(url)
    elif os.path.isdir(save_path):
        target = os.path.join(save_path, os.path.basename(url))
    else:
        target = save_path

    # Only hit the network when the target is not already on disk.
    if not os.path.exists(target):
        return wget.download(url, out=target)

    print(f"File {target} already exists. Skipping download.")
    return target

Last update: 2023-08-30