ICESat-2 ATL03光子数据读取

ICESat-2数据处理的方式一般为将光子数据投影到沿轨距离和高程的二维空间。如下图:

ATL03数据读取

H5是一种数据存储结构,读取原理就是按照该结构获取数据,这里给出两种读取方式。

ATL03的数据字典:ATL03 Product Data Dictionary (nsidc.org)

使用pandas

import warnings
import pandas as pd


def read_hdf5_atl03_beam_pandas(filename, beam, verbose=False):
    # 打开HDF5文件进行读取
    h5_store = pd.HDFStore(filename, mode='r')
    root = h5_store.root
    # 为ICESat-2 ATL03变量和属性分配python字典
    atl03_mds = {}

    # 读取文件中每个输入光束
    # beams = [k for k in file_id.keys() if bool(re.match('gt\\d[lr]', k))]
    beams = ['gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r']
    if beam not in beams:
        print('请填入正确的光束代码')
        return

    atl03_mds['heights'] = {}
    atl03_mds['geolocation'] = {}

    # -- 获取每个HDF5变量
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # -- ICESat-2 Heights Group
        heights_keys = ['dist_ph_across', 'dist_ph_along', 'h_ph', 'lat_ph', 'lon_ph', 'signal_conf_ph']
        for key in heights_keys:
            atl03_mds['heights'][key] = root[beam]['heights'][key][:]

        geolocation_keys = ['ref_elev', 'ph_index_beg', 'segment_id', 'segment_ph_cnt', 'segment_dist_x', 'segment_length']
        # -- ICESat-2 Geolocation Group
        for key in geolocation_keys:
            atl03_mds['geolocation'][key] = root[beam]['geolocation'][key][:]

    h5_store.close()
    return atl03_mds

使用h5py

import os
import h5py
import re

def read_hdf5_atl03_beam_h5py(filename, beam, verbose=False):
    """
    ATL03 原始数据读取
    Args:
        filename (str): h5文件路径
        beam (str): 光束
        verbose (bool): 输出HDF5信息

    Returns:
        返回ATL03光子数据的heights和geolocation信息
    """

    # 打开HDF5文件进行读取
    file_id = h5py.File(os.path.expanduser(filename), 'r')

    # 输出HDF5文件信息
    if verbose:
        print(file_id.filename)
        print(list(file_id.keys()))
        print(list(file_id['METADATA'].keys()))

    # 为ICESat-2 ATL03变量和属性分配python字典
    atl03_mds = {}

    # 读取文件中每个输入光束
    beams = [k for k in file_id.keys() if bool(re.match('gt\\d[lr]', k))]
    if beam not in beams:
        print('请填入正确的光束代码')
        return

    atl03_mds['heights'] = {}
    atl03_mds['geolocation'] = {}
    atl03_mds['bckgrd_atlas'] = {}

    # -- 获取每个HDF5变量
    # -- ICESat-2 Measurement Group
    for key, val in file_id[beam]['heights'].items():
        atl03_mds['heights'][key] = val[:]

    # -- ICESat-2 Geolocation Group
    for key, val in file_id[beam]['geolocation'].items():
        atl03_mds['geolocation'][key] = val[:]

    for key, val in file_id[beam]['bckgrd_atlas'].items():
        atl03_mds['bckgrd_atlas'][key] = val[:]

    return atl03_mds

重建沿轨道距离

在读取ICESat-2 ATL03数据后,我们需要根据分段信息重建每个光子的沿轨道距离,已知数据如下:

  • ATL03每个分段内的光子都有一个沿轨道距离(dist_ph_along),该距离始于当前分段。

  • ATL03每个分段有一个沿轨道距离(segment_dist_x),该距离始于赤道交叉口,即真正的沿轨道距离。

  • 当前分段内每个光子的相对沿轨道距离 + 当前分段的沿轨道距离 = 每个光子的真正沿轨道距离。

代码如下:

import numpy as np


def get_atl03_x_atc(atl03_mds):
    val = atl03_mds

    # 初始化
    val['heights']['x_atc'] = np.zeros_like(val['heights']['h_ph']) + np.NaN
    val['heights']['y_atc'] = np.zeros_like(val['heights']['h_ph']) + np.NaN
    val['geolocation']['ref_elev_all'] = np.zeros_like(val['heights']['h_ph'])

    # -- ATL03 Segment ID
    segment_id = val['geolocation']['segment_id']
    # -- 分段中的第一个光子(转换为基于0的索引)
    segment_index_begin = val['geolocation']['ph_index_beg'] - 1
    # -- 分段中的光子事件数
    segment_pe_count = val['geolocation']['segment_ph_cnt']
    # -- 每个ATL03段的沿轨道距离
    segment_distance = val['geolocation']['segment_dist_x']
    # -- 每个ATL03段的轨道长度
    segment_length = val['geolocation']['segment_length']

    # -- 对ATL03段进行迭代,以计算40m的平均值
    # -- 在ATL03中基于1的索引:无效==0
    # -- 此处为基于0的索引:无效==-1
    segment_indices, = np.nonzero((segment_index_begin[:-1] >= 0) &
                                  (segment_index_begin[1:] >= 0))
    for j in segment_indices:
        # -- j 段索引
        idx = segment_index_begin[j]
        # -- 分段中的光子数(使用2个ATL03分段)
        c1 = np.copy(segment_pe_count[j])
        c2 = np.copy(segment_pe_count[j + 1])
        cnt = c1 + c2

        # -- 沿轨道和跨轨道距离
        # -- 获取当前段光子列表,idx当前段(j)第一个光子数量,c1当前段光子数量,idx+c1当前段长度
        distance_along_x = np.copy(val['heights']['dist_ph_along'][idx: idx + cnt])
        ref_elev = np.copy(val['geolocation']['ref_elev'][j])
        # -- 给当前段的光子加上当前段沿轨道距离
        distance_along_x[:c1] += segment_distance[j]
        distance_along_x[c1:] += segment_distance[j + 1]
        distance_along_y = np.copy(val['heights']['dist_ph_across'][idx: idx + cnt])

        val['heights']['x_atc'][idx: idx + cnt] = distance_along_x
        val['heights']['y_atc'][idx: idx + cnt] = distance_along_y
        val['geolocation']['ref_elev_all'][idx: idx + c1] += ref_elev

ATL03数据截取

在处理ATL03时,我们一般都会获取经过研究区域内的光子数据,因此需要对数据进行截取操作,代码如下:

from glob import glob

from readers.get_ATL03_x_atc import get_atl03_x_atc
from readers.read_HDF5_ATL03 import read_hdf5_atl03_beam, read_hdf5_atl03_coordinate


def read_data(filepath, beam, mask_lat, mask_lon):
    """
    读取数据,返回沿轨道距离和高程距离
    :param filepath: h5文件路径
    :param beam: 轨道光束
    :param mask_lat: 维度范围
    :param mask_lon: 经度范围
    :return:
    """
    atl03_file = glob(filepath)
    is2_atl03_mds = read_hdf5_atl03_beam(atl03_file[0], beam=beam, verbose=False)
    # 添加沿轨道距离到数据中
    get_atl03_x_atc(is2_atl03_mds)
    # 选择范围
    d3 = is2_atl03_mds
    subset1 = (d3['heights']['lat_ph'] >= min(mask_lat)) & (d3['heights']['lat_ph'] <= max(mask_lat))
    if mask_lon is not None:
        if mask_lon[0] is not None and mask_lon[1] is None:
            subset1 = subset1 & (d3['heights']['x_atc'] >= mask_lon[0])
        elif mask_lon[0] is None and mask_lon[1] is not None:
            subset1 = subset1 & (d3['heights']['x_atc'] <= mask_lon[1])
        else:
            subset1 = subset1 & (d3['heights']['x_atc'] >= min(mask_lon)) & (d3['heights']['x_atc'] <= max(mask_lon))
    x_act = d3['heights']['x_atc'][subset1]
    h = d3['heights']['h_ph'][subset1]
    signal_conf_ph = d3['heights']['signal_conf_ph'][subset1]
    lat = d3['heights']['lat_ph'][subset1]
    lon = d3['heights']['lon_ph'][subset1]
    ref_elev = d3['geolocation']['ref_elev_all'][subset1]

    del d3, subset1
    return x_act, h, signal_conf_ph, lat, lon, ref_elev


def read_all_beam_coordinate(filepath, mask_lat, mask_lon):
    """
    读取所有波束的数据
    :param filepath:
    :param mask_lat:
    :param mask_lon:
    :return:
    """
    atl03_file = glob(filepath)
    is2_atl03_mds = read_hdf5_atl03_coordinate(atl03_file[0])

    # 禁止加载全部数据
    # if mask_lat is None or len(mask_lat) == 0 or mask_lon is None or len(mask_lon) == 0:
    #     return False

    d3 = is2_atl03_mds
    if mask_lon is None and mask_lat is None:
        # 加载全部数据
        return d3
    for beam in is2_atl03_mds.keys():
        subset1 = (d3[beam]['lat'] >= min(mask_lat)) & (d3[beam]['lat'] <= max(mask_lat))
        subset1 = subset1 & (d3[beam]['lon'] >= min(mask_lon)) & (d3[beam]['lon'] <= max(mask_lon))
        d3[beam]['lat'] = d3[beam]['lat'][subset1]
        d3[beam]['lon'] = d3[beam]['lon'][subset1]
    return d3

数据可视化

使用沿轨道距离和高程数据绘制散点图,示例代码如下:

def save2file(act, h, conf, lat, lon):
    """
    保存研究区域的一下数据
    :param act: act,沿轨道距离
    :param h: h,高程
    :param conf: 置信度
    :param lat: 维度
    :param lon: 经度
    """
    points = list(zip(act, h, lat, lon, conf))
    data = pd.DataFrame(points, columns=['沿轨道距离', '高程', '维度', '经度', '置信度'])
    data.to_csv('result/points_origin.csv', mode='w', index=False)


if __name__ == '__main__':
    filepath = r'D:\Users\SongW\Downloads\ATL03_20190222135159_08570207_005_01.h5'
    beam = 'gt3l'
    mask_lat = [16.533, 16.550]
    act, h, conf, lat, lon, ref_elev = read_data(filepath, beam, mask_lat, None)
    save2file(act, h, conf, lat, lon)
    plt.scatter(act, h)
    plt.show()

输出图像如下:

项目源码

sx-code - icesat-2-atl03 (github.com)

热门相关:前夫有毒:1000万夺子契约   甜蜜婚令:陆少的医神娇妻   万界点名册   剑道邪尊Ⅱ   明天和意外