diff --git a/modules/arrow/package.json b/modules/arrow/package.json index 88994138d3..28c7ed826d 100644 --- a/modules/arrow/package.json +++ b/modules/arrow/package.json @@ -47,6 +47,7 @@ }, "dependencies": { "@loaders.gl/loader-utils": "4.0.0-beta.8", + "@loaders.gl/gis": "4.0.0-beta.8", "@loaders.gl/schema": "4.0.0-beta.8", "apache-arrow": "^13.0.0" }, diff --git a/modules/arrow/src/arrow-loader.ts b/modules/arrow/src/arrow-loader.ts index 4554ec670a..e755ba8e97 100644 --- a/modules/arrow/src/arrow-loader.ts +++ b/modules/arrow/src/arrow-loader.ts @@ -1,5 +1,6 @@ // loaders.gl, MIT license // Copyright (c) vis.gl contributors + import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils'; import type {ArrowTable} from './lib/arrow-table'; diff --git a/modules/arrow/src/arrow-writer.ts b/modules/arrow/src/arrow-writer.ts index 7ffa8a65e8..fd65f44dbb 100644 --- a/modules/arrow/src/arrow-writer.ts +++ b/modules/arrow/src/arrow-writer.ts @@ -1,4 +1,5 @@ // import type {} from '@loaders.gl/loader-utils'; + import type {Writer, WriterOptions} from '@loaders.gl/loader-utils'; import {ColumnarTable} from './lib/encode-arrow'; import {encodeArrowSync} from './lib/encode-arrow'; diff --git a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts new file mode 100644 index 0000000000..f106dbb310 --- /dev/null +++ b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts @@ -0,0 +1,262 @@ +// loaders.gl, MIT license +// Copyright (c) vis.gl contributors + +import {Data, Vector} from 'apache-arrow'; +import {BinaryFeatureCollection as BinaryFeatures} from '@loaders.gl/schema'; +import {GeoArrowEncoding} from '@loaders.gl/gis'; +import {updateBoundsFromGeoArrowSamples} from './get-arrow-bounds'; + +/** + * Binary data from geoarrow column and can be used by e.g. 
/**
 * Binary data extracted from a geoarrow column, in a form that can be used by e.g. deck.gl GeoJsonLayer
 */
export type BinaryDataFromGeoArrow = {
  /** one binary feature collection per chunk of the geoarrow column */
  binaryGeometries: BinaryFeatures[];
  /** [minLng, minLat, maxLng, maxLat], estimated from sampled coordinates of every chunk */
  bounds: [number, number, number, number];
  /** which kind of geometry the column holds (derived from the geoarrow encoding) */
  featureTypes: {polygon: boolean; point: boolean; line: boolean};
};

type BinaryGeometryContent = {
  /** for every vertex, the chunk-local index of the feature that owns it */
  featureIds: Uint32Array;
  /** interleaved coordinates, `nDim` numbers per vertex */
  flatCoordinateArray: Float64Array;
  nDim: number;
  /** first vertex of every line / ring (empty for points) */
  geomOffset: Int32Array;
  /** first vertex of every polygon (empty for points and lines) */
  geometryIndicies: Int32Array;
};

// binary geometry template, see deck.gl BinaryGeometry
const BINARY_GEOMETRY_TEMPLATE = {
  globalFeatureIds: {value: new Uint32Array(0), size: 1},
  positions: {value: new Float32Array(0), size: 2},
  properties: [],
  numericProps: {},
  featureIds: {value: new Uint32Array(0), size: 1}
};

/**
 * get binary geometries from geoarrow column
 *
 * @param geoColumn the geoarrow column, e.g. arrowTable.getChildAt(geoColumnIndex)
 * @param geoEncoding the geo encoding of the geoarrow column, e.g. getGeoArrowEncoding(arrowTable.schema, geoColumnName)
 * @returns BinaryDataFromGeoArrow
 * @throws Error if the encoding is not one of the native geoarrow point/line/polygon encodings
 */
export function getBinaryGeometriesFromArrow(
  geoColumn: Vector,
  geoEncoding: GeoArrowEncoding
): BinaryDataFromGeoArrow {
  const featureTypes = {
    polygon: geoEncoding === 'geoarrow.multipolygon' || geoEncoding === 'geoarrow.polygon',
    point: geoEncoding === 'geoarrow.multipoint' || geoEncoding === 'geoarrow.point',
    line: geoEncoding === 'geoarrow.multilinestring' || geoEncoding === 'geoarrow.linestring'
  };

  const chunks = geoColumn.data;
  let bounds: [number, number, number, number] = [Infinity, Infinity, -Infinity, -Infinity];
  let globalFeatureIdOffset = 0;
  const binaryGeometries: BinaryFeatures[] = [];

  for (const geometries of chunks) {
    const {featureIds, flatCoordinateArray, nDim, geomOffset} = getBinaryGeometriesFromChunk(
      geometries,
      geoEncoding
    );

    // Snapshot the running offset so the callback below does not capture a mutable variable
    const chunkFeatureIdOffset = globalFeatureIdOffset;

    const numOfVertices = flatCoordinateArray.length / nDim;
    const globalFeatureIds = new Uint32Array(numOfVertices);
    for (let i = 0; i < numOfVertices; i++) {
      globalFeatureIds[i] = featureIds[i] + chunkFeatureIdOffset;
    }

    const binaryContent = {
      globalFeatureIds: {value: globalFeatureIds, size: 1},
      positions: {
        value: flatCoordinateArray,
        size: nDim
      },
      featureIds: {value: featureIds, size: 1},
      properties: Array.from({length: geometries.length}, (_, i) => ({
        index: i + chunkFeatureIdOffset
      }))
    };

    // TODO: check if chunks are sequentially accessed
    globalFeatureIdOffset += geometries.length;

    // NOTE: deck.gl defines the BinaryFeatures structure must have points, lines, polygons even if they are empty
    binaryGeometries.push({
      shape: 'binary-feature-collection',
      points: {
        type: 'Point',
        ...BINARY_GEOMETRY_TEMPLATE,
        ...(featureTypes.point ? binaryContent : {})
      },
      lines: {
        type: 'LineString',
        ...BINARY_GEOMETRY_TEMPLATE,
        ...(featureTypes.line ? binaryContent : {}),
        pathIndices: {value: featureTypes.line ? geomOffset : new Uint16Array(0), size: 1}
      },
      polygons: {
        type: 'Polygon',
        ...BINARY_GEOMETRY_TEMPLATE,
        ...(featureTypes.polygon ? binaryContent : {}),
        polygonIndices: {
          // TODO why deck.gl's tessellatePolygon performance is not good when using geometryIndicies
          // even when there is no hole in any polygon
          value: featureTypes.polygon ? geomOffset : new Uint16Array(0),
          size: 1
        },
        primitivePolygonIndices: {
          value: featureTypes.polygon ? geomOffset : new Uint16Array(0),
          size: 1
        }
      }
    });

    // The helper returns a NEW bounds array, so the result must be kept
    bounds = updateBoundsFromGeoArrowSamples(flatCoordinateArray, nDim, bounds);
  }

  return {binaryGeometries, bounds, featureTypes};
}

/**
 * get binary geometries from one chunk of a geoarrow column
 * @param chunk one chunk/batch of geoarrow column
 * @param geoEncoding geo encoding of the geoarrow column
 * @returns BinaryGeometryContent
 * @throws Error for encodings without a native coordinate layout (e.g. wkb / wkt)
 */
function getBinaryGeometriesFromChunk(
  chunk: Data,
  geoEncoding: GeoArrowEncoding
): BinaryGeometryContent {
  switch (geoEncoding) {
    case 'geoarrow.point':
    case 'geoarrow.multipoint':
      return getBinaryPointsFromChunk(chunk, geoEncoding);
    case 'geoarrow.linestring':
    case 'geoarrow.multilinestring':
      return getBinaryLinesFromChunk(chunk, geoEncoding);
    case 'geoarrow.polygon':
    case 'geoarrow.multipolygon':
      return getBinaryPolygonsFromChunk(chunk, geoEncoding);
    default:
      throw Error('invalid geoarrow encoding');
  }
}

/**
 * Build the per-vertex feature id array.
 * @param numOfVertices total number of vertices in the chunk
 * @param numOfFeatures number of features (rows) in the chunk
 * @param getFirstVertex maps a feature index in [0, numOfFeatures] to the index of its first vertex;
 *   the value for `numOfFeatures` is the end of the last feature
 * @returns array of length numOfVertices holding the owning feature index of every vertex
 */
function getVertexFeatureIds(
  numOfVertices: number,
  numOfFeatures: number,
  getFirstVertex: (featureIndex: number) => number
): Uint32Array {
  const featureIds = new Uint32Array(numOfVertices);
  for (let i = 0; i < numOfFeatures; i++) {
    const startIdx = getFirstVertex(i);
    const endIdx = getFirstVertex(i + 1);
    for (let j = startIdx; j < endIdx; j++) {
      featureIds[j] = i;
    }
  }
  return featureIds;
}

/**
 * get binary polygons from geoarrow polygon column
 * @param chunk one chunk of geoarrow polygon column
 * @param geoEncoding the geo encoding of the geoarrow polygon column
 * @returns BinaryGeometryContent
 */
function getBinaryPolygonsFromChunk(chunk: Data, geoEncoding: string): BinaryGeometryContent {
  const isMultiPolygon = geoEncoding === 'geoarrow.multipolygon';

  // Nesting: [multipolygon ->] polygon -> ring -> point -> coordinate values
  const polygonData = isMultiPolygon ? chunk.children[0] : chunk;
  const ringData = polygonData.children[0];
  const pointData = ringData.children[0];
  const coordData = pointData.children[0];
  const nDim = pointData.stride;
  // polygon index -> index of its first ring
  const polygonOffset = polygonData.valueOffsets;
  // ring index -> index of its first vertex
  const geomOffset = ringData.valueOffsets;
  const flatCoordinateArray = coordData.values;
  const numOfVertices = flatCoordinateArray.length / nDim;

  // First vertex of every polygon, closed by the total vertex count.
  // Int32Array (not Uint16Array) so that vertex indices above 65535 do not wrap around.
  const numOfPolygons = polygonData.length;
  const geometryIndicies = new Int32Array(numOfPolygons + 1);
  for (let i = 0; i < numOfPolygons; i++) {
    geometryIndicies[i] = geomOffset[polygonOffset[i]];
  }
  geometryIndicies[numOfPolygons] = numOfVertices;

  // For a multipolygon the feature offsets point at polygons, which must first be resolved to rings
  const featureIds = getVertexFeatureIds(
    numOfVertices,
    chunk.length,
    isMultiPolygon
      ? (i) => geomOffset[polygonOffset[chunk.valueOffsets[i]]]
      : (i) => geomOffset[chunk.valueOffsets[i]]
  );

  return {
    featureIds,
    flatCoordinateArray,
    nDim,
    geomOffset,
    geometryIndicies
  };
}

/**
 * get binary lines from geoarrow line column
 * @param chunk one chunk/batch of geoarrow column
 * @param geoEncoding the geo encoding of the geoarrow column
 * @returns BinaryGeometryContent
 */
function getBinaryLinesFromChunk(chunk: Data, geoEncoding: string): BinaryGeometryContent {
  const isMultiLineString = geoEncoding === 'geoarrow.multilinestring';

  // Nesting: [multilinestring ->] linestring -> point -> coordinate values
  const lineData = isMultiLineString ? chunk.children[0] : chunk;
  const pointData = lineData.children[0];
  const coordData = pointData.children[0];

  const nDim = pointData.stride;
  // line index -> index of its first vertex
  const geomOffset = lineData.valueOffsets;
  const flatCoordinateArray = coordData.values;

  // geometryIndicies is not needed for line string
  const geometryIndicies = new Int32Array(0);

  const numOfVertices = flatCoordinateArray.length / nDim;
  // For a multilinestring the feature offsets point at lines, not directly at vertices
  const featureIds = getVertexFeatureIds(
    numOfVertices,
    chunk.length,
    isMultiLineString ? (i) => geomOffset[chunk.valueOffsets[i]] : (i) => geomOffset[i]
  );

  return {
    featureIds,
    flatCoordinateArray,
    nDim,
    geomOffset,
    geometryIndicies
  };
}

/**
 * get binary points from geoarrow point column
 * @param chunk one chunk/batch of geoarrow column
 * @param geoEncoding geo encoding of the geoarrow column
 * @returns BinaryGeometryContent
 */
function getBinaryPointsFromChunk(chunk: Data, geoEncoding: string): BinaryGeometryContent {
  const isMultiPoint = geoEncoding === 'geoarrow.multipoint';

  // Nesting: [multipoint ->] point -> coordinate values
  const pointData = isMultiPoint ? chunk.children[0] : chunk;
  const coordData = pointData.children[0];

  const nDim = pointData.stride;
  const flatCoordinateArray = coordData.values;

  // geometryIndices is not needed for point
  const geometryIndicies = new Int32Array(0);
  // geomOffset is not needed for point
  const geomOffset = new Int32Array(0);

  const numOfVertices = flatCoordinateArray.length / nDim;
  // A point feature owns exactly one vertex; a multipoint feature owns the vertex range given by its offsets
  const featureIds = getVertexFeatureIds(
    numOfVertices,
    chunk.length,
    isMultiPoint ? (i) => chunk.valueOffsets[i] : (i) => i
  );

  return {
    featureIds,
    flatCoordinateArray,
    nDim,
    geomOffset,
    geometryIndicies
  };
}
/**
 * Convert an Arrow MultiPolygon (a vector of polygons, each a vector of rings,
 * each a vector of coordinates) to a GeoJSON MultiPolygon geometry.
 * A missing polygon or ring yields an empty array at that position.
 */
function arrowMultiPolygonToFeature(arrowMultiPolygon: Vector): MultiPolygon {
  const coordinates: Position[][][] = [];

  for (let polygonIndex = 0; polygonIndex < arrowMultiPolygon.length; polygonIndex++) {
    const arrowPolygon = arrowMultiPolygon.get(polygonIndex);
    const rings: Position[][] = [];

    if (arrowPolygon) {
      for (let ringIndex = 0; ringIndex < arrowPolygon.length; ringIndex++) {
        const arrowRing = arrowPolygon.get(ringIndex);
        const positions: Position[] = [];

        if (arrowRing) {
          for (let vertexIndex = 0; vertexIndex < arrowRing.length; vertexIndex++) {
            const position: Position = Array.from(arrowRing.get(vertexIndex));
            positions.push(position);
          }
        }

        rings.push(positions);
      }
    }

    coordinates.push(rings);
  }

  return {type: 'MultiPolygon', coordinates};
}
/**
 * Convert an Arrow MultiPoint (a vector of coordinates) to a GeoJSON MultiPoint geometry.
 * Null points are skipped.
 */
function arrowMultiPointToFeature(arrowMultiPoint: Vector): MultiPoint {
  const coordinates: Position[] = [];
  if (arrowMultiPoint) {
    let index = 0;
    while (index < arrowMultiPoint.length) {
      const arrowPoint = arrowMultiPoint.get(index++);
      if (!arrowPoint) {
        continue;
      }
      const position: Position = Array.from(arrowPoint);
      coordinates.push(position);
    }
  }
  return {type: 'MultiPoint', coordinates};
}

/**
 * Convert an Arrow Point (a single coordinate) to a GeoJSON Point geometry.
 */
function arrowPointToFeature(arrowPoint: Vector): Point {
  const coordinates: Position = Array.from(arrowPoint);
  return {type: 'Point', coordinates};
}

/**
 * Convert an Arrow MultiLineString (a vector of line strings, each a vector of coordinates)
 * to a GeoJSON MultiLineString geometry.
 * A null line string yields an empty line; null coordinates are skipped.
 */
function arrowMultiLineStringToFeature(arrowMultiLineString: Vector): MultiLineString {
  const coordinates: Position[][] = [];
  if (arrowMultiLineString) {
    for (let lineIndex = 0; lineIndex < arrowMultiLineString.length; lineIndex++) {
      const arrowLineString = arrowMultiLineString.get(lineIndex);
      const positions: Position[] = [];
      if (arrowLineString) {
        for (let vertexIndex = 0; vertexIndex < arrowLineString.length; vertexIndex++) {
          const arrowCoord = arrowLineString.get(vertexIndex);
          if (!arrowCoord) {
            continue;
          }
          const position: Position = Array.from(arrowCoord);
          positions.push(position);
        }
      }
      coordinates.push(positions);
    }
  }
  return {type: 'MultiLineString', coordinates};
}
/**
 * Update bounds from geoarrow sample data
 *
 * The first two numbers of every vertex are read as [lng, lat]. Only every n-th vertex is
 * inspected (n chosen so that roughly `sampleSize` vertices are visited), so the result is an
 * estimate when the chunk holds more than `sampleSize` vertices.
 * The input `bounds` array is NOT modified; callers must use the returned array.
 *
 * @param flatCoords the flattened coordinates array from one chunk of geoarrow column
 * @param nDim the number of dimensions of the coordinates
 * @param bounds the current bounds [minLng, minLat, maxLng, maxLat] to be extended
 * @param sampleSize how many samples to be used to update the bounds, default is 100 per chunk
 * @returns a new bounds array that covers both `bounds` and the sampled coordinates
 */
export function updateBoundsFromGeoArrowSamples(
  flatCoords: Float64Array,
  nDim: number,
  bounds: [number, number, number, number],
  sampleSize: number = 100
): [number, number, number, number] {
  const numberOfFeatures = flatCoords.length / nDim;
  const sampleStep = Math.max(Math.floor(numberOfFeatures / sampleSize), 1);

  const newBounds: [number, number, number, number] = [...bounds];
  for (let i = 0; i < numberOfFeatures; i += sampleStep) {
    const lng = flatCoords[i * nDim];
    const lat = flatCoords[i * nDim + 1];
    // Compare against the running minimum/maximum, not the caller's original bounds
    if (lng < newBounds[0]) {
      newBounds[0] = lng;
    }
    if (lat < newBounds[1]) {
      newBounds[1] = lat;
    }
    if (lng > newBounds[2]) {
      newBounds[2] = lng;
    }
    if (lat > newBounds[3]) {
      newBounds[3] = lat;
    }
  }

  return newBounds;
}
'@loaders.gl/loader-utils'; import type {ArrowLoaderOptions} from './arrow-loader'; @@ -12,12 +13,25 @@ import {parseArrowInBatches} from './lib/parse-arrow-in-batches'; import {ArrowTableBatchAggregator} from './lib/arrow-table-batch'; -// SCHEMA +// Make the ArrowBatch type available +TableBatchBuilder.ArrowBatch = ArrowTableBatchAggregator; + +// TYPES export {getArrowType} from './schema/arrow-type-utils'; -// Make the ArrowBatch type available -TableBatchBuilder.ArrowBatch = ArrowTableBatchAggregator; +// SCHEMA + +export { + serializeArrowSchema, + deserializeArrowSchema, + serializeArrowMetadata, + deserializeArrowMetadata, + serializeArrowField, + deserializeArrowField, + serializeArrowType, + deserializeArrowType +} from './schema/convert-arrow-schema'; // Types export type {ArrowTable, ArrowTableBatch} from './lib/arrow-table'; @@ -44,3 +58,13 @@ export const ArrowLoader: LoaderWithParser< parseSync, parseInBatches: parseArrowInBatches }; + +// Arrow Utils +export type {GeoArrowEncoding} from '@loaders.gl/gis'; +// getGeometryColumnsFromArrowTable, +// getGeoArrowEncoding + +export type {BinaryDataFromGeoArrow} from './geoarrow/convert-geoarrow-to-binary-geometry'; +export {getBinaryGeometriesFromArrow} from './geoarrow/convert-geoarrow-to-binary-geometry'; + +export {parseGeometryFromArrow} from './geoarrow/convert-geoarrow-to-geojson'; diff --git a/modules/arrow/src/schema/convert-arrow-schema.ts b/modules/arrow/src/schema/convert-arrow-schema.ts index 18fc4210ca..a8c55656ec 100644 --- a/modules/arrow/src/schema/convert-arrow-schema.ts +++ b/modules/arrow/src/schema/convert-arrow-schema.ts @@ -9,7 +9,7 @@ import { Null, Binary, Bool, - // Int, + Int, Int8, Int16, Int32, @@ -18,27 +18,34 @@ import { Uint16, Uint32, Uint64, - // Float, + Float, Float16, Float32, Float64, + Precision, Utf8, - // Date, + Date_, + DateUnit, DateDay, DateMillisecond, - // Time, + Time, TimeMillisecond, TimeSecond, - // Timestamp, + Timestamp, TimestampSecond, 
TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, - // Interval, + Interval, + IntervalUnit, IntervalDayTime, IntervalYearMonth, FixedSizeList, - Struct + Struct, + TimeUnit, + TimeMicrosecond, + TimeNanosecond, + List } from 'apache-arrow'; /** Convert Apache Arrow Schema (class instance) to a serialized Schema (plain data) */ @@ -97,7 +104,9 @@ export function serializeArrowType(arrowType: ArrowDataType): DataType { return 'binary'; case Bool: return 'bool'; - // case Int: return 'int'; + case Int: + const intType = arrowType as Int; + return `${intType.isSigned ? 'u' : ''}int${intType.bitWidth}`; case Int8: return 'int8'; case Int16: @@ -114,7 +123,19 @@ export function serializeArrowType(arrowType: ArrowDataType): DataType { return 'uint32'; case Uint64: return 'uint64'; - // case Float: return 'float'; + case Float: + const precision = (arrowType as Float).precision; + // return `float(precision + 1) * 16`; + switch (precision) { + case Precision.HALF: + return 'float16'; + case Precision.SINGLE: + return 'float32'; + case Precision.DOUBLE: + return 'float64'; + default: + return 'float16'; + } case Float16: return 'float16'; case Float32: @@ -123,17 +144,49 @@ export function serializeArrowType(arrowType: ArrowDataType): DataType { return 'float64'; case Utf8: return 'utf8'; - // case Date: return 'date'; + case Date: + const dateUnit = (arrowType as Date_).unit; + return dateUnit === DateUnit.DAY ? 
'date-day' : 'date-millisecond'; case DateDay: return 'date-day'; case DateMillisecond: return 'date-millisecond'; - // case Time: return 'time'; + case Time: + const timeUnit = (arrowType as Time).unit; + switch (timeUnit) { + case TimeUnit.SECOND: + return 'time-second'; + case TimeUnit.MILLISECOND: + return 'time-millisecond'; + case TimeUnit.MICROSECOND: + return 'time-microsecond'; + case TimeUnit.NANOSECOND: + return 'time-nanosecond'; + default: + return 'time-second'; + } case TimeMillisecond: return 'time-millisecond'; case TimeSecond: return 'time-second'; - // case Timestamp: return 'timestamp'; + case TimeMicrosecond: + return 'time-microsecond'; + case TimeNanosecond: + return 'time-nanosecond'; + case Timestamp: + const timeStampUnit = (arrowType as Timestamp).unit; + switch (timeStampUnit) { + case TimeUnit.SECOND: + return 'timestamp-second'; + case TimeUnit.MILLISECOND: + return 'timestamp-millisecond'; + case TimeUnit.MICROSECOND: + return 'timestamp-microsecond'; + case TimeUnit.NANOSECOND: + return 'timestamp-nanosecond'; + default: + return 'timestamp-second'; + } case TimestampSecond: return 'timestamp-second'; case TimestampMillisecond: @@ -142,11 +195,27 @@ export function serializeArrowType(arrowType: ArrowDataType): DataType { return 'timestamp-microsecond'; case TimestampNanosecond: return 'timestamp-nanosecond'; - // case Interval: return 'interval'; + case Interval: + const intervalUnit = (arrowType as Interval).unit; + switch (intervalUnit) { + case IntervalUnit.DAY_TIME: + return 'interval-daytime'; + case IntervalUnit.YEAR_MONTH: + return 'interval-yearmonth'; + default: + return 'interval-daytime'; + } case IntervalDayTime: return 'interval-daytime'; case IntervalYearMonth: return 'interval-yearmonth'; + case List: + const listType = arrowType as List; + const listField = listType.valueField; + return { + type: 'list', + children: [serializeArrowField(listField)] + }; case FixedSizeList: return { type: 'fixed-size-list', @@ -165,6 
+234,9 @@ export function serializeArrowType(arrowType: ArrowDataType): DataType { export function deserializeArrowType(dataType: DataType): ArrowDataType { if (typeof dataType === 'object') { switch (dataType.type) { + case 'list': + const field = deserializeArrowField(dataType.children[0]); + return new List(field); case 'fixed-size-list': const child = deserializeArrowField(dataType.children[0]); return new FixedSizeList(dataType.listSize, child); @@ -183,7 +255,6 @@ export function deserializeArrowType(dataType: DataType): ArrowDataType { return new Binary(); case 'bool': return new Bool(); - // case 'int': return new Int(); case 'int8': return new Int8(); case 'int16': @@ -200,7 +271,6 @@ export function deserializeArrowType(dataType: DataType): ArrowDataType { return new Uint32(); case 'uint64': return new Uint64(); - // case 'float': return new Float(); case 'float16': return new Float16(); case 'float32': @@ -209,17 +279,18 @@ export function deserializeArrowType(dataType: DataType): ArrowDataType { return new Float64(); case 'utf8': return new Utf8(); - // case 'date': return new Date(); case 'date-day': return new DateDay(); case 'date-millisecond': return new DateMillisecond(); - // case 'time': return new Time(); - case 'time-millisecond': - return new TimeMillisecond(); case 'time-second': return new TimeSecond(); - // case 'timestamp': return new Timestamp(); + case 'time-millisecond': + return new TimeMillisecond(); + case 'time-microsecond': + return new TimeMicrosecond(); + case 'time-nanosecond': + return new TimeNanosecond(); case 'timestamp-second': return new TimestampSecond(); case 'timestamp-millisecond': @@ -228,7 +299,6 @@ export function deserializeArrowType(dataType: DataType): ArrowDataType { return new TimestampMicrosecond(); case 'timestamp-nanosecond': return new TimestampNanosecond(); - // case 'interval': return new Interval(); case 'interval-daytime': return new IntervalDayTime(); case 'interval-yearmonth': diff --git 
a/modules/arrow/src/tables/convert-arrow-to-table.ts b/modules/arrow/src/tables/convert-arrow-to-table.ts index e5b12ac46d..675e0aa0e7 100644 --- a/modules/arrow/src/tables/convert-arrow-to-table.ts +++ b/modules/arrow/src/tables/convert-arrow-to-table.ts @@ -1,5 +1,6 @@ // loaders.gl, MIT license // Copyright (c) vis.gl contributors + import type {ColumnarTable, ObjectRowTable} from '@loaders.gl/schema'; import type {Table as ApacheArrowTable} from 'apache-arrow'; import type {ArrowTable} from '../lib/arrow-table'; diff --git a/modules/arrow/src/types.ts b/modules/arrow/src/types.ts index 3915c012ce..eda3750981 100644 --- a/modules/arrow/src/types.ts +++ b/modules/arrow/src/types.ts @@ -1,3 +1,6 @@ +// loaders.gl, MIT license +// Copyright (c) vis.gl contributors + type TypedIntArray = Int8Array | Uint8Array | Int16Array | Uint16Array | Int32Array | Uint32Array; type TypedFloatArray = Float32Array | Float64Array; diff --git a/modules/arrow/src/workers/arrow-worker.ts b/modules/arrow/src/workers/arrow-worker.ts index ef229f3e84..65dbc761c8 100644 --- a/modules/arrow/src/workers/arrow-worker.ts +++ b/modules/arrow/src/workers/arrow-worker.ts @@ -1,3 +1,6 @@ +// loaders.gl, MIT license +// Copyright (c) vis.gl contributors + import {createLoaderWorker} from '@loaders.gl/loader-utils'; import {ArrowLoader} from '../index'; diff --git a/modules/arrow/test/arrow-loader.spec.ts b/modules/arrow/test/arrow-loader.spec.ts index 174f28970e..d25bf13b0d 100644 --- a/modules/arrow/test/arrow-loader.spec.ts +++ b/modules/arrow/test/arrow-loader.spec.ts @@ -1,3 +1,6 @@ +// loaders.gl, MIT license +// Copyright (c) vis.gl contributors + import test from 'tape-promise/tape'; import {validateLoader} from 'test/common/conformance'; diff --git a/modules/arrow/test/arrow-writer.spec.ts b/modules/arrow/test/arrow-writer.spec.ts index ca5f1acd6f..7f801a97dd 100644 --- a/modules/arrow/test/arrow-writer.spec.ts +++ b/modules/arrow/test/arrow-writer.spec.ts @@ -1,3 +1,6 @@ +// loaders.gl, 
MIT license +// Copyright (c) vis.gl contributors + import test from 'tape-promise/tape'; import {validateWriter} from 'test/common/conformance'; diff --git a/modules/arrow/test/geoarrow/convert-geoarrow-to-geojson.spec.ts b/modules/arrow/test/geoarrow/convert-geoarrow-to-geojson.spec.ts new file mode 100644 index 0000000000..24e97e8613 --- /dev/null +++ b/modules/arrow/test/geoarrow/convert-geoarrow-to-geojson.spec.ts @@ -0,0 +1,248 @@ +import test, {Test} from 'tape-promise/tape'; + +import {tableFromIPC} from 'apache-arrow'; +import {fetchFile} from '@loaders.gl/core'; +import {serializeArrowSchema, parseGeometryFromArrow} from '@loaders.gl/arrow'; +import {getGeometryColumnsFromSchema} from '@loaders.gl/gis'; + +const POINT_ARROW_FILE = '@loaders.gl/arrow/test/data/point.arrow'; +const MULTIPOINT_ARROW_FILE = '@loaders.gl/arrow/test/data/multipoint.arrow'; +const LINE_ARROW_FILE = '@loaders.gl/arrow/test/data/line.arrow'; +const MULTILINE_ARROW_FILE = '@loaders.gl/arrow/test/data/multiline.arrow'; +const POLYGON_ARROW_FILE = '@loaders.gl/arrow/test/data/polygon.arrow'; +const MULTIPOLYGON_ARROW_FILE = '@loaders.gl/arrow/test/data/multipolygon.arrow'; + +/** Array containing all encodings */ +const GEOARROW_ENCODINGS = [ + 'geoarrow.multipolygon', + 'geoarrow.polygon', + 'geoarrow.multilinestring', + 'geoarrow.linestring', + 'geoarrow.multipoint', + 'geoarrow.point', + 'geoarrow.wkb', + 'geoarrow.wkt' +]; + +// a simple geojson contains one point +const expectedPointGeojson = { + type: 'FeatureCollection', + features: [ + { + type: 'Feature', + properties: { + id: 2, + name: 'name2' + }, + geometry: { + type: 'Point', + coordinates: [1, 1] + } + } + ] +}; + +// a simple geojson contains one linestring +const expectedLineStringGeoJson = { + type: 'FeatureCollection', + features: [ + { + type: 'Feature', + properties: { + id: 1, + name: 'name1' + }, + geometry: { + type: 'LineString', + coordinates: [ + [0, 0], + [1, 1] + ] + } + } + ] +}; + +// a simple geojson 
/** Minimal shape of the expected GeoJSON fixtures used by the assertions below */
type ExpectedGeojson = {
  features: {properties: Record<string, unknown>; geometry: unknown}[];
};

test('ArrowUtils#parseGeometryFromArrow', async (t) => {
  const testCases: [string, ExpectedGeojson][] = [
    [POINT_ARROW_FILE, expectedPointGeojson],
    [MULTIPOINT_ARROW_FILE, expectedMultiPointGeoJson],
    [LINE_ARROW_FILE, expectedLineStringGeoJson],
    [MULTILINE_ARROW_FILE, expectedMultiLineStringGeoJson],
    [POLYGON_ARROW_FILE, expectedPolygonGeojson],
    [MULTIPOLYGON_ARROW_FILE, expectedMultiPolygonGeojson]
  ];

  // Await every case so that all assertions run BEFORE the test is ended
  for (const [arrowFile, expectedGeojson] of testCases) {
    await testParseFromArrow(t, arrowFile, expectedGeojson);
  }

  t.end();
});

/**
 * Load one arrow file, check its schema against the expected GeoJSON and compare the first
 * geometry parsed with parseGeometryFromArrow() to the first expected geometry.
 * @param t the tape test handle the assertions are reported to
 * @param arrowFile path of the arrow file to load
 * @param expectedGeojson the GeoJSON feature collection the arrow file was generated from
 */
async function testParseFromArrow(
  t: Test,
  arrowFile: string,
  expectedGeojson: ExpectedGeojson
): Promise<void> {
  // TODO: use the following code instead of apache-arrow to load arrow table
  // const arrowTable = await parse(fetchFile(arrowFile), ArrowLoader, {worker: false});
  const response = await fetchFile(arrowFile);
  const arrayBuffer = await response.arrayBuffer();
  const arrowTable = tableFromIPC(new Uint8Array(arrayBuffer));

  // check if the arrow table is loaded correctly
  t.equal(
    arrowTable.numRows,
    expectedGeojson.features.length,
    `arrow table has ${expectedGeojson.features.length} row`
  );

  const colNames = [...Object.keys(expectedGeojson.features[0].properties), 'geometry'];
  t.equal(arrowTable.numCols, colNames.length, `arrow table has ${colNames.length} columns`);

  // check fields exist in arrow table schema
  for (const field of arrowTable.schema.fields) {
    t.equal(colNames.includes(field.name), true, `arrow table has ${field.name} column`);
  }

  const schema = serializeArrowSchema(arrowTable.schema);
  const geometryColumns = getGeometryColumnsFromSchema(schema);

  // check 'geometry' is in geometryColumns
  t.equal(Boolean(geometryColumns.geometry), true, 'geometryColumns has geometry column');

  // get encoding from geometryColumns['geometry']
  const encoding = geometryColumns.geometry.encoding;

  // check encoding is one of GEOARROW_ENCODINGS
  t.ok(
    encoding !== undefined && GEOARROW_ENCODINGS.includes(encoding),
    'encoding is one of GEOARROW_ENCODINGS'
  );

  // get first geometry from arrow geometry column
  const firstArrowGeometry = arrowTable.getChild('geometry')?.get(0);
  const firstArrowGeometryObject = {
    encoding,
    data: firstArrowGeometry
  };

  // parse arrow geometry to geojson feature
  const firstFeature = parseGeometryFromArrow(firstArrowGeometryObject);

  // check if geometry in firstFeature is equal to the original geometry in expectedGeojson
  t.deepEqual(
    firstFeature?.geometry,
    expectedGeojson.features[0].geometry,
    'firstFeature.geometry is equal to expectedGeojson.features[0].geometry'
  );
}
a/modules/arrow/test/index.ts b/modules/arrow/test/index.ts index 60ea1563f1..9d2ce6c457 100644 --- a/modules/arrow/test/index.ts +++ b/modules/arrow/test/index.ts @@ -1,2 +1,8 @@ +// loaders.gl, MIT license +// Copyright (c) vis.gl contributors + import './arrow-loader.spec'; import './arrow-writer.spec'; + +// import './convert-geoarrow-to-binary-geometry.spec'; +import './geoarrow/convert-geoarrow-to-geojson.spec'; diff --git a/modules/arrow/tsconfig.json b/modules/arrow/tsconfig.json index 014fcad457..003021f3af 100644 --- a/modules/arrow/tsconfig.json +++ b/modules/arrow/tsconfig.json @@ -9,6 +9,7 @@ }, "references": [ {"path": "../core"}, + {"path": "../gis"}, {"path": "../loader-utils"}, {"path": "../schema"} ] diff --git a/modules/gis/src/index.ts b/modules/gis/src/index.ts index 65ecf185ad..31b0a12309 100644 --- a/modules/gis/src/index.ts +++ b/modules/gis/src/index.ts @@ -1,6 +1,5 @@ // Types from `@loaders.gl/schema` -// Geo metadata // Geo Metadata // import {default as GEOPARQUET_METADATA_SCHEMA} from './lib/geo/geoparquet-metadata-schema.json'; // export {GEOPARQUET_METADATA_SCHEMA}; @@ -10,6 +9,9 @@ export type {GeoMetadata} from './lib/geo/geoparquet-metadata'; export {getGeoMetadata, setGeoMetadata, unpackGeoMetadata} from './lib/geo/geoparquet-metadata'; export {unpackJSONStringMetadata} from './lib/geo/geoparquet-metadata'; +export type {GeoArrowEncoding, GeoArrowMetadata} from './lib/geo/geoarrow-metadata'; +export {getGeometryColumnsFromSchema} from './lib/geo/geoarrow-metadata'; + // Table conversion export {convertWKBTableToGeoJSON} from './lib/tables/convert-table-to-geojson'; diff --git a/modules/gis/src/lib/geo/geoarrow-metadata.ts b/modules/gis/src/lib/geo/geoarrow-metadata.ts index fbb3137f5a..15046d75a1 100644 --- a/modules/gis/src/lib/geo/geoarrow-metadata.ts +++ b/modules/gis/src/lib/geo/geoarrow-metadata.ts @@ -13,6 +13,7 @@ export type GeoArrowEncoding = | 'geoarrow.wkb' | 'geoarrow.wkt'; +/** Array containing all encodings */ const 
GEOARROW_ENCODINGS = [ 'geoarrow.multipolygon', 'geoarrow.polygon', @@ -24,11 +25,11 @@ const GEOARROW_ENCODINGS = [ 'geoarrow.wkt' ]; -const GEOARROW_METADATA_COLUMN_ENCODING = 'ARROW:extension:name'; -const GEOARROW_METADATA_COLUMN_METADATA = 'ARROW:extension:metadata'; +const GEOARROW_COLUMN_METADATA_ENCODING = 'ARROW:extension:name'; +const GEOARROW_COLUMN_METADATA_METADATA = 'ARROW:extension:metadata'; -/** Column metadata extracted from Apache Arrow metadata */ -type GeoArrowMetadata = { +/** Geospatial metadata for one column, extracted from Apache Arrow metadata */ +export type GeoArrowMetadata = { encoding?: GeoArrowEncoding; crs?: Record; egdes?: 'spherical'; @@ -53,7 +54,7 @@ export function getGeometryMetadataForField(field: Field): GeoArrowMetadata | nu let metadata: GeoArrowMetadata | null = null; // Check for GeoArrow metadata - const columnMetadata = field.metadata?.[GEOARROW_METADATA_COLUMN_METADATA]; + const columnMetadata = field.metadata?.[GEOARROW_COLUMN_METADATA_METADATA]; if (columnMetadata) { try { metadata = JSON.parse(columnMetadata); @@ -63,7 +64,7 @@ export function getGeometryMetadataForField(field: Field): GeoArrowMetadata | nu } // Check for GeoArrow column encoding - let geoEncoding = field.metadata?.[GEOARROW_METADATA_COLUMN_ENCODING]; + let geoEncoding = field.metadata?.[GEOARROW_COLUMN_METADATA_ENCODING]; if (geoEncoding) { geoEncoding = geoEncoding.toLowerCase(); if (!GEOARROW_ENCODINGS.includes(geoEncoding)) { diff --git a/modules/gis/src/lib/geo/geoparquet-metadata.ts b/modules/gis/src/lib/geo/geoparquet-metadata.ts index e234dd49aa..6abc0547d1 100644 --- a/modules/gis/src/lib/geo/geoparquet-metadata.ts +++ b/modules/gis/src/lib/geo/geoparquet-metadata.ts @@ -4,7 +4,8 @@ import {Schema, Field} from '@loaders.gl/schema'; /* eslint-disable camelcase */ -type GeometryType = +/** A GeoParquet metadata geometry type */ +type GeoParquetGeometryType = | 'Point' | 'LineString' | 'Polygon' @@ -24,7 +25,7 @@ type GeometryType = * A 
geoarrow / geoparquet geo metadata object * (stored in stringified form in the top level metadata 'geo' key) * @see https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md - * @see https://github.com/geoarrow/geoarrow/blob/main/metadata.md + * @see https://github.com/geoarrow/geoarrow * */ export type GeoMetadata = { version?: string; @@ -36,7 +37,7 @@ export type GeoMetadata = { /** A geoarrow / geoparquet geo metadata for one geometry column */ export type GeoColumnMetadata = { encoding: 'wkb' | 'wkt'; - geometry_types: GeometryType[]; + geometry_types: GeoParquetGeometryType[]; crs?: object | null; orientation?: 'counterclockwise'; bbox?: [number, number, number, number] | [number, number, number, number, number, number];