Public Access
1
0

chore: improve postal data import observability

Add progress logging and a status script for postal imports and neighbor builds, and ignore local raw and generated postal datasets.
This commit is contained in:
pguerrerox
2026-04-12 23:22:36 +00:00
parent cc00a439bf
commit dc7686f507
11 changed files with 267 additions and 23 deletions
+27 -5
View File
@@ -1,7 +1,8 @@
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { getDbPool } from '../../server/src/db/pool.js';
import { getFeatureGeometry, getStringProperty, normalizePostalCode, readFeatureCollection, type PostalDatasetConfig } from './postal-import-utils.js';
import { createScriptLogger } from './postal-logging.js';
import { assertGeometryIsWgs84, getFeatureGeometry, getStringProperty, normalizePostalCode, streamFeatureCollection, type PostalDatasetConfig } from './postal-import-utils.js';
// Directory that contains this script, derived from the module's own URL.
const currentDir = path.dirname(fileURLToPath(import.meta.url));
// Root folder of the postal datasets, resolved relative to this script's directory.
const datasetsRoot = path.resolve(currentDir, '../datasets/postal');
@@ -23,33 +24,46 @@ const datasetConfigs: PostalDatasetConfig[] = [
},
];
// Progress-log interval, in processed features. Read from POSTAL_IMPORT_LOG_EVERY;
// an unset, non-numeric, or zero value falls back to 500, and the result is
// clamped to at least 1.
const parsedLogEvery = Number.parseInt(process.env.POSTAL_IMPORT_LOG_EVERY ?? '500', 10);
const logEvery = Math.max(1, parsedLogEvery || 500);
async function importDataset(config: PostalDatasetConfig) {
const pool = getDbPool();
const client = await pool.connect();
const logger = createScriptLogger(`postal-import:${config.countryCode.toLowerCase()}`);
let index = 0;
try {
const collection = await readFeatureCollection(config.filePath);
let insertedCount = 0;
let skippedCount = 0;
logger.info(`Starting ${config.label} import from ${config.filePath}`);
await client.query('begin');
for (const [index, feature] of collection.features.entries()) {
for await (const feature of streamFeatureCollection(config.filePath)) {
index += 1;
const rawPostalCode = getStringProperty(feature.properties, config.postalCodeKeys);
if (!rawPostalCode) {
skippedCount += 1;
if (index % logEvery === 0) {
logger.info(`Processed ${index} features, inserted ${insertedCount}, skipped ${skippedCount}`);
}
continue;
}
const normalizedPostalCode = normalizePostalCode(config.countryCode, rawPostalCode);
if (!normalizedPostalCode) {
skippedCount += 1;
if (index % logEvery === 0) {
logger.info(`Processed ${index} features, inserted ${insertedCount}, skipped ${skippedCount}`);
}
continue;
}
const displayName = getStringProperty(feature.properties, config.displayNameKeys) || normalizedPostalCode;
const geometry = getFeatureGeometry(feature, config.filePath, index);
assertGeometryIsWgs84(geometry, config.filePath, index);
await client.query(
`
@@ -72,7 +86,7 @@ async function importDataset(config: PostalDatasetConfig) {
$4,
ST_Multi(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326)),
ST_Centroid(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326))::geography,
greatest(1000, round(sqrt(ST_Area(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326)::geography) / pi()))::integer),
greatest(1000, round(sqrt(ST_Area(ST_Transform(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326), 3857)) / pi()))::integer),
$6::jsonb,
now(),
now()
@@ -98,12 +112,17 @@ async function importDataset(config: PostalDatasetConfig) {
);
insertedCount += 1;
if (index % logEvery === 0) {
logger.info(`Processed ${index} features, inserted ${insertedCount}, skipped ${skippedCount}`);
}
}
await client.query('commit');
console.log(`Imported ${insertedCount} ${config.label} areas from ${config.filePath}. Skipped ${skippedCount}.`);
logger.info(`Completed ${config.label} import. Inserted ${insertedCount} areas, skipped ${skippedCount}.`);
} catch (error) {
await client.query('rollback');
logger.error(`Import failed after processing ${index} features.`);
throw error;
} finally {
client.release();
@@ -111,10 +130,13 @@ async function importDataset(config: PostalDatasetConfig) {
}
/**
 * Script entry point: imports each entry of `datasetConfigs` in order, logs an
 * overall success message, and then ends the pool returned by `getDbPool()`.
 *
 * Imports are awaited one at a time. If any import rejects, the rejection
 * propagates to the caller and neither the success message nor the pool
 * shutdown below is reached.
 */
async function run() {
  const scriptLogger = createScriptLogger('postal-import');

  // Sequential on purpose: each dataset import is awaited before the next starts.
  for (const datasetConfig of datasetConfigs) {
    await importDataset(datasetConfig);
  }

  scriptLogger.info('All postal datasets imported successfully.');

  const pool = getDbPool();
  await pool.end();
}