chore: improve postal data import observability
Add progress logging and a status script for postal imports and neighbor builds, and ignore local raw and generated postal datasets.
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { getDbPool } from '../../server/src/db/pool.js';
|
||||
import { getFeatureGeometry, getStringProperty, normalizePostalCode, readFeatureCollection, type PostalDatasetConfig } from './postal-import-utils.js';
|
||||
import { createScriptLogger } from './postal-logging.js';
|
||||
import { assertGeometryIsWgs84, getFeatureGeometry, getStringProperty, normalizePostalCode, streamFeatureCollection, type PostalDatasetConfig } from './postal-import-utils.js';
|
||||
|
||||
const currentDir = path.dirname(fileURLToPath(import.meta.url));
|
||||
const datasetsRoot = path.resolve(currentDir, '../datasets/postal');
|
||||
@@ -23,33 +24,46 @@ const datasetConfigs: PostalDatasetConfig[] = [
|
||||
},
|
||||
];
|
||||
|
||||
/**
 * Number of processed features between progress log lines.
 *
 * Taken from the POSTAL_IMPORT_LOG_EVERY environment variable, parsed as a
 * base-10 integer. A missing, unparsable, or zero value falls back to 500,
 * and the final interval is clamped so it is never below 1.
 */
const logEvery = (() => {
  const requested = Number.parseInt(process.env.POSTAL_IMPORT_LOG_EVERY ?? '500', 10);
  const interval = Number.isNaN(requested) || requested === 0 ? 500 : requested;
  return Math.max(1, interval);
})();
|
||||
|
||||
async function importDataset(config: PostalDatasetConfig) {
|
||||
const pool = getDbPool();
|
||||
const client = await pool.connect();
|
||||
const logger = createScriptLogger(`postal-import:${config.countryCode.toLowerCase()}`);
|
||||
let index = 0;
|
||||
|
||||
try {
|
||||
const collection = await readFeatureCollection(config.filePath);
|
||||
let insertedCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
logger.info(`Starting ${config.label} import from ${config.filePath}`);
|
||||
|
||||
await client.query('begin');
|
||||
|
||||
for (const [index, feature] of collection.features.entries()) {
|
||||
for await (const feature of streamFeatureCollection(config.filePath)) {
|
||||
index += 1;
|
||||
const rawPostalCode = getStringProperty(feature.properties, config.postalCodeKeys);
|
||||
|
||||
if (!rawPostalCode) {
|
||||
skippedCount += 1;
|
||||
if (index % logEvery === 0) {
|
||||
logger.info(`Processed ${index} features, inserted ${insertedCount}, skipped ${skippedCount}`);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
const normalizedPostalCode = normalizePostalCode(config.countryCode, rawPostalCode);
|
||||
if (!normalizedPostalCode) {
|
||||
skippedCount += 1;
|
||||
if (index % logEvery === 0) {
|
||||
logger.info(`Processed ${index} features, inserted ${insertedCount}, skipped ${skippedCount}`);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
const displayName = getStringProperty(feature.properties, config.displayNameKeys) || normalizedPostalCode;
|
||||
const geometry = getFeatureGeometry(feature, config.filePath, index);
|
||||
assertGeometryIsWgs84(geometry, config.filePath, index);
|
||||
|
||||
await client.query(
|
||||
`
|
||||
@@ -72,7 +86,7 @@ async function importDataset(config: PostalDatasetConfig) {
|
||||
$4,
|
||||
ST_Multi(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326)),
|
||||
ST_Centroid(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326))::geography,
|
||||
greatest(1000, round(sqrt(ST_Area(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326)::geography) / pi()))::integer),
|
||||
greatest(1000, round(sqrt(ST_Area(ST_Transform(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326), 3857)) / pi()))::integer),
|
||||
$6::jsonb,
|
||||
now(),
|
||||
now()
|
||||
@@ -98,12 +112,17 @@ async function importDataset(config: PostalDatasetConfig) {
|
||||
);
|
||||
|
||||
insertedCount += 1;
|
||||
|
||||
if (index % logEvery === 0) {
|
||||
logger.info(`Processed ${index} features, inserted ${insertedCount}, skipped ${skippedCount}`);
|
||||
}
|
||||
}
|
||||
|
||||
await client.query('commit');
|
||||
console.log(`Imported ${insertedCount} ${config.label} areas from ${config.filePath}. Skipped ${skippedCount}.`);
|
||||
logger.info(`Completed ${config.label} import. Inserted ${insertedCount} areas, skipped ${skippedCount}.`);
|
||||
} catch (error) {
|
||||
await client.query('rollback');
|
||||
logger.error(`Import failed after processing ${index} features.`);
|
||||
throw error;
|
||||
} finally {
|
||||
client.release();
|
||||
@@ -111,10 +130,13 @@ async function importDataset(config: PostalDatasetConfig) {
|
||||
}
|
||||
|
||||
/**
 * Script entry point: imports every entry of `datasetConfigs` in order, logs
 * a completion message, and then ends the shared database pool.
 *
 * Datasets are processed one at a time; each import is awaited before the
 * next one starts. If an import rejects, the rejection propagates to the
 * caller and neither the success log nor the pool shutdown below is reached.
 */
async function run() {
  const scriptLogger = createScriptLogger('postal-import');

  for (const datasetConfig of datasetConfigs) {
    await importDataset(datasetConfig);
  }

  scriptLogger.info('All postal datasets imported successfully.');

  // Only reached when every dataset imported without throwing.
  const pool = getDbPool();
  await pool.end();
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user