Let's first prepare our company database. To do so, we need to obtain the boundaries of the administrative divisions of Poland. The official data can be downloaded from http://www.codgik.gov.pl/index.php/darmowe-dane/prg.html, but this dataset is quite large and requires some further processing. Instead, we will use a level-three administrative boundaries extract based on the official data and provided by the folks at GIS Support (http://www.gis-support.pl/downloads/gminy.zip).
We already know how to load shapefiles using different techniques, so before we start reading the JSON data, let's take a little detour and load our shapefile using Node.js. The process will look like this:
- Download the data.
- Extract the files.
- Make sure our database has an appropriate schema.
- Import the shapefile data.
Once our Node.js module project has been created, we need some extra modules that will simplify our work:
npm install pg --save
npm install unzip --save
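Apart from the two modules above, the script relies on a handful of Node.js built-ins and on a few values referenced throughout the functions that follow: dbCredentials, schemaName, tblName, downloadUrl, downloadDir, fileName, and a small progressIndicator helper. The snippet below is a minimal sketch of that setup rather than the exact original; the connection settings and the schema/table names match the console output shown later in this section, while the downloadDir value and the progressIndicator implementation are assumptions you can adjust to your environment:

//built-in modules used by the script
const fs = require('fs');
const http = require('http');
const path = require('path');
const exec = require('child_process').exec;
//extra modules installed above
const pg = require('pg');
const unzip = require('unzip');

//where to get the data from and where to put it
const downloadUrl = 'http://www.gis-support.pl/downloads/gminy.zip';
const downloadDir = __dirname; //assumption - any writable directory will do
const fileName = 'gminy.zip';

//database connection settings and the target schema/table
const dbCredentials = {
    host: 'localhost',
    port: 5434,
    user: 'postgres',
    database: 'mastering_postgis'
};
const schemaName = 'weather_alerts';
const tblName = 'gminy';

//a very simple console progress indicator used by the functions below
//(an assumed implementation - prints a dot every 100 processed chunks)
const progressIndicator = {
    counter: 0,
    next: function(){
        this.counter++;
        if(this.counter % 100 === 0){
            process.stdout.write('.');
        }
    },
    reset: function(){
        this.counter = 0;
        process.stdout.write('\n');
    }
};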
Let's code our first step:
/**
 * downloads a file
 */
const download = function(url, destination){
    return new Promise((resolve, reject) => {
        console.log(`Downloading ${url} to ${destination}...`);
        let file = fs.createWriteStream(destination);
        let request = http.get(url, function(response){
            //tick the progress indicator as the data chunks arrive
            response.on('data', (chunk) => { progressIndicator.next(); });
            //stream the response straight into the destination file
            response.pipe(file);
            file.on('finish', () => {
                progressIndicator.reset();
                console.log("File downloaded!");
                file.close();
                resolve(destination);
            });
        }).on('error', (err) => {
            //clean up the partially downloaded file and bail out
            fs.unlink(destination, () => {});
            reject(err.message);
        });
    });
}
This is a simple function that downloads the specified file from the internet and saves it under the specified file name; it resolves with the destination path, so the next step in the chain knows where to find the archive.
Once we have a ZIP archive locally, we need to extract it in order to import our shapefile to the database:
/**
 * unzips a specified file to the same directory
 */
const unzipFile = function(zipFile){
    return new Promise((resolve, reject) => {
        console.log(`Unzipping '${zipFile}'...`);
        //Note: the archive is unzipped to the directory it resides in
        fs.createReadStream(zipFile)
            .on('data', (chunk) => { progressIndicator.next(); })
            .pipe(unzip.Extract({ path: path.dirname(zipFile) }))
            //when ready, return the shp file name so it can be used to load the data into the db
            //Note: in this case the shp file name is the same as the archive name!
            .on('close', () => {
                progressIndicator.reset();
                console.log('Unzipped!');
                resolve(zipFile.replace('zip', 'shp'));
            });
    });
}
At this stage, we should have our shapefile extracted and ready to be imported. However, before we initialize the import, let's make sure our schema exists. For this, we will simply try to create it if it does not exist yet:
/**
 * checks if database is ready for data import
 */
const dbCheckup = function(shp){
    return new Promise((resolve, reject) => {
        console.log('Checking up the database...');
        let client = new pg.Client(dbCredentials);
        client.connect((err) => {
            if(err){
                reject(err.message);
                return;
            }
            //create the target schema if it is not there yet
            client.query(`CREATE SCHEMA IF NOT EXISTS ${schemaName};`, (err, result) => {
                //close the connection regardless of the outcome
                client.end();
                if(err){
                    reject(err.message);
                }
                else {
                    console.log('Database ready!');
                    //pass the shapefile path on, so the next step can pick it up
                    resolve(shp);
                }
            });
        });
    });
}
Having got this far, we are ready to import the shapefile to the database. In order to do so, we will simply call ogr2ogr and pass it the parameters we are after, as we did in the data import chapter:
/**
 * loads a shapefile to a database
 */
const dbLoad = function(shp){
    return new Promise((resolve, reject) => {
        console.log('Loading shapefile...');
        let dbc = dbCredentials;
        //assemble the ogr2ogr command that pushes the shapefile into PostGIS
        let cmd = `ogr2ogr -f "PostgreSQL" PG:"host=${dbc.host} port=${dbc.port} user=${dbc.user} dbname=${dbc.database}" "${shp}" -t_srs EPSG:2180 -nlt PROMOTE_TO_MULTI -nln ${schemaName}.${tblName} -overwrite -lco GEOMETRY_NAME=geom`;
        console.log(`Executing command: ${cmd}`);
        exec(cmd, (err, stdout, stderr) => {
            if(err){
                reject(err.message);
                return;
            }
            console.log(stdout || stderr);
            resolve();
        });
    });
}
Also, you may have noticed the usage of the -nlt PROMOTE_TO_MULTI parameter. Our shapefile contains MultiPolygons, but since ogr2ogr assumes a plain polygon geometry type by default when importing areas, the parameter is needed to promote all geometries to MultiPolygon and so avoid import errors for the multi-part features.
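If you want to verify the effect of PROMOTE_TO_MULTI once the import has finished, you can group the imported records by their geometry type. The following helper is a minimal sketch in the same style as the other functions in this script (the geomTypeCheck name is ours, and it reuses the dbCredentials, schemaName, and tblName settings assumed earlier); with PROMOTE_TO_MULTI in place it should report a single MULTIPOLYGON entry:

/**
 * lists geometry types present in the imported table (a quick sanity check)
 */
const geomTypeCheck = function(){
    return new Promise((resolve, reject) => {
        let client = new pg.Client(dbCredentials);
        client.connect((err) => {
            if(err){
                reject(err.message);
                return;
            }
            //GeometryType() reports 'MULTIPOLYGON' for every row when PROMOTE_TO_MULTI was used
            client.query(`SELECT GeometryType(geom) AS geom_type, Count(*) AS cnt FROM ${schemaName}.${tblName} GROUP BY GeometryType(geom);`, (err, result) => {
                client.end();
                if(err){
                    reject(err.message);
                }
                else {
                    result.rows.forEach(r => console.log(`${r.geom_type}: ${r.cnt}`));
                    resolve();
                }
            });
        });
    });
}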
At this stage, all the data should be present in the table, so a final step will be checking up on the number of imported records:
/**
 * counts imported records
 */
const dbLoadTest = function(){
    return new Promise((resolve, reject) => {
        console.log('Verifying import...');
        let client = new pg.Client(dbCredentials);
        client.connect((err) => {
            if(err){
                reject(err.message);
                return;
            }
            client.query(`SELECT Count(*) as rec_count FROM ${schemaName}.${tblName};`, (err, result) => {
                //close the connection regardless of the outcome
                client.end();
                if(err){
                    reject(err.message);
                }
                else {
                    console.log(`Imported ${result.rows[0].rec_count} records!`);
                    resolve();
                }
            });
        });
    });
}
In order to execute the code we have been patiently putting together, we need to add the following to our script:
//chain all the stuff together
download(downloadUrl, path.join(downloadDir, fileName))
    .then(unzipFile)
    .then(dbCheckup)
    .then(dbLoad)
    .then(dbLoadTest)
    .catch(err => console.log(`oops, an error has occurred: ${err}`));
When you execute our script, the console output should be similar to the following:
Downloading http://www.gis-support.pl/downloads/gminy.zip to F:\mastering_postgis\chapter07\gminy.zip...
File downloaded!
Unzipping 'F:\mastering_postgis\chapter07\gminy.zip'...
Unzipped!
Checking up the database...
Database ready!
Loading shapefile...
Executing command: ogr2ogr -f "PostgreSQL" PG:"host=localhost port=5434 user=postgres dbname=mastering_postgis" "F:\mastering_postgis\chapter07\gminy.shp" -t_srs EPSG:2180 -nlt PROMOTE_TO_MULTI -nln weather_alerts.gminy -overwrite -lco GEOMETRY_NAME=geom
Verifying import...
Imported 2481 records!
It is not hard to imagine that our hypothetical data provider delivers some specialized data that we need to process in order to deliver value to our customers. If such a procedure happens once a month, perhaps preparing the data manually will not be a problem. If it needs to be executed more often, say once a day or every hour, the benefits of using a script and a scheduled task that executes it at a specified interval (or on demand) become immediately clear.