Using the Bing API to geocode data in SSIS
During my talk at SQLRally Nordic Bringing maps to SQL Server I showed how to geocode data in SSIS.
The way I did this was to create a custom component in SSIS. In this post I will show how this was done.
The first thing that I did was to create a new project in Visual Studio 2012. The type of the project is a Visual C# Class Library project.
In order to program a SSIS component I first need to add some using statements:
using Microsoft.SqlServer.Dts.Pipeline;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
I also need to add these as references in the project
This sets me up for coding custom tasks in SSIS.
In order to call the Bing API I need to add a Service Reference to the GeoCodeService that I will use in my project. I do this by right-click on the Service References and choose to Add Service Reference
In the address field I add the following URL
https://dev.virtualearth.net/webservices/v1/geocodeservice/geocodeservice.svc?wsdl
I add the Namespace called GeocodeService.
Now I can start adding the code that I will use in my Geocode task.
namespace GeoCoderTask
{
[DtsPipelineComponent(DisplayName = "GeoCoder Task", ComponentType = ComponentType.Transform)]
public class GeocoderTask : PipelineComponent
{
private int inputColumnBufferIndex = -1;
private int Latitude_OutBufferIndex = -1;
private int Longitude_OutBufferIndex = -1;
string key = "<Add your bing maps key>";
private GeocodeService.GeocodeRequest geocodeRequest = null;
private GeocodeService.ConfidenceFilter[] filters = null;
private GeocodeService.GeocodeOptions geocodeOptions = null;
private GeocodeService.GeocodeServiceClient geocodeService = null;
public override void PostExecute()
{
base.PostExecute();
geocodeRequest = null;
filters = null;
geocodeOptions = null;
geocodeService = null;
}
First in the task I specify what type of SSIS component that I would like to create in my case the ComponentType.Transform since I want to create transformation component.
You also need to add your Bing maps key that will be used in your component. You can get your key here https://www.bingmapsportal.com/
Add the key to the variable called key.
Next thing is to start working with the ProvideComponentProperties method. In this method I setup the columns that will be used as inputs and outputs in the component. In this case I specify that I will take one string column as input and output two
public override void ProvideComponentProperties()
{
base.ProvideComponentProperties();
base.RemoveAllInputsOutputsAndCustomProperties();
IDTSInput100 input = this.ComponentMetaData.InputCollection.New();
input.Name = "Input";
IDTSOutput100 output = this.ComponentMetaData.OutputCollection.New();
output.Name = "Output";
output.SynchronousInputID = input.ID;
IDTSOutputColumn100 Latitude_Out = output.OutputColumnCollection.New();
Latitude_Out.Name = "Latitude_Out";
Latitude_Out.SetDataTypeProperties(
Microsoft.SqlServer.Dts.Runtime.Wrapper.DataType.DT_R8, 0, 0, 0, 0);
IDTSOutputColumn100 Longitude_Out = output.OutputColumnCollection.New();
Longitude_Out.Name = "Longitude_Out";
Longitude_Out.SetDataTypeProperties(
Microsoft.SqlServer.Dts.Runtime.Wrapper.DataType.DT_R8, 0, 0, 0, 0);
}
When this have been done I can start adding some logic to the PreExecute method that will be executed before any rows are passed through the component.
public override void PreExecute()
{
base.PreExecute();
//Setup the columns used
IDTSInput100 input = ComponentMetaData.InputCollection[0];
inputColumnBufferIndex = BufferManager.FindColumnByLineageID(input.Buffer,
input.InputColumnCollection[0].LineageID);
IDTSOutput100 output = ComponentMetaData.OutputCollection[0];
Latitude_OutBufferIndex = BufferManager.FindColumnByLineageID(input.Buffer,
output.OutputColumnCollection[0].LineageID);
Longitude_OutBufferIndex = BufferManager.FindColumnByLineageID(input.Buffer,
output.OutputColumnCollection[1].LineageID);
//Initialize the GeocodeRequest
geocodeRequest = new GeocodeService.GeocodeRequest();
geocodeRequest.Credentials = new GeocodeService.Credentials();
geocodeRequest.Credentials.ApplicationId = key;
//Initialize the confidencefilter
filters = new GeocodeService.ConfidenceFilter[1];
filters[0] = new GeocodeService.ConfidenceFilter();
filters[0].MinimumConfidence = GeocodeService.Confidence.High;
//Initialize the geocodeoptions
geocodeOptions = new GeocodeService.GeocodeOptions();
geocodeOptions.Filters = filters;
geocodeRequest.Options = geocodeOptions;
//Initialize the Geocodeserviceclient
geocodeService = new GeocodeService.GeocodeServiceClient("BasicHttpBinding_IGeocodeService");
}
In this method I have logic for binding the output and input of the component to the correct columns as well as setting up the connection to the Bing API.
The last thing to do is to add the code to the ProcessInput that is the code that runs for every row that flows through the component.
public override void ProcessInput(int inputID, PipelineBuffer buffer)
{
//base.ProcessInput(inputID, buffer);
if (buffer.EndOfRowset == false)
{
try
{
//meat of the implementation
while (buffer.NextRow())
{
//Check for null values
if (buffer.IsNull(inputColumnBufferIndex))
{
buffer.SetNull(Latitude_OutBufferIndex);
buffer.SetNull(Longitude_OutBufferIndex);
}
else
{
//buffer.SetDouble(Latitude_OutBufferIndex, 200.00);
//buffer.SetDouble(Longitude_OutBufferIndex, 300.00);
//Save the input adress
string inputadress = buffer.GetString(inputColumnBufferIndex);
// Set the full address query
geocodeRequest.Query = inputadress;
// Make the query
GeocodeService.GeocodeResponse geocodeResponse = geocodeService.Geocode(geocodeRequest);
double Latitude = geocodeResponse.Results[0].Locations[0].Latitude;
double Longitude = geocodeResponse.Results[0].Locations[0].Longitude;
buffer.SetDouble(Latitude_OutBufferIndex, Latitude);
buffer.SetDouble(Longitude_OutBufferIndex, Longitude);
}
}
}
catch (System.Exception ex)
{
bool cancel = false;
ComponentMetaData.FireError(0, ComponentMetaData.Name, ex.Message, string.Empty, 0, out cancel);
throw new Exception("Could not process input buffer");
}
}
}
This is the method that actually calls the Bing web service that will perform the geocoding.
Now that you have finished writing your component you need to compile it. I created a 32-bit component. This needs then to be copied to C:\Program Files (x86)\Microsoft SQL Server\110\DTS\PipelineComponents. In order to run it you also need to add some information to the C:\Program Files (x86)\Microsoft SQL Server\110\DTS\Binn\DtsDebugHost.exe.CONFIG and C:\Program Files (x86)\Microsoft SQL Server\110\DTS\Binn\DTExec.exe.config. The reason for this is that these are the exe files that will execute the package and in order for them to call the web service you need it to find the service this done through adding information to the system.servicemodel section in the config file.
<system.serviceModel>
<bindings>
<basicHttpBinding>
<binding name="BasicHttpBinding_IGeocodeService" />
</basicHttpBinding>
<customBinding>
<binding name="CustomBinding_IGeocodeService">
<binaryMessageEncoding />
<httpTransport />
</binding>
</customBinding>
</bindings>
<client>
<endpoint address="https://dev.virtualearth.net/webservices/v1/geocodeservice/GeocodeService.svc"
binding="basicHttpBinding" bindingConfiguration="BasicHttpBinding_IGeocodeService"
contract="GeocodeService.IGeocodeService" name="BasicHttpBinding_IGeocodeService" />
<endpoint address="https://dev.virtualearth.net/webservices/v1/geocodeservice/GeocodeService.svc/binaryHttp"
binding="customBinding" bindingConfiguration="CustomBinding_IGeocodeService"
contract="GeocodeService.IGeocodeService" name="CustomBinding_IGeocodeService" />
</client>
</system.serviceModel>
Now you should be able to create your own component that can be used to geocode information from your databases. I have also added the code to this post so that you can have a look at the entire project, it contains some more code mainly to handle errors.
Comments
Anonymous
October 30, 2012
The comment has been removedAnonymous
October 31, 2012
You should not need to add the control as a .net toolbox item. Just copy the file that you have compiled to C:Program Files (x86)Microsoft SQL Server110DTSPipelineComponents and then from SQL Data Tools right click on the SSIS toolbox and choose to do a Refresh toolboxAnonymous
April 23, 2013
The comment has been removedAnonymous
April 23, 2013
KP what you need to do is to remove the files from C:Program Files (x86)Microsoft SQL Server110DTSPipelineComponents and restart SQL Data Tools, then you can add it again and then create a new package. The following post explains what is happening social.msdn.microsoft.com/.../E02EF3B0-7A3B-4A02-966C-AEB74D8D1A5A