question

JasonBian-5628 avatar image
0 Votes"
JasonBian-5628 asked PRADEEPCHEEKATLA-MSFT commented

Is there a way to extract purview schema classifications using existing packages?

Hi, I'm trying to extract the schema classifications shown in the Purview GUI for a single Purview table:

194883-image.png



Below are my attempts at using the pyapacheatlas library to do this. I can't seem to find any information on classifications within the returned JSON.

import os
from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core.client import PurviewClient
from pyapacheatlas.core.typedef import TypeCategory
import json



client = PurviewClient(
account_name=os.environ.get("PURVIEW_NAME", "ltg-wus-analytics-pa"),
authentication=oauth
)

results = client.get_entity(guid="e75b2cf4-14b5-4a65-a67d-e7f46670e943")
print(json.dumps(results["entities"][0], indent=2))



{
"typeName": "azure_datalake_gen2_resource_set",
"attributes": {
"owner": null,
"modifiedTime": 0,
"replicatedTo": null,
"userDescription": null,
"replicatedFrom": null,
"qualifiedName": "https://ltguatwusanalyticsdl.dfs.core.windows.net/[Redacted]/{SparkPartitions}",
"displayName": null,
"aggregatedProperties": null,
"partitionTypes": null,
"description": null,
"totalSizeBytes": 0,
"partitionCount": 0,
"schemaCount": 0,
"name": "SalesTable"
},
"lastModifiedTS": "1",
"guid": "e75b2cf4-14b5-4a65-a67d-e7f46670e943",
"isIncomplete": false,
"status": "ACTIVE",
"createdBy": "ServiceAdmin",
"updatedBy": "ServiceAdmin",
"createTime": 1648662851647,
"updateTime": 1648662851647,
"version": 0,
"source": "DataScan",
"collectionId": "lxuxnr",
"relationshipAttributes": {
"schema": [],
"inputToProcesses": [],
"attachedSchema": [],
"meanings": [],
"outputFromProcesses": [],
"tabular_schema": {
"guid": "b527e03c-294c-4524-9735-0cf6f6f60000",
"typeName": "tabular_schema",
"entityStatus": "ACTIVE",
"displayText": "tabular_schema",
"relationshipType": "tabular_schema_datasets",
"relationshipGuid": "959bc2d2-5cd9-4517-98f2-80950639be94",
"relationshipStatus": "ACTIVE",
"relationshipAttributes": {
"typeName": "tabular_schema_datasets"
}
}
},
"labels": []
}





typedefs = client.get_typedef(TypeCategory.ENTITY, name="tabular_schema")
print(json.dumps(typedefs, indent=2))





{
"category": "ENTITY",
"guid": "3e1dfdb9-2971-2165-7863-faf0bdb9ea61",
"createdBy": "admin",
"updatedBy": "admin",
"createTime": 1648014315199,
"updateTime": 1648014315199,
"version": 1,
"name": "tabular_schema",
"description": "tabular_schema",
"typeVersion": "1.0",
"options": {
"schemaElementsAttribute": "columns"
},
"lastModifiedTS": "1",
"attributeDefs": [
{
"name": "format",
"typeName": "string",
"isOptional": true,
"cardinality": "SINGLE",
"valuesMinCount": 0,
"valuesMaxCount": 1,
"isUnique": false,
"isIndexable": false,
"includeInNotification": false
}
],
"superTypes": [
"DataSet"
],
"subTypes": [],
"relationshipAttributeDefs": [
{
"name": "schema",
"typeName": "array<avro_schema>",
"isOptional": true,
"cardinality": "SET",
"valuesMinCount": -1,
"valuesMaxCount": -1,
"isUnique": false,
"isIndexable": false,
"includeInNotification": false,
"relationshipTypeName": "avro_schema_associatedEntities",
"isLegacyAttribute": false
},
{
"name": "inputToProcesses",
"typeName": "array<Process>",
"isOptional": true,
"cardinality": "SET",
"valuesMinCount": -1,
"valuesMaxCount": -1,
"isUnique": false,
"isIndexable": false,
"includeInNotification": false,
"relationshipTypeName": "dataset_process_inputs",
"isLegacyAttribute": false
},
{
"name": "pipeline",
"typeName": "spark_ml_pipeline",
"isOptional": true,
"cardinality": "SINGLE",
"valuesMinCount": -1,
"valuesMaxCount": -1,
"isUnique": false,
"isIndexable": false,
"includeInNotification": false,
"relationshipTypeName": "spark_ml_pipeline_dataset",
"isLegacyAttribute": false
},
{
"name": "associatedDataSets",
"typeName": "array<DataSet>",
"isOptional": true,
"cardinality": "SET",
"valuesMinCount": -1,
"valuesMaxCount": -1,
"isUnique": false,
"isIndexable": false,
"includeInNotification": false,
"relationshipTypeName": "tabular_schema_datasets",
"isLegacyAttribute": false
},
{
"name": "columns",
"typeName": "array<column>",
"isOptional": true,
"cardinality": "SET",
"valuesMinCount": -1,
"valuesMaxCount": -1,
"isUnique": false,
"isIndexable": false,
"includeInNotification": false,
"constraints": [
{
"type": "ownedRef"
}
],
"relationshipTypeName": "tabular_schema_columns",
"isLegacyAttribute": false
},
{
"name": "model",
"typeName": "spark_ml_model",
"isOptional": true,
"cardinality": "SINGLE",
"valuesMinCount": -1,
"valuesMaxCount": -1,
"isUnique": false,
"isIndexable": false,
"includeInNotification": false,
"relationshipTypeName": "spark_ml_model_dataset",
"isLegacyAttribute": false
},
{
"name": "attachedSchema",
"typeName": "array<schema>",
"isOptional": true,
"cardinality": "SET",
"valuesMinCount": -1,
"valuesMaxCount": -1,
"isUnique": false,
"isIndexable": false,
"includeInNotification": false,
"relationshipTypeName": "dataset_attached_schemas",
"isLegacyAttribute": false
},
{
"name": "meanings",
"typeName": "array<AtlasGlossaryTerm>",
"isOptional": true,
"cardinality": "SET",
"valuesMinCount": -1,
"valuesMaxCount": -1,
"isUnique": false,
"isIndexable": false,
"includeInNotification": false,
"relationshipTypeName": "AtlasGlossarySemanticAssignment",
"isLegacyAttribute": false
},
{
"name": "outputFromProcesses",
"typeName": "array<Process>",
"isOptional": true,
"cardinality": "SET",
"valuesMinCount": -1,
"valuesMaxCount": -1,
"isUnique": false,
"isIndexable": false,
"includeInNotification": false,
"relationshipTypeName": "process_dataset_outputs",
"isLegacyAttribute": false
},
{
"name": "tabular_schema",
"typeName": "tabular_schema",
"isOptional": true,
"cardinality": "SINGLE",
"valuesMinCount": -1,
"valuesMaxCount": -1,
"isUnique": false,
"isIndexable": false,
"includeInNotification": false,
"relationshipTypeName": "tabular_schema_datasets",
"isLegacyAttribute": false
}
],
"businessAttributeDefs": {}
}





azure-purview
image.png (61.1 KiB)
· 2
5 |1600 characters needed characters left characters exceeded

Up to 10 attachments (including images) can be used with a maximum of 3.0 MiB each and 30.0 MiB total.

Hello @JasonBian-5628,

Thank you for the question and using MS Q&A platform.

We are reaching out to the internal team to get more details on this ask. I will update you once I hear back from the team.

0 Votes 0 ·

Hello @JasonBian-5628,

Following up to see if the below suggestion was helpful. And, if you have any further query do let us know.


  • Please don't forget to click on 130616-image.png or upvote 130671-image.png button whenever the information provided helps you.

0 Votes 0 ·

1 Answer

PRADEEPCHEEKATLA-MSFT avatar image
1 Vote"
PRADEEPCHEEKATLA-MSFT answered PRADEEPCHEEKATLA-MSFT commented

Hello @JasonBian-5628,

Thanks for the question and using MS Q&A platform.

This can be achieved by cycling through the referredEntities.

Example: Get the entity information for an Azure SQL Table by using the command - pv entity read --guid "AZURE_SQL_TABLE_GUID"

Within the JSON response, there is a referredEntities section, which includes the Azure SQL Columns and their classifications (if applicable).

195386-image.png

For more details, refer to Azure Purview CLI.

Hope this helps. Please let us know if you have any further queries.


  • Please don't forget to click on 130616-image.png or upvote 130671-image.png button whenever the information provided helps you. Original posters help the community find answers faster by identifying the correct answer. Here is how

  • Want a reminder to come back and check responses? Here is how to subscribe to a notification

  • If you are interested in joining the VM program and help shape the future of Q&A: Here is how you can be part of Q&A Volunteer Moderators


image.png (35.9 KiB)
· 8
5 |1600 characters needed characters left characters exceeded

Up to 10 attachments (including images) can be used with a maximum of 3.0 MiB each and 30.0 MiB total.

Hello @JasonBian-5628,

Just checking in to see if the above answer helped. If this answers your query, do click Accept Answer and Up-Vote for the same. And, if you have any further query do let us know.

1 Vote 1 ·
JasonBian-5628 avatar image JasonBian-5628 PRADEEPCHEEKATLA-MSFT ·

Hey, this was able to solve my problem. Thanks for the quick replies

1 Vote 1 ·

Hello @JasonBian-5628,

Glad to know that your issue has been resolved. You can accept it as the answer (200053-image.png). This can be beneficial to other community members. Thank you.


0 Votes 0 ·
image.png (2.9 KiB)

Thanks, this solved my issue and thank you for the quick replies.

1 Vote 1 ·

Hello @JasonBian-5628,

Glad to know that your issue has been resolved.

0 Votes 0 ·

Ah, this is a good tool. I'm currently using this for a delta table and I don't see anything in the referredEntities.

195444-image.png



This is what my full path looks like. I'm wondering whether Delta tables in ADLS have classifications nested differently.


Fully qualified name https://[redacted]/ltg/Query/CurrentState/DynamicsExport/SalesTable/{SparkPartitions}

URL: https://web.purview.azure.com/resource/ltg-wus-analytics-pa/main/catalog/entity?guid=e75b2cf4-14b5-4a65-a67d-e7f46670e943&section=details&feature.tenant=217183f8-60f1-4066-9515-6e34cdfdf4e4



0 Votes 0 ·
image.png (53.1 KiB)

Hello @JasonBian-5628,

Working example for ADLS resource set using purview cli with PowerShell:

196084-image.png

Code Sample:

 $env:PURVIEW_NAME = "YOUR_PURVIEW_ACCOUNT_NAME"
 $guid = "YOUR_ADLS_GEN2_RESOURCE_SET_GUID"
 $adls_resource_set = pv entity read --guid $guid | ConvertFrom-Json
 $tabular_schema_guid = $adls_resource_set.entity.relationshipAttributes.tabular_schema.guid
 $tabular_schema = pv entity read --guid $tabular_schema_guid | ConvertFrom-Json
 ForEach ($x in $tabular_schema.referredEntities.PSObject.Properties) {$x.Value.classifications.typeName}

0 Votes 0 ·
image.png (158.7 KiB)
JasonBian-5628 avatar image JasonBian-5628 PRADEEPCHEEKATLA-MSFT ·

Hey Pradeep:

Is there a special method for spark partitions? We are currently using delta tables.

https://[redacted]/ltg/Query/CurrentState/DynamicsExport/SalesTable/{SparkPartitions}

I'm using the above method, and while I see classifications in the Purview GUI, there is nothing in the returned JSON under the referredEntities key:

197364-image.png


0 Votes 0 ·
image.png (78.0 KiB)