Databricks Error in SQL statement: ParseException: mismatched input 'Service_Date

AzeemK 516 Reputation points
2021-08-10T19:36:24.837+00:00

I am running this script in Azure Databricks using spark SQL , getting this error below , I am new to Databricks so wondering any tips on troubleshooting this

Error in SQL statement: ParseException:
mismatched input 'Service_Date' expecting {'(', 'DESC', 'DESCRIBE', 'FROM', 'MAP', 'REDUCE', 'SELECT', 'TABLE', 'VALUES', 'WITH'}(line 16, pos 0)

CREATE OR REPLACE VIEW operations_staging.v_claims AS (

/*
WITH Snapshot_Date AS
(
SELECT T1.claim_number,
T1.source_system,
MAX(T1.snapshot_date) snapshot_date
FROM bhc_claim.medical_claim T1
GROUP BY T1.claim_number,
T1.source_system
),
*/

Service_Date AS
(
SELECT T1.claim_number,
T1.source_system,
MIN(T1.service_from_date) claim_service_date
FROM bhc_claim.medical_claim_detail T1
GROUP BY T1.claim_number,
T1.source_system
),

Pend_Step1 AS
(
SELECT T1.claim_num Claim_Number,
T1.tax_id,
T1.provider provider_name,
TO_DATE(T1.incurred,"MM/dd/yyyy") Service_Date,
TO_DATE(T1.received,"MM/dd/yyyy") Received_Date,
TO_DATE(T1.report_date,"MM/dd/yyyy") Report_Date,
T1.pending_amount Pend_Amount,
T1.pend_code Pend_Code,
T1.pend_code_description Pend_Code_Desc,
T1.hold_reason_code Hold_Code,
T1.hold_code_description Hold_Code_Desc
FROM loomis_2021.pu T1 -- 277,011
GROUP BY T1.claim_num,
T1.tax_id,
T1.provider,
T1.incurred,
T1.received,
T1.report_date
T1.pending_amount,
T1.pend_code,
T1.pend_code_description,
T1.hold_reason_code,
T1.hold_code_description
),

Pend_Step2 AS
(
SELECT Claim_Number,
concat_ws(",", collect_set(DISTINCT T1.Hold_Code)) Hold_Code,
concat_ws(",", collect_set(DISTINCT T1.Pend_Code)) Pend_Code
FROM Pend_Step1 T1
GROUP BY Claim_Number
),

Pend_Step3 AS
(
SELECT T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Report_Date,
T2.Hold_Code,
T2.Pend_Code,
T1.Pend_Amount
FROM Pend_Step1 T1
LEFT JOIN Pend_Step2 T2
ON T1.Claim_Number = T2.Claim_Number
GROUP BY T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Report_Date,
T2.Hold_Code,
T2.Pend_Code,
T1.Pend_Amount
),

Pend_Step4 AS
(
SELECT T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Hold_Code,
T1.Pend_Code,
SUM(T1.Pend_Amount) Pend_Amount
FROM Pend_Step3 T1 -- 277,011
GROUP BY T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Hold_Code,
T1.Pend_Code
),

Paid_Previous_Step1 AS
(
SELECT MAX(claim_received_date) Max_Received_Date
FROM bhc_claim.medical_claim
WHERE DAYOFWEEK(claim_received_date) = 1
AND claim_received_date < NOW()
),

Paid_Previous_Step2 AS
(
SELECT T1.claim_# Claim_Number,
T1.tax_id Tax_ID,
CASE WHEN T1.provider_group_name IS NOT NULL THEN T1.provider_group_name
ELSE CONCAT(T1.provider_first_name,T1.provider_last_name)
END provider_name,
TO_DATE(T1.last_refresh_date,"yyyyMMdd") Refresh_Date,
TO_DATE(T1.received_date,"yyyyMMdd") Received_Date,
TO_DATE(T1.processed_date,"yyyyMMdd") Processed_Date,
MIN(TO_DATE(T1.from_dos,"yyyyMMdd")) Service_Date,
'Issued' Status,
SUM(T1.covered) Paid_Amount,
SUM(T1.billed) Billed_Amount
FROM Loomis_2021.paid_previous T1
CROSS JOIN Paid_Previous_Step1 T2
WHERE TO_DATE(T1.last_refresh_date,"yyyyMMdd") >= T2.Max_Received_Date
GROUP BY T1.claim_#,
T1.tax_id,
CASE WHEN T1.provider_group_name IS NOT NULL THEN T1.provider_group_name
ELSE CONCAT(T1.provider_first_name,T1.provider_last_name)
END,
T1.last_refresh_date,
T1.received_date,
T1.processed_date
),

Paid_Previous_Step3 AS
(
SELECT T1.Claim_Number,
MAX(T1.Refresh_Date) Refresh_Date
FROM Paid_Previous_Step2 T1
GROUP BY T1.Claim_Number
),

Paid_Previous_Step4 AS
(
SELECT T1.Claim_Number,
T1.Tax_ID,
T1.provider_name,
T1.Refresh_Date,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Status,
T1.Paid_Amount,
T1.Billed_Amount
FROM Paid_Previous_Step2 T1 -- 60,746
INNER JOIN Paid_Previous_Step3 T2 -- 60,746
ON T1.Claim_Number = T2.Claim_Number --3,816,359
AND T1.Refresh_Date = T2.Refresh_Date
),

Claim_Detail_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
MAX(T1.snapshot_date) snapshot_date
FROM bhc_claim.medical_claim_detail T1 -- 277,011
GROUP BY T1.claim_number,
T1.source_system
),

Revenue_Code_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
T1.snapshot_date,
concat_ws(",", collect_set(DISTINCT T2.revenue_code)) Revenue_Code
FROM Claim_Detail_Step1 T1
INNER JOIN bhc_claim.medical_claim_detail T2
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date -- 277,011
GROUP BY T1.claim_number,
T1.source_system,
T1.snapshot_date
),

Provider_Detail_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
T2.provider_npi,
T2.provider_tin,
T2.provider_type,
CASE WHEN provider_group_name IS NOT NULL THEN provider_group_name
ELSE CONCAT(T2.provider_first_name,T2.provider_last_name)
END provider_name,
T2.sequence_number
FROM Claim_Detail_Step1 T1
INNER JOIN bhc_claim.medical_claim_detail_provider T2
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date
WHERE T2.provider_type = 'BILLING'
GROUP BY T1.claim_number,
T1.source_system,
T2.provider_npi,
T2.provider_tin,
T2.provider_type,
CASE WHEN provider_group_name IS NOT NULL THEN provider_group_name
ELSE CONCAT(T2.provider_first_name,T2.provider_last_name)
END,
T2.sequence_number
),

Market_Detail_Step1 AS
(
SELECT REPLACE(T1.hios_plan_id_standard_component_variant, '-', '') Hios_ID,
MAX(T1.plan_year) plan_year
FROM bdp.plans T1
GROUP BY 1
),

Market_Detail_Step2 AS
(
SELECT T1.Hios_ID,
T1.plan_year,
TRIM(T2.market) market
FROM Market_Detail_Step1 T1
INNER JOIN bdp.plans T2
ON T1.Hios_ID = REPLACE(T2.hios_plan_id_standard_component_variant, '-', '')
AND T1.plan_year = T2.plan_year
),

Market_Detail_Step3 AS
(
SELECT T1.member_id,
TO_DATE(T1.source_start_date) source_start_date,
TO_DATE(T1.source_end_date) source_end_date,
T2.market
FROM dev.enrollment__base_enrollment T1 -- 3,568,717 | 3,568,717
LEFT JOIN Market_Detail_Step2 T2
ON T2.Hios_ID = T1.plan_id
WHERE T1.source_start_date <> T1.source_end_date
AND T1.effectuation_date_utc IS NOT NULL
GROUP BY T1.member_id,
TO_DATE(T1.source_start_date),
TO_DATE(T1.source_end_date),
T2.market
),

Remark_Code_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
concat_ws(",", collect_set(DISTINCT T1.remark_code)) Remark_Code
FROM bhc_claim.medical_claim_detail_remark T1 -- 3,731,653 | 3,731,653
INNER JOIN bhc_claim.medical_claim_detail T2
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date
GROUP BY T1.claim_number,
T1.source_system
),

Integration_Step1 AS
(
SELECT CASE WHEN T1.claim_number IS NOT NULL THEN T1.claim_number
WHEN T7.Claim_Number IS NOT NULL THEN T7.Claim_Number
WHEN T10.Claim_Number IS NOT NULL THEN T10.Claim_Number
END Claim_Number,
TO_DATE(CASE WHEN T1.claim_received_date IS NOT NULL THEN T1.claim_received_date
WHEN T7.Received_Date IS NOT NULL THEN T7.Received_Date
WHEN T10.Received_Date IS NOT NULL THEN T10.Received_Date
END) Received_Date,
TO_DATE(CASE WHEN T1.claim_processed_date IS NOT NULL THEN T1.claim_processed_date
WHEN T10.Processed_Date IS NOT NULL THEN T10.Processed_Date
END) Processed_Date,
TO_DATE(CASE WHEN T3.claim_service_date IS NOT NULL THEN T3.claim_service_date
WHEN T7.Service_Date IS NOT NULL THEN T7.Service_Date
WHEN T10.Service_Date IS NOT NULL THEN T10.Service_Date
END) Service_Date,
TO_DATE(T1.check_date) Check_Date,
CASE WHEN T7.Claim_Number IS NOT NULL THEN 'Loomis'
WHEN T10.Claim_Number IS NOT NULL THEN 'Loomis'
ELSE T1.source_system
END Source_System,
CASE WHEN T1.claim_status_description = 'P' AND T1.total_excluded_amount = T1.total_original_claim_amount THEN 'Denied'
WHEN T10.status = 'Issued' THEN 'Issued'
ELSE T1.claim_status_description
END Status,
T1.payment_status_description Payment_Status,
CASE WHEN T7.Claim_Number IS NOT NULL
THEN 'Pending' END Pend_Status,
CASE WHEN T10.Claim_Number IS NOT NULL
THEN 'Paid Previous' END Paid_Previous_Status,
T1.claim_submission_type_description Submission_Type,
T1.line_of_business Segment,
T7.Hold_Code,
T7.Pend_Code,
T5.Remark_Code,
T9.Revenue_Code,
CASE WHEN T7.Pend_Code IN('17','18','19','44','60','63','86','89','97')
OR T7.Hold_Code LIKE('%13%')
OR T7.Hold_Code LIKE('%70%')
THEN 'Bright'
ELSE 'Loomis'
END Bucket_Owner,
T1.member_id Member_ID,
T1.subscriber_id Subscriber_ID,
T1.subscriber_group_number Subscriber_Group,
T4.provider_npi,
CASE WHEN T4.provider_tin IS NOT NULL THEN T4.provider_tin
WHEN T7.tax_id IS NOT NULL THEN T7.tax_id
WHEN T10.Tax_ID IS NOT NULL THEN T10.Tax_ID
END provider_tin,
CASE WHEN T4.provider_name IS NOT NULL THEN T4.provider_name
WHEN T7.provider_name IS NOT NULL THEN T7.provider_name
WHEN T10.provider_name IS NOT NULL THEN T10.provider_name
END provider_name,
CASE WHEN T10.Billed_Amount IS NOT NULL THEN T10.Billed_Amount
ELSE (T1.total_billed_amount/100)
END Billed_Amount,
(T1.total_processed_amount/100) Processed_Amount,
CASE WHEN T10.Paid_Amount IS NOT NULL THEN T10.Paid_Amount
ELSE (T1.total_paid_amount/100)
END Paid_Amount,
(T1.total_interest_paid_amount/100) Interest_Paid_Amount,
T7.Pend_Amount
FROM bhc_claim.medical_claim T1 -- 3,472,165
INNER JOIN Snapshot_Date T2 -- 3,472,165
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date
LEFT JOIN Service_Date T3 -- 3,472,165
ON T1.claim_number = T3.claim_number
AND T1.source_system = T3.source_system
LEFT JOIN Provider_Detail_Step1 T4 -- 3,498,170 MATCH
ON T1.claim_number = T4.claim_number
AND T1.source_system = T4.source_system
LEFT JOIN Remark_Code_Step1 T5
ON T1.claim_number = T5.claim_number
AND T1.source_system = T5.source_system
FULL OUTER JOIN Pend_Step4 T7 -- 3,472,419 | Match Disctinct 3,472,419
ON LEFT(T1.claim_number,10) = T7.Claim_Number
LEFT JOIN Revenue_Code_Step1 T9
ON T1.claim_number = T9.claim_number
AND T1.source_system = T9.source_system
FULL OUTER JOIN Paid_Previous_Step4 T10 --3,816,359 | Match Disctinct 3,472,419
ON LEFT(T1.claim_number,10) = T10.Claim_Number
),

FINAL AS
(
SELECT T1.Claim_Number,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Report_Date
T1.Check_Date,
T1.Source_System,
T1.Status,
T1.Payment_Status,
T1.Pend_Status,
T1.Paid_Previous_Status,
T1.Submission_Type,
T1.Segment,
MIN(T2.Market) Market,
T1.Hold_Code,
T1.Pend_Code,
T1.Remark_Code,
T1.Revenue_Code,
T1.Bucket_Owner,
T1.Member_ID,
T1.Subscriber_ID,
T1.Subscriber_Group,
T1.provider_npi,
T1.provider_tin,
T1.provider_name,
T1.Billed_Amount,
T1.Processed_Amount,
T1.Paid_Amount,
T1.Interest_Paid_Amount,
T1.Pend_Amount
FROM Integration_Step1 T1
LEFT JOIN Market_Detail_Step3 T2
ON T1.member_id = T2.member_id
AND T1.Service_Date >= T2.source_start_date
AND T1.Service_Date <= T2.source_end_date
GROUP BY T1.Claim_Number,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Report_Date,
T1.Check_Date,
T1.Source_System,
T1.Status,
T1.Payment_Status,
T1.Pend_Status,
T1.Paid_Previous_Status,
T1.Submission_Type,
T1.Segment,
T1.Hold_Code,
T1.Pend_Code,
T1.Remark_Code,
T1.Revenue_Code,
T1.Bucket_Owner,
T1.Member_ID,
T1.Subscriber_ID,
T1.Subscriber_Group,
T1.provider_npi,
T1.provider_tin,
T1.provider_name,
T1.Billed_Amount,
T1.Processed_Amount,
T1.Paid_Amount,
T1.Interest_Paid_Amount,
T1.Pend_Amount
)

SELECT T1.Claim_Number,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Report_Date,
T1.Check_Date,
T1.Source_System,
T1.Status,
T1.Payment_Status,
T1.Pend_Status,
T1.Paid_Previous_Status,
T1.Submission_Type,
T1.Segment,
T1.Market,
T1.Hold_Code,
T1.Pend_Code,
T1.Remark_Code,
T1.Revenue_Code,
T1.Bucket_Owner,
T1.Member_ID,
T1.Subscriber_ID,
T1.Subscriber_Group,
T1.provider_npi,
T1.provider_tin,
T1.provider_name,
T1.Billed_Amount,
T1.Processed_Amount,
T1.Paid_Amount,
T1.Interest_Paid_Amount,
T1.Pend_Amount
FROM FINAL T1-- 3,789,713
)
;

Azure Databricks
Azure Databricks
An Apache Spark-based analytics platform optimized for Azure.
2,005 questions
0 comments No comments
{count} votes

Accepted answer
  1. Martin Cairney 2,246 Reputation points
    2021-08-11T03:03:06.453+00:00

    I'm assuming your first segment:

     /
     WITH Snapshot_Date AS
     (
     SELECT T1.claim_number,
     T1.source_system,
     MAX(T1.snapshot_date) snapshot_date
     FROM bhc_claim.medical_claim T1
     GROUP BY T1.claim_number,
     T1.source_system
     ),
     /
    

    is supposed to be commented out? If so then your actual initial commands are:

    CREATE OR REPLACE VIEW operations_staging.v_claims AS (
    
    Service_Date AS
    (
    

    Therefore you have ended up omitting the
    WITH
    statement to define the CTE.

    0 comments No comments

0 additional answers

Sort by: Most helpful