Older versions of pandas have known issues handling the "fancy" right single quote (byte 0x92 in Windows-1252), which is not valid UTF-8. You can clean up the CSV file, or try reading it with another code page, as described in this SO thread: https://stackoverflow.com/questions/46000191/utf-8-codec-cant-decode-byte-0x92-in-position-18-invalid-start-byte
Unable to read a CSV using AzureOPENAI and Langchain with create_csv_agent(AzureOpenAI())
Satya Ramadas Metla
15
Reputation points
I am trying to create a bot on top of a CSV file using Azure OpenAI (as the LLM) and the LangChain framework, but I get the error "UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 12062: invalid start byte" when I run it. Please refer to my code snippet below and correct me if something is wrong.
from langchain_experimental.agents.agent_toolkits.csv.base import create_csv_agent
from langchain_openai import AzureOpenAI
from dotenv import load_dotenv
import os
import streamlit as st
def main():
    """Run a Streamlit page that answers questions about ``Data.csv``.

    Calls ``load_dotenv()``, sets the Azure OpenAI environment variables,
    builds a LangChain CSV agent over ``Data.csv`` and, whenever the user
    has typed a non-empty question, writes the agent's answer to the page.
    """
    load_dotenv()

    # Placeholder values: replace with the real endpoint and key.
    os.environ["OPENAI_API_TYPE"] = "azure"
    os.environ["OPENAI_API_VERSION"] = "2023-05-15"
    os.environ["AZURE_OPENAI_ENDPOINT"] = "xxxx"
    os.environ["OPENAI_API_KEY"] = "xxxx"

    st.set_page_config(page_title="XXXXX")
    st.header("XXXXX ")

    llm = AzureOpenAI(
        deployment_name="name",
        model_name="gpt-3.5-turbo",
    )

    # pandas.read_csv decodes as UTF-8 by default. Data.csv contains byte
    # 0x92 (the Windows-1252 right single quote), which is not valid UTF-8
    # and raises UnicodeDecodeError. pandas_kwargs is forwarded to
    # read_csv, so state the file's encoding explicitly; change "cp1252"
    # if the file was saved with a different code page.
    agent = create_csv_agent(
        llm,
        'Data.csv',
        pandas_kwargs={"encoding": "cp1252"},
    )

    user_question = st.text_input("Ask your question ")
    # Skip the agent call until the user has actually entered a question.
    if user_question:
        with st.spinner(text="In progress..."):
            st.write(agent.run(user_question))


if __name__ == "__main__":
    main()
Error Message below:
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 12062: invalid start byte
Traceback:
File "C:\Program Files\Python39\lib\site-packages\streamlit\runtime\scriptrunner\script_runner.py", line 535, in _run_script
exec(code, module.__dict__)
File "C:\UsersXXXmain.py", line 38, in <module>
main()
File "C:\Users\XXXX\XXXX\main.py", line 27, in main
agent = create_csv_agent(llm, 'Data.csv')
File "C:\Program Files\Python39\lib\site-packages\langchain_experimental\agents\agent_toolkits\csv\base.py", line 28, in create_csv_agent
df = pd.read_csv(path, **_kwargs)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 1024, in read_csv
return _read(filepath_or_buffer, kwds)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 618, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 1618, in __init__
self._engine = self._make_engine(f, self.engine)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 1896, in _make_engine
return mapping[engine](f, **self.options)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py", line 93, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "parsers.pyx", line 574, in pandas._libs.parsers.TextReader.__cinit__
File "parsers.pyx", line 663, in pandas._libs.parsers.TextReader._get_header
File "parsers.pyx", line 874, in pandas._libs.parsers.TextReader._tokenize_rows
File "parsers.pyx", line 891, in pandas._libs.parsers.TextReader._check_tokenize_status
File "parsers.pyx", line 2053, in pandas._libs.parsers.raise_parser_error
Azure OpenAI Service
Azure OpenAI Service
An Azure service that provides access to OpenAI’s GPT-3 models with enterprise capabilities.
4,080 questions
2 answers
Sort by: Most helpful
-
-
Saurabh Sharma 23,846 Reputation points Microsoft Employee Moderator
2024-02-13T23:25:06.9966667+00:00 @Satya Ramadas Metla I am able to run it without any issues.
Could you please try the code below and see whether it works for you?
from langchain_experimental.agents.agent_toolkits.csv.base import create_csv_agent
from langchain_openai import AzureOpenAI
from dotenv import load_dotenv
import os
import streamlit as st


def main():
    """Serve a Streamlit page for asking questions about a sales CSV.

    Reads the Azure OpenAI endpoint and key from the ``AOAIEndpoint`` and
    ``AOAIKey`` environment variables (after ``load_dotenv()``), builds a
    LangChain CSV agent, and writes the agent's answer for each non-empty
    question entered on the page.
    """
    load_dotenv()

    # Connection settings for the Azure OpenAI deployment.
    aoai_endpoint = os.getenv("AOAIEndpoint")
    aoai_key = os.getenv("AOAIKey")
    aoai_api_version = "2023-09-15-preview"
    aoai_deployment = "test_chatgpt"
    aoai_model = "gpt-35-turbo"

    st.set_page_config(page_title="Chat on CSV Data")
    st.header("Ask Questions about the CSV Data using Azure OpenAI!")

    llm = AzureOpenAI(
        azure_endpoint=aoai_endpoint,
        openai_api_key=aoai_key,
        api_version=aoai_api_version,
        deployment_name=aoai_deployment,
        model_name=aoai_model,
    )
    agent = create_csv_agent(
        llm,
        './Jupyter Notebooks/data/Sales_Performance_Report_DQLab_Store.csv',
    )

    user_question = st.text_input("Ask your question here:")
    # Nothing to answer until the user has typed a question.
    if user_question is None or user_question == "":
        return

    with st.spinner(text="In progress..."):
        answer = agent.run(user_question)
        st.write(answer)


if __name__ == "__main__":
    main()
Please let me know if you have any other questions. Thanks Saurabh