Allow for data analysis

This commit is contained in:
2025-09-08 12:29:20 -05:00
parent 951a58f2fa
commit 14d8211715
4 changed files with 236 additions and 83 deletions

View File

@@ -0,0 +1,103 @@
import asyncio
import io
import logging
from typing import AsyncGenerator

import pandas as pd
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import OllamaLLM
class AsyncDataAnalysisService:
    """Asynchronous service that answers questions about tabular data with an LLM.

    The uploaded file is parsed into a pandas DataFrame, condensed into a
    textual summary (schema, descriptive statistics, first rows) and that
    summary -- not the full data -- is sent to the model together with the
    user's question. The answer is streamed back chunk by chunk.
    """

    # Class-level logger so the service reports failures server-side as well
    # as to the user.
    _logger = logging.getLogger(__name__)

    # File types that ``generate_response`` knows how to parse.
    _SUPPORTED_FILE_TYPES = ("csv", "xlsx")

    def __init__(self):
        # A model with a large context window and strong analytical skills
        # is best; the summary can be long for wide datasets.
        self.llm = OllamaLLM(
            model="llama3.2",
            temperature=0.3,
            num_ctx=8192,
        )
        self.output_parser = StrOutputParser()
        self._setup_chain()

    def _setup_chain(self):
        """Build the prompt -> LLM -> string-parser chain used for analysis.

        Sets ``self.prompt`` and ``self.analysis_chain``. The chain expects a
        mapping with the keys ``data_summary`` and ``query``.
        """
        template = """You are an expert data analyst. A user has provided a summary and sample of a dataset and is asking a question about it.
Analyze the provided information and answer the user's question. If a calculation is requested, perform it based on the summary statistics provided. If the data is not suitable for the request, explain why.
---
Data Summary:
{data_summary}
---
User's Question: {query}
Answer:"""
        self.prompt = ChatPromptTemplate.from_template(template)
        self.analysis_chain = (
            {
                "data_summary": lambda x: x["data_summary"],
                "query": lambda x: x["query"],
            }
            | self.prompt
            | self.llm
            | self.output_parser
        )

    def _get_dataframe_summary(self, df: pd.DataFrame) -> str:
        """Return a structured, plain-text summary of ``df`` for the LLM.

        The summary contains the shape, the ``df.info()`` listing, the
        ``df.describe()`` statistics and the first five rows.

        Args:
            df: The DataFrame to summarise. May be empty.

        Returns:
            The summary sections joined with newlines.
        """
        num_rows, num_cols = df.shape
        summary_lines = [
            f"DataFrame has {num_rows} rows and {num_cols} columns.",
            "Column Information (Name, Dtype, Non-Null Count):",
            "--------------------------------------------------",
        ]

        # df.info() writes to a stream instead of returning a string, so
        # capture it in an in-memory buffer.
        info_buffer = io.StringIO()
        df.info(buf=info_buffer, verbose=True, show_counts=True)
        summary_lines.append(info_buffer.getvalue())

        summary_lines.append("\nDescriptive Statistics (for numerical columns):")
        summary_lines.append("--------------------------------------------")
        if num_cols:
            summary_lines.append(df.describe().to_string())
        else:
            # describe() raises ValueError on a frame without columns (e.g.
            # an empty spreadsheet); report that instead of failing.
            summary_lines.append("(no columns to describe)")

        summary_lines.append("\nSample of Data:")
        summary_lines.append("-----------------")
        # Only the first 5 rows are included to keep the prompt small.
        summary_lines.append(df.head(5).to_string())
        return "\n".join(summary_lines)

    def _summarize_file(self, decoded_file: bytes, file_type: str) -> str:
        """Parse the raw upload and return its summary (blocking helper).

        Intended to run in a worker thread; ``file_type`` must already be one
        of ``_SUPPORTED_FILE_TYPES``.
        """
        buffer = io.BytesIO(decoded_file)
        if file_type == "csv":
            df = pd.read_csv(buffer)
        else:
            df = pd.read_excel(buffer)
        return self._get_dataframe_summary(df)

    async def generate_response(
        self,
        query: str,
        decoded_file: bytes,
        file_type: str,
    ) -> AsyncGenerator[str, None]:
        """Stream an answer to ``query`` based on the uploaded data.

        Args:
            query: The user's question about the data.
            decoded_file: Raw bytes of the uploaded file.
            file_type: ``"csv"`` or ``"xlsx"``; anything else yields a single
                explanatory message.

        Yields:
            Chunks of the model's answer, or a single error message if the
            file cannot be processed.
        """
        try:
            if file_type not in self._SUPPORTED_FILE_TYPES:
                yield "I can only analyze CSV and XLSX files at this time."
                return

            # Parsing and describing the data is blocking, CPU-bound work;
            # run it off the event loop so other connections stay responsive.
            data_summary = await asyncio.to_thread(
                self._summarize_file, decoded_file, file_type
            )

            # Send the structured summary instead of the full data.
            chain_input = {
                "data_summary": data_summary,
                "query": query,
            }
            async for chunk in self.analysis_chain.astream(chain_input):
                yield chunk
        except Exception as e:
            # Boundary handler: keep the stream alive for the client, but
            # record the traceback for operators.
            self._logger.exception(
                "Data analysis failed for file_type=%s", file_type
            )
            yield f"An error occurred while processing the file: {e}"

View File

@@ -9,6 +9,7 @@ class PromptType(Enum):
GENERAL_CHAT = auto() GENERAL_CHAT = auto()
RAG = auto() RAG = auto()
IMAGE_GENERATION = auto() IMAGE_GENERATION = auto()
DATA_ANALYSIS = auto()
UNKNOWN = auto() UNKNOWN = auto()
@@ -35,43 +36,45 @@ class PromptClassifier(BaseService):
1. GENERAL_CHAT - Casual conversation, personal questions, or non-specific inquiries 1. GENERAL_CHAT - Casual conversation, personal questions, or non-specific inquiries
2. RAG - ONLY when explicitly requesting document/search-based knowledge 2. RAG - ONLY when explicitly requesting document/search-based knowledge
3. IMAGE_GENERATION - Specific requests to create/modify images 3. IMAGE_GENERATION - Specific requests to create/modify images
4. UNKNOWN - If none of the above fit 4. DATA_ANALYSIS - When a user is asking questions about an uploaded spreadsheet or CSV file. The user's message contains the data from the file.
5. UNKNOWN - If none of the above fit
1. IMAGE_GENERATION - ONLY if: 1. IMAGE_GENERATION - ONLY if:
- Explicitly contains: "generate/create/draw/make an image/picture/photo/art/illustration" - Explicitly contains: "generate/create/draw/make an image/picture/photo/art/illustration"
- Requests visual content creation - Requests visual content creation
- Example: "Make a picture of a castle" → IMAGE_GENERATION - Example: "Make a picture of a castle" → IMAGE_GENERATION
2. RAG - ONLY if: 2. RAG - ONLY if:
- Explicitly mentions documents/files/data - Explicitly mentions documents/files/data
- Uses search terms: "find/search/lookup in [source]" - Uses search terms: "find/search/lookup in [source]"
- Example: "What does contracts.pdf say?" → RAG - Example: "What does contracts.pdf say?" → RAG
3. GENERAL_CHAT - DEFAULT category when: 3. DATA_ANALYSIS - ONLY if:
- Doesn't meet above criteria - The message explicitly contains structured data from a file (e.g., a DataFrame string)
- Conversational/general knowledge - The user is asking to analyze, summarize, or plot the data
- Uncertain cases - Example: "Here is the sales data. What is the average revenue per product?" -> DATA_ANALYSIS
- Example: "Tell me a joke" → GENERAL_CHAT
4. GENERAL_CHAT - DEFAULT category when:
- Doesn't meet above criteria
- Conversational/general knowledge
- Uncertain cases
- Example: "Tell me a joke" → GENERAL_CHAT
Examples: Examples:
[Definitely RAG] [Definitely RAG]
- "What does the uploaded PDF say about quarterly results?" - "What does the uploaded PDF say about quarterly results?"
- "Search our documents for the 2023 marketing strategy" - "Search our documents for the 2023 marketing strategy"
- "Find the contract clause about termination"
[Definitely DATA_ANALYSIS]
- "Here is the file content. What is the sum of all 'Sales'?"
- "Based on this CSV data, show me the top 5 customers."
[Definitely GENERAL_CHAT] [Definitely GENERAL_CHAT]
- "How does photosynthesis work?" (General knowledge) - "How does photosynthesis work?" (General knowledge)
- "Tell me a joke" - "Tell me a joke"
- "What's your opinion on AI?"
[Borderline GENERAL_CHAT] [Borderline -> GENERAL_CHAT]
- "What's our company policy on X?" (No doc reference general) - "What's our company policy on X?" (No doc reference -> general)
- "Explain quantum computing" (General knowledge)
- "Summarize the meeting" (No doc reference)
[Definitely NOT IMAGE_GENERATION]
- "Great, can you make it about a duck now"
- "highlight the features of the backyard playset if they were to choose us and make the language more long form"
Return ONLY the label, no explanations.""", Return ONLY the label, no explanations.""",
), ),

View File

@@ -44,6 +44,7 @@ from django.conf import settings
import json import json
import base64 import base64
import pandas as pd import pandas as pd
import io
# For email support # For email support
from django.core.mail import EmailMultiAlternatives from django.core.mail import EmailMultiAlternatives
@@ -68,16 +69,22 @@ from .services.rag_services import AsyncRAGService
from .services.title_generator import title_generator from .services.title_generator import title_generator
from .services.moderation_classifier import moderation_classifier, ModerationLabel from .services.moderation_classifier import moderation_classifier, ModerationLabel
from .services.prompt_classifier.prompt_classifier import PromptClassifier, PromptType from .services.prompt_classifier.prompt_classifier import PromptClassifier, PromptType
from .services.data_analysis_service import AsyncDataAnalysisService
from langchain.chains import create_retrieval_chain from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_ollama import ChatOllama from langchain_ollama import ChatOllama
import logging
logger = logging.getLogger(__name__)
CHANNEL_NAME: str = "llm_messages" CHANNEL_NAME: str = "llm_messages"
MODEL_NAME: str = "llama3.2" MODEL_NAME: str = "llama3.2"
# Create your views here. # Create your views here.
class CustomObtainTokenView(TokenObtainPairView): class CustomObtainTokenView(TokenObtainPairView):
permission_classes = (permissions.AllowAny,) permission_classes = (permissions.AllowAny,)
@@ -99,8 +106,8 @@ class CustomUserCreate(APIView):
def send_invite_email(slug, email_to_invite): def send_invite_email(slug, email_to_invite):
print("Sending invite email") logger.info("Sending invite email")
print(f"url : https://chat.aimloperations.com/set_password?slug={slug}") logger.info(f"url : https://chat.aimloperations.com/set_password?slug={slug}")
url = f"https://chat.aimloperations.com/set_password?slug={slug}" url = f"https://chat.aimloperations.com/set_password?slug={slug}"
subject = "Welcome to AI ML Operations, LLC Chat Services" subject = "Welcome to AI ML Operations, LLC Chat Services"
from_email = "ryan@aimloperations.com" from_email = "ryan@aimloperations.com"
@@ -115,8 +122,8 @@ def send_invite_email(slug, email_to_invite):
def send_password_reset_email(slug, email_to_invite): def send_password_reset_email(slug, email_to_invite):
print("Sending reset email") logger.info("Sending reset email")
print(f"url : https://www.chat.aimloperations.com/set_password?slug={slug}") logger.info(f"url : https://www.chat.aimloperations.com/set_password?slug={slug}")
url = f"https://www.chat.aimloperations.com/set_password?slug={slug}" url = f"https://www.chat.aimloperations.com/set_password?slug={slug}"
subject = "Password reset for AI ML Operations, LLC Chat Services" subject = "Password reset for AI ML Operations, LLC Chat Services"
from_email = "ryan@aimloperations.com" from_email = "ryan@aimloperations.com"
@@ -131,7 +138,7 @@ def send_password_reset_email(slug, email_to_invite):
def send_feedback_email(feedback_obj): def send_feedback_email(feedback_obj):
print("Sending feedback email") logger.info("Sending feedback email")
subject = "New Feedback for Chat by AI ML Operations, LLC" subject = "New Feedback for Chat by AI ML Operations, LLC"
from_email = "ryan@aimloperations.com" from_email = "ryan@aimloperations.com"
to = "ryan@aimloperations.com" to = "ryan@aimloperations.com"
@@ -145,7 +152,7 @@ def send_feedback_email(feedback_obj):
def send_password_reset_email(slug, email_to_invite): def send_password_reset_email(slug, email_to_invite):
print("Sending Password reset email") logger.info("Sending Password reset email")
url = f"https://www.chat.aimloperations.com/set_password?slug={slug}" url = f"https://www.chat.aimloperations.com/set_password?slug={slug}"
subject = "Password reset for Chat by AI ML Operations, LLC" subject = "Password reset for Chat by AI ML Operations, LLC"
from_email = "ryan@aimloperations.com" from_email = "ryan@aimloperations.com"
@@ -233,7 +240,7 @@ class ResetUserPassword(APIView):
Send an email with a set password link to the set password page Send an email with a set password link to the set password page
Also disable the account Also disable the account
""" """
print(f"Password reset for requests. {request.data}") logger.info(f"Password reset for requests. {request.data}")
token = request.data.get("recaptchaToken") token = request.data.get("recaptchaToken")
payload = { payload = {
"secret": settings.CAPTCHA_SECRET_KEY, "secret": settings.CAPTCHA_SECRET_KEY,
@@ -252,7 +259,7 @@ class ResetUserPassword(APIView):
# send the email # send the email
send_password_reset_email(user.slug, email) send_password_reset_email(user.slug, email)
else: else:
print("Captcha secret failed") logger.error("Captcha secret failed")
return Response(status=status.HTTP_200_OK) return Response(status=status.HTTP_200_OK)
@@ -284,14 +291,14 @@ class CustomUserGet(APIView):
email = request.user.email email = request.user.email
username = request.user.username username = request.user.username
user = CustomUser.objects.filter(email=email).last() user = CustomUser.objects.filter(email=email).last()
print(f"Getting the user: {user}") logger.info(f"Getting the user: {user}")
try: try:
serializer = CustomUserSerializer(user) serializer = CustomUserSerializer(user)
print(f"serializer: {serializer}") logger.debug(f"serializer: {serializer}")
print(serializer.data) logger.debug(serializer.data)
return Response(serializer.data, status=status.HTTP_200_OK) return Response(serializer.data, status=status.HTTP_200_OK)
except Exception as e: except Exception as e:
print(f"Exception: {e}") logger.error(f"Exception: {e}")
return Response({}, status=status.HTTP_400_BAD_REQUEST) return Response({}, status=status.HTTP_400_BAD_REQUEST)
@@ -300,7 +307,7 @@ class FeedbackView(APIView):
def post(self, request, format="json"): def post(self, request, format="json"):
serializer = FeedbackSerializer(data=request.data) serializer = FeedbackSerializer(data=request.data)
print(request.data) logger.debug(request.data)
if serializer.is_valid(): if serializer.is_valid():
feedback_obj = serializer.save() feedback_obj = serializer.save()
@@ -310,7 +317,7 @@ class FeedbackView(APIView):
send_feedback_email(feedback_obj) send_feedback_email(feedback_obj)
return Response(serializer.data, status=status.HTTP_201_CREATED) return Response(serializer.data, status=status.HTTP_201_CREATED)
else: else:
print(serializer.errors) logger.error(serializer.errors)
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
def get(self, request, format="json"): def get(self, request, format="json"):
@@ -453,7 +460,7 @@ class ConversationDetailView(APIView):
return Response(serailzer.data, status=status.HTTP_200_OK) return Response(serailzer.data, status=status.HTTP_200_OK)
def post(self, request, format="json"): def post(self, request, format="json"):
print("In the post") logger.info("In the post")
# Add the prompt to the database # Add the prompt to the database
# make sure there is a conversation for it # make sure there is a conversation for it
# if there is not a conversation create a title for it # if there is not a conversation create a title for it
@@ -481,7 +488,7 @@ class ConversationDetailView(APIView):
prompt_instance = serializer.save() prompt_instance = serializer.save()
# set up the streaming response if it is from the user # set up the streaming response if it is from the user
print(f"Do we have a valid user? {is_user}") logger.info(f"Do we have a valid user? {is_user}")
if is_user: if is_user:
messages = [] messages = []
for prompt_obj in Prompt.objects.filter( for prompt_obj in Prompt.objects.filter(
@@ -495,12 +502,12 @@ class ConversationDetailView(APIView):
) )
channel_layer = get_channel_layer() channel_layer = get_channel_layer()
print(f"Sending to the channel: {CHANNEL_NAME}") logger.info(f"Sending to the channel: {CHANNEL_NAME}")
async_to_sync(channel_layer.group_send)( async_to_sync(channel_layer.group_send)(
CHANNEL_NAME, {"type": "receive", "content": messages} CHANNEL_NAME, {"type": "receive", "content": messages}
) )
except: except:
print( logger.error(
f"Error trying to submit to conversation_id: {conversation_id} with request.data: {request.data}" f"Error trying to submit to conversation_id: {conversation_id} with request.data: {request.data}"
) )
pass pass
@@ -740,7 +747,7 @@ def get_messages(conversation_id, prompt, file_string: str = None, file_type: st
messages = [] messages = []
conversation = Conversation.objects.get(id=conversation_id) conversation = Conversation.objects.get(id=conversation_id)
print(file_string) logger.debug(file_string)
# add the prompt to the conversation # add the prompt to the conversation
serializer = PromptSerializer( serializer = PromptSerializer(
@@ -843,27 +850,43 @@ def update_prompt_metric(prompt_metric, status):
@database_sync_to_async @database_sync_to_async
def finish_prompt_metric(prompt_metric, response_length): def finish_prompt_metric(prompt_metric, response_length):
print(f"finish_prompt_metric: {response_length}") logger.info(f"finish_prompt_metric: {response_length}")
prompt_metric.end_time = timezone.now() prompt_metric.end_time = timezone.now()
prompt_metric.reponse_length = response_length prompt_metric.reponse_length = response_length
prompt_metric.event = "FINISHED" prompt_metric.event = "FINISHED"
prompt_metric.save(update_fields=["end_time", "reponse_length", "event"]) prompt_metric.save(update_fields=["end_time", "reponse_length", "event"])
print("finish_prompt_metric saved") logger.info("finish_prompt_metric saved")
@database_sync_to_async @database_sync_to_async
def get_retriever(conversation_id): def get_retriever(conversation_id):
print(f"getting workspace from conversation: {conversation_id}") logger.info(f"getting workspace from conversation: {conversation_id}")
conversation = Conversation.objects.get(id=conversation_id) conversation = Conversation.objects.get(id=conversation_id)
print(f"Got conversation: {conversation}") logger.info(f"Got conversation: {conversation}")
workspace = DocumentWorkspace.objects.get(company=conversation.user.company) workspace = DocumentWorkspace.objects.get(company=conversation.user.company)
print(f"Got workspace: {conversation}") logger.info(f"Got workspace: {conversation}")
vectorstore = Chroma( vectorstore = Chroma(
persist_directory=f"./chroma_db/", persist_directory=f"./chroma_db/",
embedding=OllamaEmbeddings(model="llama3.2"), embedding=OllamaEmbeddings(model="llama3.2"),
) )
return vectorstore.as_retriever() return vectorstore.as_retriever()
async def get_conversation_file_async(conversation_id):
    """Return the bytes and type of the earliest file attached to a conversation.

    Looks up the first prompt (ordered by ``created``) in the conversation
    whose ``file`` field is not empty and reads that file.

    Args:
        conversation_id: Primary key of the conversation to search.

    Returns:
        A ``(file_data, file_type)`` tuple, or ``(None, None)`` when no prompt
        with a file exists or the lookup/read fails.
    """

    def _read_and_close(field_file):
        # Open explicitly and let the context manager close the handle so the
        # underlying storage file is not left open after reading.
        with field_file.open("rb") as handle:
            return handle.read()

    try:
        # Get the very first prompt in the conversation that has a file
        prompt_with_file = await (
            Prompt.objects.filter(conversation_id=conversation_id)
            .exclude(file="")
            .order_by("created")
            .afirst()
        )

        if prompt_with_file and prompt_with_file.file:
            # File access is blocking I/O, so it goes through sync_to_async.
            file_data = await sync_to_async(_read_and_close)(prompt_with_file.file)
            return file_data, prompt_with_file.file_type
    except Exception as e:
        # Best-effort lookup: log with traceback and fall through to the
        # "no file" result rather than failing the chat request.
        logger.exception(
            "Error retrieving file from conversation history: %s", e
        )
    return None, None
PROMPT_CLASSIFIER = PromptClassifier() PROMPT_CLASSIFIER = PromptClassifier()
class ChatConsumerAgain(AsyncWebsocketConsumer): class ChatConsumerAgain(AsyncWebsocketConsumer):
@@ -874,8 +897,8 @@ class ChatConsumerAgain(AsyncWebsocketConsumer):
await self.close() await self.close()
async def receive(self, text_data=None, bytes_data=None): async def receive(self, text_data=None, bytes_data=None):
print(f"Text Data: {text_data}") logger.debug(f"Text Data: {text_data}")
print(f"Bytes Data: {bytes_data}") logger.debug(f"Bytes Data: {bytes_data}")
if text_data: if text_data:
data = json.loads(text_data) data = json.loads(text_data)
message = data.get("message", None) message = data.get("message", None)
@@ -896,21 +919,26 @@ class ChatConsumerAgain(AsyncWebsocketConsumer):
if file: if file:
decoded_file = base64.b64decode(file) decoded_file = base64.b64decode(file)
print(decoded_file) logger.debug(decoded_file)
# The `altered_message` should only be created if a file exists
# and you want to pass its content directly to the classifier.
# Here, we'll let the classifier decide based on the user's prompt
# and then handle the file content separately.
altered_message = message
if "csv" in file_type: if "csv" in file_type:
file_type = "csv" file_type = "csv"
altered_message = f"{message}\n The file type is csv and the file contents are: {decoded_file}" #altered_message = f"{message}\n The file type is csv and the file contents are: {decoded_file}"
elif "xmlformats-officedocument" in file_type: elif "xmlformats-officedocument" in file_type:
file_type = "xlsx" file_type = "xlsx"
df = pd.read_excel(decoded_file) #df = pd.read_excel(decoded_file)
altered_message = f"{message}\n The file type is xlsx and the file contents are: {df}" #altered_message = f"{message}\n The file type is xlsx and the file contents are: {df}"
elif "text" in file_type: elif "text" in file_type:
file_type = "txt" file_type = "txt"
altered_message = f"{message}\n The file type is txt and the file contents are: {decoded_file}" #altered_message = f"{message}\n The file type is txt and the file contents are: {decoded_file}"
else: else:
file_type = "Not Sure" file_type = "Not Sure"
print(f'received: "{message}" for conversation {conversation_id}') logger.info(f'received: "{message}" for conversation {conversation_id}')
# check the moderation here # check the moderation here
if ( if (
@@ -918,7 +946,7 @@ class ChatConsumerAgain(AsyncWebsocketConsumer):
== ModerationLabel.NSFW == ModerationLabel.NSFW
): ):
response = "Prompt has been marked as NSFW. If this is in error, submit a feedback with the prompt text." response = "Prompt has been marked as NSFW. If this is in error, submit a feedback with the prompt text."
print("this prompt has been marked as NSFW") logger.warning("this prompt has been marked as NSFW")
await self.send("CONVERSATION_ID") await self.send("CONVERSATION_ID")
await self.send(str(conversation_id)) await self.send(str(conversation_id))
await self.send("START_OF_THE_STREAM_ENDER_GAME_42") await self.send("START_OF_THE_STREAM_ENDER_GAME_42")
@@ -934,11 +962,19 @@ class ChatConsumerAgain(AsyncWebsocketConsumer):
messages, prompt = await get_messages( messages, prompt = await get_messages(
conversation_id, message, decoded_file, file_type conversation_id, message, decoded_file, file_type
) )
if not decoded_file:
decoded_file, file_type = await get_conversation_file_async(conversation_id)
prompt_type = await PROMPT_CLASSIFIER.classify_async(message) prompt_type = await PROMPT_CLASSIFIER.classify_async(message)
logger.info(f"prompt_type: {prompt_type} for {message}")
print(f"prompt_type: {prompt_type} for {message}") print(f"prompt_type: {prompt_type} for {message}")
if file: # Check for a file AND the new DATA_ANALYSIS type
# The classifier might not correctly identify a data analysis prompt
# without the file contents. So, we'll add a check to override.
if decoded_file and (prompt_type == PromptType.DATA_ANALYSIS or 'analyze' in message.lower() or 'data' in message.lower()):
prompt_type = PromptType.DATA_ANALYSIS
elif decoded_file:
# If a decoded_file is uploaded but the query is general, default to GENERAL_CHAT
prompt_type = PromptType.GENERAL_CHAT prompt_type = PromptType.GENERAL_CHAT
prompt_metric = await create_prompt_metric( prompt_metric = await create_prompt_metric(
@@ -952,7 +988,7 @@ class ChatConsumerAgain(AsyncWebsocketConsumer):
if file: if file:
# udpate with the altered_message # udpate with the altered_message
messages = messages[:-1] + [HumanMessage(content=altered_message)] messages = messages[:-1] + [HumanMessage(content=altered_message)]
print(messages) logger.info(messages)
# send it to the LLM # send it to the LLM
# stream the response back # stream the response back
@@ -965,19 +1001,27 @@ class ChatConsumerAgain(AsyncWebsocketConsumer):
service = AsyncRAGService() service = AsyncRAGService()
# await service.ingest_documents() # await service.ingest_documents()
workspace = await get_workspace(conversation_id) workspace = await get_workspace(conversation_id)
print("Time to get the rag response") logger.info("Time to get the rag response")
async for chunk in service.generate_response( async for chunk in service.generate_response(
messages, prompt.message, workspace messages, prompt.message, workspace
): ):
response += chunk response += chunk
await self.send(chunk) await self.send(chunk)
elif prompt_type == PromptType.DATA_ANALYSIS:
service = AsyncDataAnalysisService()
if not decoded_file:
await self.send("Please upload a file to perform data analysis.")
else:
async for chunk in service.generate_response(prompt.message, decoded_file, file_type):
response += chunk
await self.send(chunk)
elif prompt_type == PromptType.IMAGE_GENERATION: elif prompt_type == PromptType.IMAGE_GENERATION:
response = "Image Generation is not supported at this time, but it will be soon." response = "Image Generation is not supported at this time, but it will be soon."
await self.send(response) await self.send(response)
else: else:
print(f"using the AsyncLLMService\n\n{messages}\n{prompt.message}") logger.info(f"using the AsyncLLMService\n\n{messages}\n{prompt.message}")
service = AsyncLLMService() service = AsyncLLMService()
async for chunk in service.generate_response( async for chunk in service.generate_response(
messages, prompt.message, conversation_id messages, prompt.message, conversation_id
@@ -990,7 +1034,7 @@ class ChatConsumerAgain(AsyncWebsocketConsumer):
await finish_prompt_metric(prompt_metric, len(response)) await finish_prompt_metric(prompt_metric, len(response))
if bytes_data: if bytes_data:
print("we have byte data") logger.info("we have byte data")
# Document Views # Document Views
@@ -1014,7 +1058,7 @@ class DocumentUploadView(APIView):
# permission_classes = [permissions.IsAuthenticated]Z # permission_classes = [permissions.IsAuthenticated]Z
def get(self, request): def get(self, request):
print(f"request_3: {request}") logger.debug(f"request_3: {request}")
try: try:
workspace = DocumentWorkspace.objects.get(company=request.user.company) workspace = DocumentWorkspace.objects.get(company=request.user.company)
serializer = DocumentSerializer( serializer = DocumentSerializer(
@@ -1028,7 +1072,7 @@ class DocumentUploadView(APIView):
) )
def post(self, request): def post(self, request):
print(f"request: {request}") logger.debug(f"request: {request}")
try: try:
workspace = DocumentWorkspace.objects.get(company=request.user.company) workspace = DocumentWorkspace.objects.get(company=request.user.company)
@@ -1038,14 +1082,14 @@ class DocumentUploadView(APIView):
{"error": "Workspace not found"}, status=status.HTTP_404_NOT_FOUND {"error": "Workspace not found"}, status=status.HTTP_404_NOT_FOUND
) )
print(request.FILES) logger.info(request.FILES)
file = request.FILES.get("file") file = request.FILES.get("file")
if not file: if not file:
return Response( return Response(
{"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST {"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST
) )
print("have the workspace and the file") logger.info("have the workspace and the file")
document = Document.objects.create(workspace=workspace, file=file) document = Document.objects.create(workspace=workspace, file=file)
@@ -1072,7 +1116,7 @@ class DocumentDetailView(APIView):
# permission_classes = [permissions.IsAuthenticated] # permission_classes = [permissions.IsAuthenticated]
def get(self, request, document_id): def get(self, request, document_id):
print(f"request: {request}") logger.info(f"request: {request}")
try: try:
workspace = DocumentWorkspace.objects.get(company=request.user.company) workspace = DocumentWorkspace.objects.get(company=request.user.company)

View File

@@ -43,8 +43,9 @@ duckdb==1.2.1
durationpy==0.9 durationpy==0.9
effdet==0.4.1 effdet==0.4.1
emoji==2.14.1 emoji==2.14.1
et_xmlfile==2.0.0
eval_type_backport==0.2.2 eval_type_backport==0.2.2
Faker Faker==37.0.0
fastapi==0.115.9 fastapi==0.115.9
filelock==3.17.0 filelock==3.17.0
filetype==1.2.0 filetype==1.2.0
@@ -52,13 +53,13 @@ flatbuffers==25.2.10
fonttools==4.56.0 fonttools==4.56.0
frozenlist==1.6.0 frozenlist==1.6.0
fsspec==2025.2.0 fsspec==2025.2.0
google-api-core google-api-core==2.24.2
google-auth google-auth==2.39.0
google-cloud-vision google-cloud-vision==3.10.1
googleapis-common-protos googleapis-common-protos==1.70.0
greenlet==3.1.1 greenlet==3.1.1
grpcio grpcio==1.72.0rc1
grpcio-status grpcio-status==1.72.0rc1
h11==0.14.0 h11==0.14.0
html5lib==1.1 html5lib==1.1
httpcore==1.0.7 httpcore==1.0.7
@@ -121,13 +122,14 @@ oauthlib==3.2.2
olefile==0.47 olefile==0.47
ollama==0.4.7 ollama==0.4.7
omegaconf==2.3.0 omegaconf==2.3.0
onnx onnx==1.18.0
onnxruntime onnxruntime==1.21.1
openai==1.65.4 openai==1.65.4
opencv-python==4.11.0.86 opencv-python==4.11.0.86
opentelemetry-api openpyxl==3.1.5
opentelemetry-exporter-otlp-proto-common opentelemetry-api==1.32.1
opentelemetry-exporter-otlp-proto-grpc opentelemetry-exporter-otlp-proto-common==1.32.1
opentelemetry-exporter-otlp-proto-grpc==1.32.1
opentelemetry-instrumentation==0.53b1 opentelemetry-instrumentation==0.53b1
opentelemetry-instrumentation-asgi==0.53b1 opentelemetry-instrumentation-asgi==0.53b1
opentelemetry-instrumentation-fastapi==0.53b1 opentelemetry-instrumentation-fastapi==0.53b1
@@ -138,8 +140,9 @@ opentelemetry-util-http==0.53b1
orjson==3.10.15 orjson==3.10.15
overrides==7.7.0 overrides==7.7.0
packaging==24.2 packaging==24.2
pandas pandas==2.2.3
pandasai pandasai==2.4.2
parameterized==0.9.0
pathspec==0.12.1 pathspec==0.12.1
pdf2image==1.17.0 pdf2image==1.17.0
pdfminer.six==20250506 pdfminer.six==20250506
@@ -150,7 +153,7 @@ platformdirs==4.3.6
posthog==4.0.1 posthog==4.0.1
propcache==0.3.1 propcache==0.3.1
proto-plus==1.26.1 proto-plus==1.26.1
protobuf protobuf==6.31.0rc2
psutil==7.0.0 psutil==7.0.0
pyasn1==0.6.1 pyasn1==0.6.1
pyasn1_modules==0.4.1 pyasn1_modules==0.4.1
@@ -160,7 +163,7 @@ pydantic==2.11.4
pydantic-settings==2.9.1 pydantic-settings==2.9.1
pydantic_core==2.33.2 pydantic_core==2.33.2
Pygments==2.19.1 Pygments==2.19.1
PyJWT PyJWT==2.10.1
pyOpenSSL==25.0.0 pyOpenSSL==25.0.0
pyparsing==3.2.1 pyparsing==3.2.1
pypdf==5.4.0 pypdf==5.4.0