first commit
commit 8bcb52c93a
.gitignore (vendored, new file, +1 line)
@@ -0,0 +1 @@
.venv
Makefile (new file, +12 lines)
@@ -0,0 +1,12 @@
PYTHON := 3.12
VENV_PATH := .venv
PORT := 8501

.PHONY: venv

venv:
	uv venv --python $(PYTHON) --allow-existing $(VENV_PATH)
	source $(VENV_PATH)/bin/activate && uv pip install -r requirements.txt

run: venv
	source $(VENV_PATH)/bin/activate && $(VENV_PATH)/bin/streamlit run app.py --server.port $(PORT)
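Usage note (assuming uv is installed and on PATH): make venv creates the .venv environment and installs the pinned requirements, and make run launches the Streamlit app on port 8501.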
app.py (new file, +185 lines)
@@ -0,0 +1,185 @@
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import streamlit as st


def process_and_plot_csv(df):
    """Processes the uploaded CSV file and creates interactive Plotly plots."""

    # Remove unnamed columns
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

    # Drop rows where book names are missing
    df = df.dropna(subset=[df.columns[0]])

    # Extract book names
    book_names = df.iloc[:, 0].astype(str)

    # Extract only numeric cumulative chapter values
    cumulative_books = df.iloc[:, 2:].apply(pd.to_numeric, errors='coerce')

    # Drop rows where all cumulative values are NaN, keeping book names aligned
    has_values = ~cumulative_books.isna().all(axis=1)
    cumulative_books = cumulative_books[has_values]
    book_names = book_names[has_values]

    # Convert to numpy array
    cumulative_books = cumulative_books.values

    # Remove books with only a single cumulative value
    valid_books = [book for book in cumulative_books if np.count_nonzero(~np.isnan(book)) > 1]
    valid_book_names = [book_names.iloc[i] for i in range(len(cumulative_books)) if np.count_nonzero(~np.isnan(cumulative_books[i])) > 1]

    # Debug info
    st.write(f"Total Books Before Filtering: {len(cumulative_books)}, After Filtering: {len(valid_books)}")

    # Normalize to 100 points for each book
    target_points = 100
    normalised_books = []
    normalised_book_names = []
    change_values = []

    for i, book in enumerate(valid_books):
        book = np.array(book)
        book = book[~np.isnan(book)]  # Remove NaNs

        original_points = np.arange(len(book))  # Original chapter indices
        target_indices = np.linspace(0, len(book) - 1, target_points)  # Normalize to 100 points
        normalised = np.interp(target_indices, original_points, book)  # Interpolation

        normalised_books.append(normalised[:target_points])  # Ensure exactly 100 points
        normalised_book_names.append(valid_book_names[i])
        change_values.append(normalised[-1] - normalised[0])  # Store change from start to end

    # Separate books into positive and negative groups based on start vs. end value
    positive_books = []
    negative_books = []
    positive_names = []
    negative_names = []

    # Creating bins dynamically for the pie chart
    min_change = min(change_values)
    max_change = max(change_values)
    num_bins = 16  # Adjust this number for more or fewer bins

    bin_edges = np.linspace(min_change, max_change, num_bins + 1)
    book_change_categories = {f"Change: {bin_edges[i]:.1f} to {bin_edges[i + 1]:.1f}": [] for i in range(num_bins)}

    positive_vs_negative = {"Positive Books": [], "Negative Books": []}

    for i, book in enumerate(normalised_books):
        start_value = book[0]
        final_value = book[-1]
        change = final_value - start_value

        # Classify as positive or negative
        if final_value >= start_value:
            positive_books.append(book)
            positive_names.append(normalised_book_names[i])
            positive_vs_negative["Positive Books"].append(normalised_book_names[i])
        else:
            negative_books.append(book)
            negative_names.append(normalised_book_names[i])
            positive_vs_negative["Negative Books"].append(normalised_book_names[i])

        # Assign book to appropriate bin in the pie chart
        for j in range(num_bins):
            # The last bin includes its upper edge so the maximum change is also counted
            if bin_edges[j] <= change < bin_edges[j + 1] or (j == num_bins - 1 and change == bin_edges[j + 1]):
                category = f"Change: {bin_edges[j]:.1f} to {bin_edges[j + 1]:.1f}"
                book_change_categories[category].append(normalised_book_names[i])
                break

    # Convert categories to count data
    category_counts = {key: len(value) for key, value in book_change_categories.items()}
    positive_vs_negative_counts = {key: len(value) for key, value in positive_vs_negative.items()}

    # Function to plot books
    def plot_books(books, names, title):
        fig = go.Figure()

        for i, book in enumerate(books):
            fig.add_trace(go.Scatter(
                x=np.arange(1, target_points + 1),
                y=book,
                mode='lines',
                name=names[i],
                hoverinfo="x+y+name"
            ))

        fig.update_layout(
            title=title,
            xaxis_title="Normalised Chapter Index (0 to 100)",
            yaxis_title="Cumulative Change",
            legend_title="Books",
            hovermode="x unified"
        )

        return fig

    # Display the plots
    st.write("### 📈 Overall Normalised Cumulative Chapter Changes")
    st.plotly_chart(plot_books(normalised_books, normalised_book_names, "All Books"))

    st.write("### 📗 Positive End Books (Ended At or Above Start Level)")
    st.plotly_chart(plot_books(positive_books, positive_names, "Positive End Books"))

    st.write("### 📕 Negative End Books (Ended Lower Than Start)")
    st.plotly_chart(plot_books(negative_books, negative_names, "Negative End Books"))

    # Create a Pie Chart for Change in Points (Start to End)
    fig_pie_change = go.Figure(
        data=[go.Pie(
            labels=list(category_counts.keys()),
            values=list(category_counts.values()),
            hole=0.3
        )]
    )

    fig_pie_change.update_layout(
        title="📊 Book Change Distribution (Start to End Difference)",
        clickmode='event+select'  # Enable click events
    )

    st.write("### 📊 Book Change Categories (Start vs. End Points)")
    st.plotly_chart(fig_pie_change, use_container_width=True)

    # Create a Pie Chart for Positive vs. Negative Books
    fig_pie_pos_neg = go.Figure(
        data=[go.Pie(
            labels=list(positive_vs_negative_counts.keys()),
            values=list(positive_vs_negative_counts.values()),
            hole=0.3,
            marker=dict(colors=["green", "red"])  # Green for positive, red for negative
        )]
    )

    fig_pie_pos_neg.update_layout(
        title="📊 Positive vs. Negative Books",
        clickmode='event+select'  # Enable click events
    )

    st.write("### 📊 Positive vs. Negative Books")
    st.plotly_chart(fig_pie_pos_neg, use_container_width=True)

    # Dropdown for selecting book categories
    selected_category = st.selectbox("Select a category to see book names:",
                                     ["None"] + list(book_change_categories.keys()) + list(positive_vs_negative.keys()))

    if selected_category != "None":
        st.write(f"### 📚 Books in {selected_category}:")
        if selected_category in book_change_categories:
            st.write(book_change_categories[selected_category])
        else:
            st.write(positive_vs_negative[selected_category])


# Streamlit UI
st.title("📖 Book Analysis: Cumulative Chapter Changes (Interactive)")

# File uploader
uploaded_file = st.file_uploader("📂 Upload a CSV File", type=["csv"])

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)  # Read uploaded file
    st.write("### 📊 Data Preview:")
    st.dataframe(df)  # Show uploaded data

    st.write("### 📈 Interactive Plots for Normalised Chapter Changes:")
    process_and_plot_csv(df)  # Process and plot the data
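For context, process_and_plot_csv assumes a particular CSV layout: column 0 holds book titles, column 1 is ignored, and columns 2 onward hold numeric cumulative chapter values, with shorter books trailing off as blanks. A minimal sketch of such a file, using hypothetical column names not taken from this commit, could be generated like this:

import pandas as pd

# Hypothetical example data; only the column positions matter to app.py:
# column 0 = book title, column 1 = ignored, columns 2+ = cumulative values.
sample = pd.DataFrame({
    "Book": ["Book A", "Book B"],
    "Author": ["Author A", "Author B"],  # skipped by df.iloc[:, 2:]
    "Ch1": [0.0, 1.0],
    "Ch2": [2.0, -1.0],
    "Ch3": [3.5, None],                  # shorter books trail off as NaN
})
sample.to_csv("sample_books.csv", index=False)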
requirements.txt (new file, +4 lines)
@@ -0,0 +1,4 @@
matplotlib==3.10.1
pandas==2.2.3
plotly==6.0.0
streamlit==1.43.1