"""Streamlit app that plots normalised cumulative chapter changes per book.

The uploaded CSV is expected to hold one book per row: the first column is
the book name, the second column is ignored, and every remaining column is a
cumulative per-chapter value.  Each book is resampled to a fixed number of
points so books of different lengths can be compared on one axis.

Run with ``streamlit run app.py``.
"""
# Extracted from the app.py hunk of commit 8bcb52c ("first commit").

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import streamlit as st

# Number of points every book is resampled to.
TARGET_POINTS = 100
# Number of equal-width bins used for the start-to-end change pie chart.
NUM_BINS = 16


def _extract_books(df):
    """Pull aligned book names and numeric series out of the raw frame.

    Returns a tuple ``(names, series, candidate_count)`` where ``names`` and
    ``series`` are parallel lists covering only the books with at least two
    numeric values, and ``candidate_count`` is the number of named rows that
    had at least one numeric value.
    """
    frame = df.loc[:, ~df.columns.astype(str).str.startswith("Unnamed")]
    # Need a name column, the skipped second column and one value column.
    if frame.shape[1] < 3:
        return [], [], 0

    frame = frame.dropna(subset=[frame.columns[0]])
    names = frame.iloc[:, 0].astype(str).to_numpy()
    matrix = (
        frame.iloc[:, 2:]
        .apply(pd.to_numeric, errors="coerce")
        .to_numpy(dtype=float)
    )

    # Filter names and values with the SAME row mask so they cannot drift
    # apart when some rows contain no numeric data at all.
    observed = (~np.isnan(matrix)).sum(axis=1)
    usable = observed > 1

    book_names = [str(name) for name in names[usable]]
    series = [row[~np.isnan(row)] for row in matrix[usable]]
    return book_names, series, int((observed > 0).sum())


def _normalise_book(values, target_points=TARGET_POINTS):
    """Linearly resample ``values`` onto ``target_points`` evenly spaced points."""
    source_positions = np.arange(len(values))
    sample_positions = np.linspace(0, len(values) - 1, target_points)
    return np.interp(sample_positions, source_positions, values)


def _bin_books_by_change(names, changes, num_bins=NUM_BINS):
    """Group book names into equal-width bins of their start-to-end change.

    Bins are half-open ``[low, high)`` except the last one, which also
    includes its upper edge so the book with the largest change is counted.
    Returns a dict mapping a human-readable bin label to a list of names.
    """
    edges = np.linspace(min(changes), max(changes), num_bins + 1)
    labels = [
        f"Change: {low:.1f} to {high:.1f}"
        for low, high in zip(edges[:-1], edges[1:])
    ]
    categories = {label: [] for label in labels}

    # side="right" reproduces ``low <= change < high``; clipping folds the
    # maximum (and the degenerate all-equal case) into a valid bin.
    positions = np.searchsorted(edges, changes, side="right") - 1
    positions = np.clip(positions, 0, num_bins - 1)
    for name, position in zip(names, positions):
        categories[labels[position]].append(name)
    return categories


def _plot_books(books, names, title, target_points=TARGET_POINTS):
    """Build a line figure with one trace per normalised book."""
    fig = go.Figure()
    x_values = np.arange(1, target_points + 1)

    for name, book in zip(names, books):
        fig.add_trace(go.Scatter(
            x=x_values,
            y=book,
            mode='lines',
            name=name,
            hoverinfo="x+y+name",
        ))

    fig.update_layout(
        title=title,
        xaxis_title=f"Normalised Chapter Index (1 to {target_points})",
        yaxis_title="Cumulative Change",
        legend_title="Books",
        hovermode="x unified",
    )
    return fig


def _pie_chart(counts, title, colors=None):
    """Build a donut chart from a ``{label: count}`` mapping."""
    pie_kwargs = {
        "labels": list(counts),
        "values": list(counts.values()),
        "hole": 0.3,
    }
    if colors is not None:
        pie_kwargs["marker"] = dict(colors=colors)

    fig = go.Figure(data=[go.Pie(**pie_kwargs)])
    fig.update_layout(title=title, clickmode='event+select')
    return fig


def process_and_plot_csv(df):
    """Process the uploaded CSV data and render interactive Plotly plots.

    Args:
        df: DataFrame read from the uploaded CSV.  Column 0 holds book names,
            column 1 is ignored, and columns 2+ hold cumulative chapter
            values (non-numeric cells are treated as missing).

    Returns:
        None.  All output is written to the Streamlit page.  When no book has
        at least two numeric values a warning is shown instead of the plots.
    """
    book_names, series, candidate_count = _extract_books(df)

    # Debug info
    st.write(
        f"Total Books Before Filtering: {candidate_count}, "
        f"After Filtering: {len(series)}"
    )

    if not series:
        st.warning(
            "No books with at least two numeric cumulative values were found "
            "in the uploaded file."
        )
        return

    normalised_books = [_normalise_book(book) for book in series]
    change_values = [book[-1] - book[0] for book in normalised_books]

    # Split by whether the book ends at/above or below where it started.
    positive_books, positive_names = [], []
    negative_books, negative_names = [], []
    for name, book, change in zip(book_names, normalised_books, change_values):
        if change >= 0:
            positive_books.append(book)
            positive_names.append(name)
        else:
            negative_books.append(book)
            negative_names.append(name)

    positive_vs_negative = {
        "Positive Books": positive_names,
        "Negative Books": negative_names,
    }
    book_change_categories = _bin_books_by_change(book_names, change_values)

    category_counts = {
        label: len(members)
        for label, members in book_change_categories.items()
    }
    positive_vs_negative_counts = {
        label: len(members)
        for label, members in positive_vs_negative.items()
    }

    # Line plots
    st.write("### 📈 Overall Normalised Cumulative Chapter Changes")
    st.plotly_chart(_plot_books(normalised_books, book_names, "All Books"))

    st.write("### 📗 Positive End Books (Ended At or Above Start Level)")
    st.plotly_chart(
        _plot_books(positive_books, positive_names, "Positive End Books")
    )

    st.write("### 📕 Negative End Books (Ended Lower Than Start)")
    st.plotly_chart(
        _plot_books(negative_books, negative_names, "Negative End Books")
    )

    # Pie charts
    st.write("### 📊 Book Change Categories (Start vs. End Points)")
    st.plotly_chart(
        _pie_chart(
            category_counts,
            "📊 Book Change Distribution (Start to End Difference)",
        ),
        use_container_width=True,
    )

    st.write("### 📊 Positive vs. Negative Books")
    st.plotly_chart(
        _pie_chart(
            positive_vs_negative_counts,
            "📊 Positive vs. Negative Books",
            colors=["green", "red"],  # Green for positive, red for negative
        ),
        use_container_width=True,
    )

    # Dropdown for listing the books inside a chosen category
    selected_category = st.selectbox(
        "Select a category to see book names:",
        ["None"] + list(book_change_categories) + list(positive_vs_negative),
    )

    if selected_category != "None":
        st.write(f"### 📚 Books in {selected_category}:")
        if selected_category in book_change_categories:
            st.write(book_change_categories[selected_category])
        else:
            st.write(positive_vs_negative[selected_category])


def main():
    """Render the page: title, CSV uploader, data preview and plots."""
    st.title("📖 Book Analysis: Cumulative Chapter Changes (Interactive)")

    uploaded_file = st.file_uploader("📂 Upload a CSV File", type=["csv"])
    if uploaded_file is None:
        return

    try:
        df = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        # Show a readable message instead of a raw traceback.
        st.error(f"Could not read the uploaded CSV file: {exc}")
        return

    st.write("### 📊 Data Preview:")
    st.dataframe(df)

    st.write("### 📈 Interactive Plots for Normalised Chapter Changes:")
    process_and_plot_csv(df)


# Streamlit executes the script as ``__main__``, so the guard keeps
# ``streamlit run`` working while making the module importable for tests.
if __name__ == "__main__":
    main()