first commit

2025-03-09 16:44:45 +11:00 · 2025-03-09 16:44:45 +11:00 · 8bcb52c93a
commit 8bcb52c93a
4 changed files with 202 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+.venv
--- a/12
+++ b/12
@ -0,0 +1,12 @@
+# Interpreter version, virtual-environment location and Streamlit port.
+PYTHON := 3.12
+VENV_PATH := .venv
+PORT := 8501
+
+# Both targets are commands rather than files, so neither may be skipped
+# because a file of the same name happens to exist.
+.PHONY: venv run
+
+# Create (or reuse) the virtual environment and install the pinned requirements.
+# Recipes run under /bin/sh by default, where `source` is not guaranteed to
+# exist; `.` is the portable spelling of the same builtin.
+venv:
+	uv venv --python $(PYTHON) --allow-existing $(VENV_PATH)
+	. $(VENV_PATH)/bin/activate && uv pip install -r requirements.txt
+
+# Launch the Streamlit app on PORT using the environment's own streamlit binary.
+run: venv
+	. $(VENV_PATH)/bin/activate && $(VENV_PATH)/bin/streamlit run app.py --server.port $(PORT)
--- a/app.py
+++ b/app.py
@ -0,0 +1,185 @@
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+import streamlit as st
+
+def process_and_plot_csv(df):
+    """Processes the uploaded CSV file and creates interactive Plotly plots."""
+
+    # Remove unnamed columns
+    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
+
+    # Drop rows where book names are missing
+    df = df.dropna(subset=[df.columns[0]])
+
+    # Extract book names
+    book_names = df.iloc[:, 0].astype(str)
+
+    # Extract only numeric cumulative chapter values
+    cumulative_books = df.iloc[:, 2:].apply(pd.to_numeric, errors='coerce')
+
+    # Drop rows where all cumulative values are NaN
+    cumulative_books = cumulative_books.dropna(how='all')
+
+    # Convert to numpy array
+    cumulative_books = cumulative_books.values
+
+    # Remove books with only a single cumulative value
+    valid_books = [book for book in cumulative_books if np.count_nonzero(~np.isnan(book)) > 1]
+    valid_book_names = [book_names.iloc[i] for i in range(len(cumulative_books)) if np.count_nonzero(~np.isnan(cumulative_books[i])) > 1]
+
+    # Debug info
+    st.write(f"Total Books Before Filtering: {len(cumulative_books)}, After Filtering: {len(valid_books)}")
+
+    # Normalize to 100 points for each book
+    target_points = 100
+    normalised_books = []
+    normalised_book_names = []
+    change_values = []
+
+    for i, book in enumerate(valid_books):
+        book = np.array(book)
+        book = book[~np.isnan(book)]  # Remove NaNs
+
+        original_points = np.arange(len(book))  # Original chapter indices
+        target_indices = np.linspace(0, len(book) - 1, target_points)  # Normalize to 100 points
+        normalised = np.interp(target_indices, original_points, book)  # Interpolation
+
+        normalised_books.append(normalised[:target_points])  # Ensure exactly 100 points
+        normalised_book_names.append(valid_book_names[i])
+        change_values.append(normalised[-1] - normalised[0])  # Store change from start to end
+
+    # Separate books into positive and negative groups based on start vs. end value
+    positive_books = []
+    negative_books = []
+    positive_names = []
+    negative_names = []
+
+    # Creating bins dynamically for the pie chart
+    min_change = min(change_values)
+    max_change = max(change_values)
+    num_bins = 16  # Adjust this number for more or fewer bins
+
+    bin_edges = np.linspace(min_change, max_change, num_bins + 1)
+    book_change_categories = {f"Change: {bin_edges[i]:.1f} to {bin_edges[i+1]:.1f}": [] for i in range(num_bins)}
+
+    positive_vs_negative = {"Positive Books": [], "Negative Books": []}
+
+    for i, book in enumerate(normalised_books):
+        start_value = book[0]
+        final_value = book[-1]
+        change = final_value - start_value
+
+        # Classify as positive or negative
+        if final_value >= start_value:
+            positive_books.append(book)
+            positive_names.append(normalised_book_names[i])
+            positive_vs_negative["Positive Books"].append(normalised_book_names[i])
+        else:
+            negative_books.append(book)
+            negative_names.append(normalised_book_names[i])
+            positive_vs_negative["Negative Books"].append(normalised_book_names[i])
+
+        # Assign book to appropriate bin in the pie chart
+        for j in range(num_bins):
+            if bin_edges[j] <= change < bin_edges[j + 1]:
+                category = f"Change: {bin_edges[j]:.1f} to {bin_edges[j + 1]:.1f}"
+                book_change_categories[category].append(normalised_book_names[i])
+                break
+
+    # Convert categories to count data
+    category_counts = {key: len(value) for key, value in book_change_categories.items()}
+    positive_vs_negative_counts = {key: len(value) for key, value in positive_vs_negative.items()}
+
+    # Function to plot books
+    def plot_books(books, names, title):
+        fig = go.Figure()
+
+        for i, book in enumerate(books):
+            fig.add_trace(go.Scatter(
+                x=np.arange(1, target_points + 1),
+                y=book,
+                mode='lines',
+                name=names[i],
+                hoverinfo="x+y+name"
+            ))
+
+        fig.update_layout(
+            title=title,
+            xaxis_title="Normalised Chapter Index (0 to 100)",
+            yaxis_title="Cumulative Change",
+            legend_title="Books",
+            hovermode="x unified"
+        )
+
+        return fig
+
+    # Display the plots
+    st.write("### 📈 Overall Normalised Cumulative Chapter Changes")
+    st.plotly_chart(plot_books(normalised_books, normalised_book_names, "All Books"))
+
+    st.write("### 📗 Positive End Books (Started At or Above End Level)")
+    st.plotly_chart(plot_books(positive_books, positive_names, "Positive End Books"))
+
+    st.write("### 📕 Negative End Books (Ended Lower Than Start)")
+    st.plotly_chart(plot_books(negative_books, negative_names, "Negative End Books"))
+
+    # Create a Pie Chart for Change in Points (Start to End)
+    fig_pie_change = go.Figure(
+        data=[go.Pie(
+            labels=list(category_counts.keys()),
+            values=list(category_counts.values()),
+            hole=0.3
+        )]
+    )
+
+    fig_pie_change.update_layout(
+        title="📊 Book Change Distribution (Start to End Difference)",
+        clickmode='event+select'  # Enable click events
+    )
+
+    st.write("### 📊 Book Change Categories (Start vs. End Points)")
+    st.plotly_chart(fig_pie_change, use_container_width=True)
+
+    # Create a Pie Chart for Positive vs. Negative Books
+    fig_pie_pos_neg = go.Figure(
+        data=[go.Pie(
+            labels=list(positive_vs_negative_counts.keys()),
+            values=list(positive_vs_negative_counts.values()),
+            hole=0.3,
+            marker=dict(colors=["green", "red"])  # Green for positive, Red for negative
+        )]
+    )
+
+    fig_pie_pos_neg.update_layout(
+        title="📊 Positive vs. Negative Books",
+        clickmode='event+select'  # Enable click events
+    )
+
+    st.write("### 📊 Positive vs. Negative Books")
+    st.plotly_chart(fig_pie_pos_neg, use_container_width=True)
+
+    # Dropdown for selecting book categories
+    selected_category = st.selectbox("Select a category to see book names:", 
+                                     ["None"] + list(book_change_categories.keys()) + list(positive_vs_negative.keys()))
+
+    if selected_category != "None":
+        st.write(f"### 📚 Books in {selected_category}:")
+        if selected_category in book_change_categories:
+            st.write(book_change_categories[selected_category])
+        else:
+            st.write(positive_vs_negative[selected_category])
+
+# Streamlit UI
+st.title("📖 Book Analysis: Cumulative Chapter Changes (Interactive)")
+
+# File uploader
+uploaded_file = st.file_uploader("📂 Upload a CSV File", type=["csv"])
+
+if uploaded_file is not None:
+    df = pd.read_csv(uploaded_file)  # Read uploaded file
+    st.write("### 📊 Data Preview:")
+    st.dataframe(df)  # Show uploaded data
+
+    st.write("### 📈 Interactive Plots for Normalised Chapter Changes:")
+    process_and_plot_csv(df)  # Process and plot the data
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,4 @@
+matplotlib==3.10.1
+pandas==2.2.3
+plotly==6.0.0
+streamlit==1.43.1