import numpy as np
import pandas as pd
import plotly.graph_objects as go
import streamlit as st

# Number of evenly spaced samples every book is resampled to.
_TARGET_POINTS = 100
# Number of equal-width bins used for the change-distribution pie chart.
_NUM_CHANGE_BINS = 16


def _extract_books(df):
    """Pull book names and their numeric cumulative series out of *df*.

    Returns a tuple ``(names, series, total_with_data)`` where ``names`` is a
    list of book names, ``series`` is a parallel list of 1-D float arrays with
    NaNs removed, and ``total_with_data`` is the number of rows that had at
    least one numeric value (before single-value books are discarded).
    """
    if df.shape[1]:
        # Remove unnamed columns.
        df = df.loc[:, ~df.columns.str.contains(r'^Unnamed')]
    if df.shape[1] == 0:
        return [], [], 0

    # Drop rows where the book name (first column) is missing.
    df = df.dropna(subset=[df.columns[0]])
    names = df.iloc[:, 0].astype(str)

    # Columns from index 2 onward are treated as the cumulative values;
    # column 1 is skipped. NOTE(review): presumably metadata -- confirm
    # against the expected CSV layout.
    values = df.iloc[:, 2:].apply(pd.to_numeric, errors='coerce')

    # Count numeric entries per row once and filter names and values with the
    # SAME positional mask, so a dropped row can never shift names relative
    # to their data.
    numeric_counts = values.notna().sum(axis=1).to_numpy()
    total_with_data = int((numeric_counts >= 1).sum())
    keep = numeric_counts > 1  # a single value cannot show a change

    kept_names = names.loc[keep].tolist()
    kept_rows = values.loc[keep].to_numpy(dtype=float)
    kept_series = [row[~np.isnan(row)] for row in kept_rows]
    return kept_names, kept_series, total_with_data


def _normalise_book(book, target_points=_TARGET_POINTS):
    """Linearly resample *book* onto exactly *target_points* samples."""
    source_idx = np.arange(len(book))
    sample_idx = np.linspace(0, len(book) - 1, target_points)
    return np.interp(sample_idx, source_idx, book)


def _bin_changes(names, changes, num_bins=_NUM_CHANGE_BINS):
    """Group book *names* into equal-width bins of their start-to-end change.

    Returns a dict mapping a ``"Change: lo to hi"`` label to the list of book
    names whose change falls in that bin. Bins are half-open ``[lo, hi)``
    except the last, which is closed so the maximum change is included.
    """
    edges = np.linspace(min(changes), max(changes), num_bins + 1)
    labels = [
        f"Change: {edges[i]:.1f} to {edges[i + 1]:.1f}"
        for i in range(num_bins)
    ]
    categories = {label: [] for label in labels}

    # Index of the last edge <= change; clipping folds the maximum value
    # (which lands on the final edge) into the last bin instead of losing it.
    positions = np.searchsorted(edges, changes, side='right') - 1
    positions = np.clip(positions, 0, num_bins - 1)
    for name, position in zip(names, positions):
        categories[labels[position]].append(name)
    return categories


def _plot_books(books, names, title, target_points=_TARGET_POINTS):
    """Build a line figure with one trace per normalised book."""
    fig = go.Figure()
    x_values = np.arange(1, target_points + 1)
    for name, book in zip(names, books):
        fig.add_trace(go.Scatter(
            x=x_values,
            y=book,
            mode='lines',
            name=name,
            hoverinfo="x+y+name",
        ))
    fig.update_layout(
        title=title,
        xaxis_title="Normalised Chapter Index (0 to 100)",
        yaxis_title="Cumulative Change",
        legend_title="Books",
        hovermode="x unified",
    )
    return fig


def _pie_chart(counts, title, colors=None):
    """Build a donut chart from a ``{label: count}`` mapping."""
    pie_kwargs = dict(
        labels=list(counts.keys()),
        values=list(counts.values()),
        hole=0.3,
    )
    if colors is not None:
        pie_kwargs["marker"] = dict(colors=colors)
    fig = go.Figure(data=[go.Pie(**pie_kwargs)])
    fig.update_layout(title=title, clickmode='event+select')  # enable click events
    return fig


def process_and_plot_csv(df: pd.DataFrame) -> None:
    """Process the uploaded CSV data and render interactive Plotly plots.

    The first column is read as the book name and columns from index 2 onward
    as cumulative chapter values. Each book with at least two numeric values
    is resampled to 100 points, then shown in line plots (all / positive /
    negative) and two pie charts, followed by a dropdown listing the books in
    a chosen category.

    Args:
        df: The DataFrame parsed from the uploaded CSV.

    Returns:
        None. All output is written to the Streamlit page. If no book has
        enough data, a warning is shown and nothing is plotted.
    """
    book_names, book_series, total_with_data = _extract_books(df)

    # Debug info
    st.write(f"Total Books Before Filtering: {total_with_data}, After Filtering: {len(book_series)}")

    if not book_series:
        # Without this guard min()/max() below would raise on an empty list.
        st.warning("No books with at least two numeric cumulative values were found.")
        return

    # Normalise every book to the same number of points.
    normalised_books = [_normalise_book(book) for book in book_series]
    change_values = [float(curve[-1] - curve[0]) for curve in normalised_books]

    # Separate books into positive and negative groups based on start vs. end value.
    positive_books, positive_names = [], []
    negative_books, negative_names = [], []
    for name, curve in zip(book_names, normalised_books):
        if curve[-1] >= curve[0]:
            positive_books.append(curve)
            positive_names.append(name)
        else:
            negative_books.append(curve)
            negative_names.append(name)
    positive_vs_negative = {
        "Positive Books": positive_names,
        "Negative Books": negative_names,
    }

    # Bins for the change-distribution pie chart.
    book_change_categories = _bin_changes(book_names, change_values)

    # Convert categories to count data.
    category_counts = {key: len(value) for key, value in book_change_categories.items()}
    positive_vs_negative_counts = {key: len(value) for key, value in positive_vs_negative.items()}

    # Display the line plots.
    st.write("### 📈 Overall Normalised Cumulative Chapter Changes")
    st.plotly_chart(_plot_books(normalised_books, book_names, "All Books"))

    st.write("### 📗 Positive End Books (Ended At or Above Start Level)")
    st.plotly_chart(_plot_books(positive_books, positive_names, "Positive End Books"))

    st.write("### 📕 Negative End Books (Ended Lower Than Start)")
    st.plotly_chart(_plot_books(negative_books, negative_names, "Negative End Books"))

    # Pie chart for change in points (start to end).
    fig_pie_change = _pie_chart(
        category_counts,
        "📊 Book Change Distribution (Start to End Difference)",
    )
    st.write("### 📊 Book Change Categories (Start vs. End Points)")
    st.plotly_chart(fig_pie_change, use_container_width=True)

    # Pie chart for positive vs. negative books (green = positive, red = negative).
    fig_pie_pos_neg = _pie_chart(
        positive_vs_negative_counts,
        "📊 Positive vs. Negative Books",
        colors=["green", "red"],
    )
    st.write("### 📊 Positive vs. Negative Books")
    st.plotly_chart(fig_pie_pos_neg, use_container_width=True)

    # Dropdown for selecting book categories.
    selected_category = st.selectbox(
        "Select a category to see book names:",
        ["None"] + list(book_change_categories.keys()) + list(positive_vs_negative.keys()),
    )

    if selected_category != "None":
        st.write(f"### 📚 Books in {selected_category}:")
        if selected_category in book_change_categories:
            st.write(book_change_categories[selected_category])
        else:
            st.write(positive_vs_negative[selected_category])


# Streamlit UI
st.title("📖 Book Analysis: Cumulative Chapter Changes (Interactive)")

# File uploader
uploaded_file = st.file_uploader("📂 Upload a CSV File", type=["csv"])

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)  # Read uploaded file
    st.write("### 📊 Data Preview:")
    st.dataframe(df)  # Show uploaded data

    st.write("### 📈 Interactive Plots for Normalised Chapter Changes:")
    process_and_plot_csv(df)  # Process and plot the data