Feature Extraction and Analysis on Fish in MegaLab Camera¶

Happy New Year!

In a previous notebook, I was playing around with GStreamer and spectrogram generation on the MegaLab camera feed. I also played with the Community Fish Detector model to evaluate fish detections on the stream. The results were really interesting, but computationally slow (running on just my laptop).

I did manage to collect around four and a half hours of footage from the MegaLab camera during the afternoon and evening of December 12th, 2025, ranging from around 13:30 to 17:50 (fortunately the camera feed also contains the timestamps). But running inference on the entire footage on my Nvidia RTX 4070 laptop GPU was going to take some time. It was effectively taking 1.5 seconds per frame, and at 30 FPS for 4.5 hours, that would take around a week of continuous processing!
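
To put numbers on that: 4.5 hours × 3,600 seconds/hour × 30 FPS ≈ 486,000 frames, and at roughly 1.5 seconds per frame that works out to about 729,000 seconds of compute, a little over 8 days.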

So I got to thinking - what would I do with all these fish detections once I had them? Well...

  • I don't really know what fish are detected
  • I don't know how many unique types of fish there are
  • I don't know what other types of creatures or objects might be detected
  • I don't have any tracks of fish that were detected multiple times

What I do have is a large set of bounding boxes and confidence scores for each frame. And at the time of writing this, I was still waiting for the inference to finish running on the entire footage!

In [1]:
%load_ext autoreload
%autoreload 2

import os
import polars as pl
import warnings

warnings.filterwarnings("ignore")

df = pl.read_delta("../../../megalab_recordings/data/inference")


# extract the numeric video id from the filename, e.g. "megalab_0001.mp4" -> 1
df = df.with_columns(
    pl.col("video_file")
      .map_elements(lambda f: int(os.path.splitext(f)[0].split('_')[-1]), return_dtype=pl.Int64)
      .alias("video_id"),
)

# sort by video_id and frame_id
df = df.sort(["video_id", "frame_id"])

# assuming each video is 10 seconds long (300 frames at 30 FPS),
# derive a timestamp (in seconds) from the video_id and frame_id columns
df = df.with_columns(
    pl.struct(["video_id", "frame_id"])
      .map_elements(lambda x: (x['video_id'] * 10) + (x['frame_id'] * (10 / 300)), return_dtype=pl.Float64)
      .alias("timestamp")
)

df
Out[1]:
shape: (2_068_649, 6)
video_file         | frame_id | box                                    | score    | video_id | timestamp
str                | i64      | list[f64]                              | f64      | i64      | f64
"megalab_0000.mp4" | 0        | [49.625584, 308.454529, … 25.061401]   | 0.447875 | 0        | 0.0
"megalab_0000.mp4" | 1        | [111.323929, 160.203247, … 22.092972]  | 0.440812 | 0        | 0.033333
"megalab_0000.mp4" | 2        | [111.131042, 156.693939, … 23.211853]  | 0.505729 | 0        | 0.066667
"megalab_0000.mp4" | 2        | [45.196251, 304.073059, … 26.24408]    | 0.440835 | 0        | 0.066667
"megalab_0000.mp4" | 3        | [112.592758, 154.698364, … 22.159836]  | 0.559952 | 0        | 0.1
…                  | …        | …                                      | …        | …        | …
"megalab_1588.mp4" | 297      | [333.339081, 598.729156, … 170.905487] | 0.722037 | 1588     | 15889.9
"megalab_1588.mp4" | 298      | [330.434692, 592.485535, … 169.059097] | 0.750856 | 1588     | 15889.933333
"megalab_1588.mp4" | 298      | [324.720764, 244.884262, … 93.155594]  | 0.853287 | 1588     | 15889.933333
"megalab_1588.mp4" | 299      | [328.688354, 585.071289, … 168.657837] | 0.626334 | 1588     | 15889.966667
"megalab_1588.mp4" | 299      | [322.843872, 242.395752, … 97.742188]  | 0.849616 | 1588     | 15889.966667

So I thought - if I was a biologist studying fish in this area, what sort of analysis would I want to do on this data? Well I'd certainly like to know how many unique fish are in this footage, the density of the species, and perhaps notable behaviors like schooling or feeding.

To start to answer some of these questions we will have to extract some features from the fish detections and do some analysis. These features will help us cluster the fish into groups, and perhaps identify unique individuals.

To start, let's just peek at the data and see what we have been detecting so far.

In [ ]:
import cv2
import matplotlib.pyplot as plt

total = 0
for video_file, gdf in df.group_by("video_file"):
    video_file = video_file[0]

    capture = cv2.VideoCapture(f"../../../megalab_recordings/recordings/{video_file}")

    for frame_id, ggdf in gdf.group_by("frame_id"):
        capture.set(cv2.CAP_PROP_POS_FRAMES, frame_id[0])
        _, frame = capture.read()

        for row in ggdf.iter_rows(named=True):
            bbox = row["box"]

            x, y, w, h = bbox

            # inference was run on frames resized to 1024x1024,
            # so scale the bounding box back to the original frame size
            orig_h, orig_w, _ = frame.shape
            x = int(x * orig_w / 1024)
            y = int(y * orig_h / 1024)
            w = int(w * orig_w / 1024)
            h = int(h * orig_h / 1024)

            frame = cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        plt.figure(figsize=(10, 5))
        plt.title(f"Video: {video_file}, Frame: {frame_id[0]}, Detections: {len(ggdf)}")
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.axis("off")
        plt.show()

        break

    capture.release()
    
    total += 1
    if total >= 3:
        break
[Figures: three sample frames, one per video, with fish detection bounding boxes drawn]

Alright this is interesting! We can make a few observations from this initial exploration:

  • The camera is panning. This means that tracking fish across frames accurately will likely be more difficult, especially with ByteTracker, which relies on a Kalman filter with a constant-velocity motion model.
  • There is a wiper that periodically wipes the lens. This will certainly cause some missed detections and complications with tracking fish across frames.
  • Fish are being detected! But there are a lot of missed fish and also some false positives, like algae on the lens. Otherwise, we can clearly see there are multiple species of fish being detected.

Before we extract features, we know that we have over 2 million fish detections in this dataset. Since my laptop is not particularly powerful, we need to apply some initial filtering to reduce the dataset size to something more manageable for exploration.

Let's explore some of the statistics of the fish detections themselves. We can look at the distribution of bounding box sizes, aspect ratios, and confidence scores.

In [6]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

df = df.with_columns([
    pl.col("box").map_elements(lambda box: box[2] * box[3], return_dtype=pl.Float64).alias("area"),
    pl.col("box").map_elements(lambda box: box[2], return_dtype=pl.Float64).alias("width"),
    pl.col("box").map_elements(lambda box: box[3], return_dtype=pl.Float64).alias("height"),
])

median_conf = df.select(pl.col("score").median()).to_series()[0]
std_conf = df.select(pl.col("score").std()).to_series()[0]
median_area = df.select(pl.col("area").median()).to_series()[0]
std_area = df.select(pl.col("area").std()).to_series()[0]
med_width = df.select(pl.col("width").median()).to_series()[0]
std_width = df.select(pl.col("width").std()).to_series()[0]
med_height = df.select(pl.col("height").median()).to_series()[0]
std_height = df.select(pl.col("height").std()).to_series()[0]

plt.figure(figsize=(20, 8))
plt.subplot(2, 2, 1)

plt.hist(df.select("score").to_series(), bins=50, color='blue', alpha=0.7)
plt.axvline(median_conf, color='red', linestyle='dashed', linewidth=1, label='Median')
plt.axvline(median_conf + std_conf, color='orange', linestyle='dashed', linewidth=1)
plt.axvline(median_conf - std_conf, color='orange', linestyle='dashed', linewidth=1)
plt.legend()
plt.title(f"Histogram of Confidence. Median: {median_conf:.2f}, Std: {std_conf:.2f}")
plt.xlabel("Confidence")
plt.ylabel("Frequency")

plt.subplot(2, 2, 2)
plt.hist(df.select("area").to_series(), bins=50, color='green', alpha=0.7)
plt.axvline(median_area, color='red', linestyle='dashed', linewidth=1, label='Median')
plt.axvline(median_area + std_area, color='orange', linestyle='dashed', linewidth=1)
plt.axvline(median_area - std_area, color='orange', linestyle='dashed', linewidth=1)
plt.legend()
plt.title(f"Histogram of Box Areas. Median: {median_area:.2f}, Std: {std_area:.2f}")
plt.xlabel("Area (Width * Height)")
plt.ylabel("Frequency")

plt.subplot(2, 2, 3)
plt.hist(df.select("width").to_series(), bins=50, color='purple', alpha=0.7)
plt.axvline(med_width, color='red', linestyle='dashed', linewidth=1, label='Median')
plt.axvline(med_width + std_width, color='orange', linestyle='dashed', linewidth=1)
plt.axvline(med_width - std_width, color='orange', linestyle='dashed', linewidth=1)
plt.legend()
plt.title(f"Histogram of Box Widths. Median: {med_width:.2f}, Std: {std_width:.2f}")
plt.xlabel("Width")
plt.ylabel("Frequency")

plt.subplot(2, 2, 4)
plt.hist(df.select("height").to_series(), bins=50, color='brown', alpha=0.7)
plt.axvline(med_height, color='red', linestyle='dashed', linewidth=1, label='Median')
plt.axvline(med_height + std_height, color='orange', linestyle='dashed', linewidth=1)
plt.axvline(med_height - std_height, color='orange', linestyle='dashed', linewidth=1)
plt.legend()
plt.title(f"Histogram of Box Heights. Median: {med_height:.2f}, Std: {std_height:.2f}")
plt.xlabel("Height")
plt.ylabel("Frequency")

plt.tight_layout()
plt.show()
[Figure: histograms of detection confidence, box area, box width, and box height, with median ± 1 std marked]

Clearly we have some right skew in our data, with extremely long tails. We can safely filter out some of the extreme outliers in bounding box size to focus on the more common fish sizes. But we can also take a second to think about some static filters we can apply to remove some of the detections. We can see that there are some very small bounding boxes that, although probably valid fish detections, are likely too small to extract meaningful features from.

To start, let's try to be purely statistical about this. Drawing on knowledge of distributions, we know that the width and height distributions are both right-skewed, and will likely look closer to normal if we take the log of them. Then we can safely compute the median and standard deviation of the log-widths and log-heights, and filter out any bounding boxes that fall more than a certain number of standard deviations above or below the median.

Using some domain knowledge, we can also filter out any bounding boxes that are too small. Given the resolution, we don't expect to feasibly identify species or individuals from fish that are less than 32 pixels or so in width or height, so we can set that as a hard threshold. We could be more strict about this, but for now let's just use it as a starting point.

In [13]:
df = df.with_columns([
    pl.col("width").log().alias("log_width"),
    pl.col("height").log().alias("log_height"),
])

median_log_width = df.select(pl.col("log_width").median()).to_series()[0]
std_log_width = df.select(pl.col("log_width").std()).to_series()[0]
median_log_height = df.select(pl.col("log_height").median()).to_series()[0]
std_log_height = df.select(pl.col("log_height").std()).to_series()[0]

plt.figure(figsize=(20, 15))
plt.subplot(3, 2, 1)
plt.hist(df.select("log_width").to_series(), bins=50, color='purple', alpha=0.7)
plt.axvline(median_log_width, color='red', linestyle='dashed', linewidth=1, label='Median')
plt.axvline(median_log_width + std_log_width, color='orange', linestyle='dashed', linewidth=1)
plt.axvline(median_log_width - std_log_width, color='orange', linestyle='dashed', linewidth=1)
plt.legend()
plt.title("Histogram of Log-Transformed Box Widths")
plt.xlabel("Log(Width)")
plt.ylabel("Frequency")

plt.subplot(3, 2, 2)
plt.hist(df.select("log_height").to_series(), bins=50, color='brown', alpha=0.7)
plt.axvline(median_log_height, color='red', linestyle='dashed', linewidth=1, label='Median')
plt.axvline(median_log_height + std_log_height, color='orange', linestyle='dashed', linewidth=1)
plt.axvline(median_log_height - std_log_height, color='orange', linestyle='dashed', linewidth=1)
plt.legend()
plt.title("Histogram of Log-Transformed Box Heights")
plt.xlabel("Log(Height)")
plt.ylabel("Frequency")

# filter out boxes more than 2 std from the median log-width/log-height, plus a 32 px minimum size
fdf = df.filter(
    (pl.col("log_width") >= (median_log_width - std_log_width*2)) &
    (pl.col("log_width") <= (median_log_width + std_log_width*2)) &
    (pl.col("log_height") >= (median_log_height - std_log_height*2)) &
    (pl.col("log_height") <= (median_log_height + std_log_height*2)) &
    (pl.col("width") >= 32) &
    (pl.col("height") >= 32)
)

print(f"Filtered out {df.height - fdf.height} / {((df.height - fdf.height) / df.height) * 100:.2f}% outliers")
print(f"Remaining detections: {fdf.height}")

# plot filtered distributions
plt.subplot(3, 2, 3)
plt.hist(fdf.select("width").to_series(), bins=50, color='purple', alpha=0.7)
plt.title("Filtered Histogram of Box Widths")
plt.xlabel("Width")
plt.ylabel("Frequency")

plt.subplot(3, 2, 4)
plt.hist(fdf.select("height").to_series(), bins=50, color='brown', alpha=0.7)
plt.title("Filtered Histogram of Box Heights")
plt.xlabel("Height")
plt.ylabel("Frequency")

# plot confidence and area distributions after filtering
plt.subplot(3, 2, 5)
plt.hist(fdf.select("score").to_series(), bins=50, color='blue', alpha=0.7)
plt.title("Filtered Histogram of Confidence")
plt.xlabel("Confidence")
plt.ylabel("Frequency")

plt.subplot(3, 2, 6)
plt.hist(fdf.select("area").to_series(), bins=50, color='green', alpha=0.7)
plt.title("Filtered Histogram of Box Areas")
plt.xlabel("Area")
plt.ylabel("Frequency")

plt.tight_layout()
plt.show()
Filtered out 1077047 / 52.07% outliers
Remaining detections: 991602
[Figure: log-transformed width/height histograms plus the filtered width, height, confidence, and area histograms]
In [14]:
fdf
Out[14]:
shape: (991_602, 11)
video_file         | frame_id | box                                    | score    | video_id | timestamp    | area         | width      | height     | log_width | log_height
str                | i64      | list[f64]                              | f64      | i64      | f64          | f64          | f64        | f64        | f64       | f64
"megalab_0000.mp4" | 11       | [170.581802, 162.269897, … 37.856995]  | 0.628561 | 0        | 0.366667     | 1227.526452  | 32.425354  | 37.856995  | 3.478941  | 3.633816
"megalab_0000.mp4" | 12       | [170.368607, 163.154022, … 37.43277]   | 0.608063 | 0        | 0.4          | 1241.068586  | 33.154602  | 37.43277   | 3.501182  | 3.622547
"megalab_0000.mp4" | 13       | [169.404541, 163.256226, … 37.132385]  | 0.599879 | 0        | 0.433333     | 1235.315859  | 33.267883  | 37.132385  | 3.504592  | 3.61449
"megalab_0000.mp4" | 14       | [169.029236, 163.517548, … 36.884888]  | 0.621387 | 0        | 0.466667     | 1243.674035  | 33.717712  | 36.884888  | 3.518023  | 3.607802
"megalab_0000.mp4" | 15       | [168.896088, 163.629883, … 36.281189]  | 0.631152 | 0        | 0.5          | 1216.057586  | 33.517578  | 36.281189  | 3.51207   | 3.591299
…                  | …        | …                                      | …        | …        | …            | …            | …          | …          | …         | …
"megalab_1588.mp4" | 295      | [670.556519, 686.834961, … 61.450317]  | 0.4827   | 1588     | 15889.833333 | 3838.912045  | 62.471802  | 61.450317  | 4.134715  | 4.118229
"megalab_1588.mp4" | 296      | [329.527618, 246.254089, … 97.992889]  | 0.855988 | 1588     | 15889.866667 | 10126.249896 | 103.336578 | 97.992889  | 4.637991  | 4.584895
"megalab_1588.mp4" | 297      | [327.502502, 245.356262, … 94.24176]   | 0.850686 | 1588     | 15889.9      | 9715.351083  | 103.089661 | 94.24176   | 4.635599  | 4.545863
"megalab_1588.mp4" | 298      | [324.720764, 244.884262, … 93.155594]  | 0.853287 | 1588     | 15889.933333 | 9772.12073   | 104.901062 | 93.155594  | 4.653018  | 4.534271
"megalab_1588.mp4" | 299      | [322.843872, 242.395752, … 97.742188]  | 0.849616 | 1588     | 15889.966667 | 10164.605843 | 103.994049 | 97.742188  | 4.644334  | 4.582333

Incredible! With those filters, a huge fraction of the outliers were removed. The distributions are still right-skewed, but their tails are certainly shorter.

Looking at the actual distributions, a huge number of detections are less than 64 pixels in size. Looking at the actual images, we see small black schooling fish in the background, which might be the culprit. These might be interesting when we run tracking over the detections, but they are probably not very useful for static image feature extraction. We will live with it and see what happens regardless.

Still, we have some 1 million fish detections even after filtering. This is STILL a lot of data to process. To reduce this down even further, we will run some stratified sampling on the data. We will stratify based on the video name to try and preserve detections across the duration of the footage.

In [16]:
fdf = fdf.group_by("video_file", maintain_order=True).map_groups(
    lambda group: group.sample(fraction=0.1, seed=42)
)

fdf.write_parquet("megalab_filtered_detections.parquet")

fdf
Out[16]:
shape: (98_454, 11)
video_file         | frame_id | box                                    | score    | video_id | timestamp    | area         | width      | height     | log_width | log_height
str                | i64      | list[f64]                              | f64      | i64      | f64          | f64          | f64        | f64        | f64       | f64
"megalab_0000.mp4" | 150      | [3.175149, 90.095612, … 48.685226]     | 0.799926 | 0        | 5.0          | 2462.767346  | 50.585517  | 48.685226  | 3.923665  | 3.885376
"megalab_0000.mp4" | 205      | [80.909744, 77.6297, … 50.73703]       | 0.787254 | 0        | 6.833333     | 2819.448614  | 55.569839  | 50.73703   | 4.017641  | 3.926656
"megalab_0000.mp4" | 180      | [235.98291, 355.891418, … 89.744873]   | 0.862818 | 0        | 6.0          | 8421.984475  | 93.843628  | 89.744873  | 4.54163   | 4.496971
"megalab_0000.mp4" | 172      | [202.524048, 375.623779, … 117.721046] | 0.882466 | 0        | 5.733333     | 11551.902197 | 98.129456  | 117.721046 | 4.586288  | 4.768318
"megalab_0000.mp4" | 144      | [91.562485, 49.712978, … 38.932774]    | 0.61799  | 0        | 4.8          | 1418.692543  | 36.439545  | 38.932774  | 3.595655  | 3.661836
…                  | …        | …                                      | …        | …        | …            | …            | …          | …          | …         | …
"megalab_1588.mp4" | 177      | [772.430511, 826.083618, … 54.022339]  | 0.507881 | 1588     | 15885.9      | 3987.642589  | 73.814697  | 54.022339  | 4.301558  | 3.989398
"megalab_1588.mp4" | 176      | [770.521362, 823.45816, … 54.20639]    | 0.414012 | 1588     | 15885.866667 | 3835.257619  | 70.752869  | 54.20639   | 4.259193  | 3.992799
"megalab_1588.mp4" | 64       | [416.594849, 797.126343, … 51.356079]  | 0.79985  | 1588     | 15882.133333 | 3023.835217  | 58.879791  | 51.356079  | 4.075498  | 3.938783
"megalab_1588.mp4" | 166      | [776.010132, 614.337189, … 66.82724]   | 0.842968 | 1588     | 15885.533333 | 6107.937948  | 91.398926  | 66.82724   | 4.515234  | 4.202111
"megalab_1588.mp4" | 174      | [830.628113, 615.067276, … 69.218124]  | 0.85362  | 1588     | 15885.8      | 6620.985671  | 95.653931  | 69.218124  | 4.560737  | 4.237263

Roughly 100,000 detections is a lot more manageable. Now we can start thinking about feature extraction!

We can start with something simple like an ImageNet-pretrained MobileNetV3 model with the final classification layer removed. This model is fast and efficient, and should be able to extract some useful features from the fish crops. What would have actually been interesting is to utilize the features from the object detection model itself. However, since it took me a week to process the footage, we will experiment with other networks for now.

A pretrained CNN is a good starting point, but there are a few problems we could run into that CNNs are not well suited to for this task.

  • Since these are bounding box crops, the background is still present and the CNN could learn to associate background features instead of fish features.
  • An ImageNet-pretrained model was trained specifically on ImageNet classes, which do not include the data we are trying to extract features for. If the goal is to extract features where a distance metric (like Euclidean distance or cosine similarity) between two feature descriptors is smaller when they come from the same type of fish, then this model might not be the best choice.
  • CNNs do not understand global context or capture relationships between different parts of the image. They simply apply convolutions to the input image and pass the result through a series of layers to produce a final output. Fish have specific shapes and colors that might be better captured with a different architecture like a Vision Transformer (ViT) or a model specifically trained on fish images.
  • Single image feature descriptors may not actually be enough to uniquely cluster fish species. Again, peering into our dataset, the same species of fish appear in wildly different poses, lighting conditions, occlusions, distances from the camera, and backgrounds. It may be beneficial to extract temporal features from a sequence of frames using an object tracker to get a better idea of the fish's movement and behavior. While we have enough data to accomplish this, that may be a project for another day!

To enhance feature extraction, we could do several things:

  • Center crop the fish bounding boxes to remove as much background as possible.
  • Use a model specifically trained on fish images, or fine-tune a pretrained model on a fish dataset.
  • Experiment with different architectures like Vision Transformers or models that capture global context better.
  • Add background removal or segmentation to isolate the fish from the background.

To save time, we will make a single pass and extract features using MobileNetV3-Large, ViT, and XCiT models, all from the timm library, to compare against later. I've compiled a module to handle the feature extraction in parallel using CUDA streams.
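
The megalab_clustering module itself isn't shown here, but a minimal single-model sketch of the idea looks something like the following. This is illustrative, not the exact code in the module: it assumes a recent timm (≥ 0.9), and the model name, preprocessing, and helper function are my own placeholders.

import timm
import torch
import numpy as np
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"

# num_classes=0 drops the classification head, so the model returns
# pooled feature vectors instead of class logits
model = timm.create_model("mobilenetv3_large_100", pretrained=True, num_classes=0)
model = model.eval().to(device)

# use the model's own default preprocessing (resize, crop, normalize)
cfg = timm.data.resolve_model_data_config(model)
transform = timm.data.create_transform(**cfg, is_training=False)

def extract_feature(crop_rgb: np.ndarray) -> np.ndarray:
    """Return a single pooled feature vector for one RGB fish crop."""
    tensor = transform(Image.fromarray(crop_rgb)).unsqueeze(0).to(device)
    with torch.no_grad():
        feature = model(tensor)  # shape (1, 1280) for MobileNetV3-Large
    return feature.squeeze(0).cpu().numpy()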

In [18]:
import polars as pl

fdf = pl.read_parquet("megalab_filtered_detections.parquet")
In [ ]:
import megalab_clustering

table_name = megalab_clustering.extract_features(fdf)

! ls -lh {table_name}
2026-01-19 07:31:53.935716: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-01-19 07:31:54.204261: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-01-19 07:31:55.373481: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 98454/98454 [2:14:41<00:00, 12.18it/s]  
total 674M
-rw-rw-r-- 1 jack jack  69M Jan 19 07:44 0-f4b09add-e608-4fe3-992e-c8916c89c112-0.parquet
-rw-rw-r-- 1 jack jack  69M Jan 19 07:58 1-e2fcee58-3ffc-47b5-94f2-38f48da98a09-0.parquet
-rw-rw-r-- 1 jack jack  69M Jan 19 08:12 2-f60eab9a-c21d-4254-970b-6e8d9342fd67-0.parquet
-rw-rw-r-- 1 jack jack  69M Jan 19 08:26 3-162e7b4b-2104-4ba7-a3d2-97809bf40bee-0.parquet
-rw-rw-r-- 1 jack jack  69M Jan 19 08:40 4-81f59260-9fea-49d2-8331-006609c28714-0.parquet
-rw-rw-r-- 1 jack jack  69M Jan 19 08:54 5-1d922747-0c50-4c92-8455-4171a3d764b5-0.parquet
-rw-rw-r-- 1 jack jack  69M Jan 19 09:08 6-01256b21-2ea0-4eb7-b6d8-55447cc5e1c6-0.parquet
-rw-rw-r-- 1 jack jack  69M Jan 19 09:21 7-5c1c68ff-0e2d-44f2-9af1-57602ad38de8-0.parquet
-rw-rw-r-- 1 jack jack  69M Jan 19 09:35 8-5ac2f5a1-2408-4513-83b5-f082c9e01919-0.parquet
-rw-rw-r-- 1 jack jack  56M Jan 19 09:46 9-c93d8a24-b37b-4c9c-84cc-b5c456520755-0.parquet
drwxrwxr-x 2 jack jack 4.0K Jan 19 09:46 _delta_log

That is quite a lot of data already! And this is just a small sample of the entire dataset that I will eventually process. As we can see, when we process the entire dataset we will have to move to distributed computing, like Ray, Dask, or Spark, and also distributed storage like Delta Lake or numerous Parquet files. Especially if we utilize a Vision Transformer!

But for now, we can work with this sample set and see what kind of insights we can get from it.

The first thing we should do is see how compressible these features are. If we can reduce the dimensionality of the features while retaining most of the information, that would be great for visualization and clustering. Let's try PCA for this.

In [19]:
rdf = pl.read_delta("megalab_features")
rdf
Out[19]:
shape: (98_454, 5)
video_file         | frame_id | xcit_feature                        | vit_feature                         | mnetv3_feature
str                | i64      | list[f64]                           | list[f64]                           | list[f64]
"megalab_0873.mp4" | 254      | [-1.370655, 0.251851, … -1.125934]  | [-1.973908, -6.005847, … -3.995991] | [2.291016, -0.32666, … -0.324219]
"megalab_0873.mp4" | 177      | [0.916819, -2.164785, … -0.959855]  | [-2.870665, -6.252252, … -0.819254] | [1.082031, -0.297852, … -0.297119]
"megalab_0873.mp4" | 96       | [-0.771788, -0.493543, … -0.633564] | [-0.463634, -6.275813, … -4.233441] | [2.027344, -0.27124, … -0.262207]
"megalab_0873.mp4" | 79       | [-0.446059, -2.936343, … -1.072671] | [-1.530079, -5.404996, … -0.751862] | [1.464844, -0.219238, … -0.270264]
"megalab_0873.mp4" | 4        | [-0.145698, -2.020963, … -1.313443] | [-3.347079, -6.476562, … 2.167943]  | [1.782227, -0.375, … -0.356445]
…                  | …        | …                                   | …                                   | …
"megalab_1294.mp4" | 92       | [0.137672, 0.819715, … -1.240862]   | [-1.632193, -4.162421, … 1.525466]  | [0.706055, -0.323242, … 0.129028]
"megalab_1294.mp4" | 170      | [0.196154, -1.0209, … 0.481639]     | [-1.646796, -2.136511, … 0.242005]  | [1.306641, -0.005623, … -0.134155]
"megalab_1294.mp4" | 177      | [-0.425422, 0.356714, … -1.511721]  | [-1.420421, -6.523434, … 2.322006]  | [1.390625, -0.37085, … 0.140137]
"megalab_1294.mp4" | 42       | [-0.126437, -1.722595, … -0.599533] | [-1.770346, -2.047084, … -3.004429] | [3.3125, -0.375, … -0.259277]
"megalab_1294.mp4" | 278      | [0.436486, -1.1367, … 0.239529]     | [-2.913258, -1.103637, … 2.995327]  | [1.012695, 0.250244, … 0.093201]

We first have to make sure that the features are scaled properly, as common dimensionality reduction techniques like PCA are sensitive to the scale of the features. We can use StandardScaler from sklearn to standardize the features before applying PCA.

In [20]:
from sklearn.preprocessing import StandardScaler
import numpy as np

for feature_col in ["mnetv3_feature", "vit_feature", "xcit_feature"]:
    features = np.vstack(rdf.select(feature_col).to_series().to_list())

    scaler = StandardScaler()
    features = scaler.fit_transform(features)

    rdf = rdf.with_columns([
        pl.Series(feature_col, list(features))
    ])
In [21]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

for feature_col in ["mnetv3_feature", "vit_feature", "xcit_feature"]:
    features = np.vstack(rdf.select(feature_col).to_series().to_list())

    pca = PCA(n_components=0.95)
    pca_features = pca.fit_transform(features)

    rdf = rdf.with_columns([
        pl.Series(f"pca_{feature_col}", list(pca_features))
    ])

    plt.figure(figsize=(5, 3))
    sns.lineplot(
        x=np.arange(1, len(pca.explained_variance_ratio_)+1),
        y=np.cumsum(pca.explained_variance_ratio_)
    )
    plt.title(f"PCA for {feature_col} | 95% variance | {pca.n_components_} components")
    plt.xlabel("Number of Components")
    plt.ylabel("Cumulative Explained Variance")
    plt.show()

    percent_reduction = (1 - pca.n_components_ / features.shape[1]) * 100
    print(f"Feature: {feature_col}")
    print(f"Original feature dimension: {features.shape[1]}")
    print(f"Reduced feature dimension: {pca.n_components_}")
    print(f"Percent reduction in feature dimension: {percent_reduction:.2f}%")
    print()
[Plot: cumulative explained variance vs. number of components for mnetv3_feature]
Feature: mnetv3_feature
Original feature dimension: 1280
Reduced feature dimension: 613
Percent reduction in feature dimension: 52.11%

[Plot: cumulative explained variance vs. number of components for vit_feature]
Feature: vit_feature
Original feature dimension: 384
Reduced feature dimension: 247
Percent reduction in feature dimension: 35.68%

[Plot: cumulative explained variance vs. number of components for xcit_feature]
Feature: xcit_feature
Original feature dimension: 384
Reduced feature dimension: 220
Percent reduction in feature dimension: 42.71%

Wow! That's a significant reduction in feature space. Across all of the model flavors, this supports the idea that a simpler feature extraction model might exist that can capture the essence of the fish detections.

Now that we have a suggested dimensionality, we can try some clustering algorithms on this data. KMeans is a good starting point, as it is very popular and efficient. We can try the elbow method to determine the optimal number of clusters. We will also calculate the silhouette score to evaluate the quality of the clusters. But, the silhouette score can be computationally expensive for large datasets, so we will, yet again, sample down to 10,000 points for this calculation.

In [23]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

def find_optimal_k(data, max_k):
    inertias = []
    scores = []
    for k in range(2, max_k + 1):
        kmeans = KMeans(n_clusters=k, random_state=42)
        kmeans = kmeans.fit(data)
        inertias.append(kmeans.inertia_)
        scores.append(silhouette_score(data, kmeans.labels_))
    return inertias, scores

for feature_col in ["pca_mnetv3_feature", "pca_vit_feature", "pca_xcit_feature"]:

    pca_features = np.vstack(rdf.select(feature_col).sample(10_000).to_series().to_list())
    inertias, scores = find_optimal_k(pca_features, max_k=40)

    fig, ax1 = plt.subplots(figsize=(10, 5))
    ax2 = ax1.twinx()
    ax1.plot(range(2, 41), inertias, 'b-', label='Inertia')
    ax2.plot(range(2, 41), scores, 'r-', label='Silhouette Score')
    ax1.set_xlabel('Number of Clusters (k)')
    ax1.set_ylabel('Inertia')
    ax2.set_ylabel('Silhouette Score')

    plt.title(f"KMeans Elbow Method on '{feature_col}' Features")
    plt.tight_layout()
    plt.show()
[Plots: KMeans inertia and silhouette score vs. number of clusters (k = 2–40) for pca_mnetv3_feature, pca_vit_feature, and pca_xcit_feature]

Hmmph. Interesting results. We don't necessarily see a clear elbow in the plots, and the silhouette scores are also quite low. The silhouette scores being low indicates that the clusters are not well defined and there is a lot of overlap between them. From this clustering, it's difficult to say what might be a good number of clusters to choose.

This is one of the challenges with KMeans as determining the optimal number of clusters is not straightforward and often requires domain knowledge or additional metrics. But this is also an early indication that the feature descriptors may not be distinct enough to separate the fish into clear clusters. We could try another clustering algorithm, like HDBSCAN, to see if we can get better results without having to specify the number of clusters. But in practice, I have found that if KMeans is struggling to find clear clusters, other algorithms often struggle as well.
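
For reference, a quick HDBSCAN pass over one of the PCA-reduced feature sets would look something like the sketch below. I haven't run this here; it assumes scikit-learn ≥ 1.3 (which ships sklearn.cluster.HDBSCAN), and min_cluster_size is an arbitrary starting point rather than a tuned value.

# Hedged sketch: HDBSCAN on the PCA-reduced ViT features
import numpy as np
from sklearn.cluster import HDBSCAN

features = np.vstack(rdf.select("pca_vit_feature").to_series().to_list())

hdb = HDBSCAN(min_cluster_size=50)
labels = hdb.fit_predict(features)

# HDBSCAN labels noise points as -1, so exclude them from the cluster count
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
n_noise = int((labels == -1).sum())
print(f"HDBSCAN found {n_clusters} clusters, {n_noise} points labeled as noise")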

What we can do, though, is try to visualize the clusters and their distributions in the feature space using a pairplot. We can also utilize a different dimensionality reduction technique like UMAP to visualize the clusters in 2D or 3D space. UMAP preserves non-linear structure in the data, so it might give us a better idea of how well separated the clusters are.

In [28]:
from umap import UMAP

for feature_col in ["mnetv3_feature", "vit_feature", "xcit_feature"]:
    features = np.vstack(rdf.select(feature_col).to_series().to_list())

    umap = UMAP(n_neighbors=15, n_components=4, metric='euclidean')
    umap_features = umap.fit_transform(features)

    rdf = rdf.with_columns([
        pl.Series(f"umap_{feature_col}", umap_features.tolist())
    ])

We could have used only 2 components, but by using more we retain more information and can inspect cluster separation across more dimensions.

To visualize a pairplot, we also want to assign cluster labels to each point. We will utilize KMeans with 4 clusters, since we saw some slight elbowing, and higher silhouette scores around that region.

In [23]:
for feature_col in ["pca_mnetv3_feature", "pca_vit_feature", "pca_xcit_feature"]:
    features = np.vstack(rdf.select(feature_col).to_series().to_list())

    kmeans = KMeans(n_clusters=4, random_state=42)
    cluster_labels = kmeans.fit_predict(features)
    
    rdf = rdf.with_columns([
        pl.Series(f"kmeans_{feature_col}", cluster_labels.tolist())
    ])
In [30]:
import pandas as pd

for feature_col in ["mnetv3_feature", "vit_feature", "xcit_feature"]:

    plot_samples = rdf\
        .select([f'umap_{feature_col}', f"kmeans_pca_{feature_col}"])\
        .to_pandas()

    plot_samples = plot_samples\
        .join(plot_samples[f'umap_{feature_col}'].apply(pd.Series).add_prefix(f"{feature_col}_"))\
        .drop(columns=[f'umap_{feature_col}'])

    sns.pairplot(
        plot_samples,
        hue=f"kmeans_pca_{feature_col}",
        palette="viridis",
        kind="hist",
        diag_kind="kde",
    )
    plt.suptitle(f"UMAP Components Pairplot of {feature_col} Features", y=1.02)
    plt.show()
[Pairplots: UMAP components of mnetv3_feature, vit_feature, and xcit_feature, colored by KMeans cluster label]

Well, this looks quite different than our elbow and silhouette analysis suggested. Visually, we can see a lot more separation between clusters in the pairplots. The distributions on the diagonal also show more separation between clusters.

There is clear separation between some clusters here, but not between all of them. Based on all the analysis so far:

  • We didn't find a clear elbow in the KMeans inertia plot.
  • The silhouette score is very low, regardless of whether there is a peak or not. It should be closer to 1 to indicate high separation between clusters.
  • The pairplots of the UMAP features show significant overlap between clusters.

And finally, the most important of all, 4 clusters is clearly not enough to capture the diversity of fish species that are truly present in this dataset.

Before we conclude, we have yet to even visualize what the clusters actually look like! From this, we can get a better idea of how KMeans is grouping the detections, and what kind of features the models are actually extracting.

In [ ]:
import cv2

def get_fish_crop(video_file, frame_id, box):
    capture = cv2.VideoCapture(f"../../../megalab_recordings/recordings/{video_file}")
    capture.set(cv2.CAP_PROP_POS_FRAMES, frame_id)
    _, frame = capture.read()
    capture.release()
    x, y, w, h = box
    orig_h, orig_w, _ = frame.shape
    x = int(x * orig_w / 1024)
    y = int(y * orig_h / 1024)
    w = int(w * orig_w / 1024)
    h = int(h * orig_h / 1024)
    crop = frame[y:y+h, x:x+w]
    crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)

    # resize to max dimension 128
    max_dim = max(crop.shape[0], crop.shape[1])
    scale = 128 / max_dim
    new_w = int(crop.shape[1] * scale)
    new_h = int(crop.shape[0] * scale)
    crop = cv2.resize(crop, (new_w, new_h))

    return crop

for cluster_id in range(4):

    rows = 5
    cols = 10
    image = np.zeros((rows * 128, cols * 128, 3), dtype=np.uint8)

    cluster_df = rdf.filter(pl.col("kmeans_pca_vit_feature") == cluster_id)
    sample_rows = cluster_df.sample(n=min(rows*cols, cluster_df.height)).to_dicts()

    for i, row in enumerate(sample_rows):
        video_file = row["video_file"]
        frame_id = row["frame_id"]
        box = fdf.filter(
            (pl.col("video_file") == video_file) & (pl.col("frame_id") == frame_id)
        )\
        .select("box")\
        .to_series()\
        .to_list()[0]
        
        crop = get_fish_crop(video_file, frame_id, box)

        image[
            (i // cols) * 128 : (i // cols) * 128 + crop.shape[0],
            (i % cols) * 128 : (i % cols) * 128 + crop.shape[1],
        ] = crop
    
    plt.figure(figsize=(20, 10))
    plt.imshow(image)
    plt.axis("off")
    plt.title(f"Cluster {cluster_id} Sample Fish Crops")
    plt.show()
[Image grids: randomly sampled fish crops for clusters 0 through 3]

Aha! Look at that! Cluster 0 seems to have been picking up on a false positive: some strange algae growth on the lens. While there are some misclassifications, it's actually rather consistent.

Cluster 1 is very clear. Of course, there are some misclassifications, but a majority seem to be identifying a Yellow Tang fish. This is awesome! Looking closer, we can also see that a lot of the backgrounds are of the coral reef. This might also be a sign that the model is picking up on background features instead of fish features.

Cluster 2 is just a mess. From this, I can see many different species. There is no clear pattern here.

Finally, Cluster 3 looks like it is picking up more on background features again. Nearly all the images seem to be of the blue ocean background. Otherwise, there are several different species of fish present.

So it seems clear that our feature extraction and clustering pipeline did serve some purpose. But it didn't quite cluster on the type of features we had hoped for.

Conclusion and Next Steps¶

I spent a long time on this notebook, nearly a month of my off time from work, wrangling this data together and trying to extract meaningful features. While I didn't achieve my goal, I'm far from done. There are several steps I will be taking in the future to improve this analysis:

  • Experiment with different feature extraction models, especially Vision Transformers or models specifically trained on fish images. The model I am using to gather the detections, the Community Fish Detector, was already trained on a massive dataset. Will its features be useful here? Or will their cosine similarities be too similar (as this model is a binary classifier, after all)?

  • Experiment with different clustering algorithms like HDBSCAN or DBSCAN that do not require specifying the number of clusters upfront. These algorithms can also handle noise and outliers better than KMeans, but they are quite slow on large datasets. There are also deep learning methods like DeepDPM that learn feature representations and cluster assignments simultaneously.

  • Experiment with temporal feature extraction using an object tracker to gather sequences of fish images. This could help capture more information about the fish's movement and behavior, potentially even enhancing resolution of the fish images by using super resolution techniques that work across multiple frames.

  • Perform background removal or automatic segmentation of the detections to retrieve only the fish itself. All my detections include background, which appears to be causing the clustering to group based on background features instead of fish features. Can I solve this with some image processing techniques (there's a rough sketch after this list), or do I need to train a segmentation model?

  • Perform few-shot learning to do semi-supervised classification. Unfortunately, this isn't totally unsupervised and would require some labeled data, but could help improve the clustering results significantly. Fortunately, I know a few marine biologists who might be willing to help label some data!
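
On the background-removal point above, a rough, untested sketch of the pure image-processing route using OpenCV's GrabCut, initialized from a rectangle just inside each crop, might look like this. The margin and iteration count are guesses, not tuned values.

import cv2
import numpy as np

def remove_background(crop: np.ndarray, margin: float = 0.05) -> np.ndarray:
    """Zero out (probable) background pixels in a 3-channel uint8 fish crop."""
    h, w = crop.shape[:2]
    # assume the fish roughly fills the box; shrink the init rectangle a little
    # so the border pixels are treated as definite background
    rect = (int(w * margin), int(h * margin),
            int(w * (1 - 2 * margin)), int(h * (1 - 2 * margin)))

    mask = np.zeros((h, w), np.uint8)
    bgd_model = np.zeros((1, 65), np.float64)
    fgd_model = np.zeros((1, 65), np.float64)

    cv2.grabCut(crop, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)

    # keep definite and probable foreground pixels, zero everything else
    fg = np.where((mask == cv2.GC_FGD) | (mask == cv2.GC_PR_FGD), 1, 0).astype("uint8")
    return crop * fg[:, :, None]

# e.g. masked = remove_background(get_fish_crop(video_file, frame_id, box))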

So many avenues to explore, so little time to do it. But! I hope you enjoyed this little exploration into the underwater world.