Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
min_samples=self.min_samples,
prediction_data=True,
).fit(self.embeddings)
# Assign soft clusters to embeddings
self.soft_clusters = hdbscan.all_points_membership_vectors(clusterer)
# Store clusterer in S3
store_on_s3(clusterer, self.s3_bucket, self.clusterer_name)
else:
logging.info("Loading fitted HDBSCAN from S3.")
# Load clusterer from S3
clusterer = load_from_s3(self.s3_bucket, self.clusterer_name)
# Predict soft labels
self.soft_clusters = hdbscan.prediction.membership_vector(
clusterer, np.array(self.embeddings)
)
# Group arXiv paper IDs with clusters
id_clusters_mapping = self._create_mappings(
self.ids, self.soft_clusters, "clusters"
)
# Store mapping in DB
s.bulk_insert_mappings(ArticleCluster, id_clusters_mapping)
s.commit()
self.next(self.end)