以下是根据您的要求提供的Python代码示例:
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import linear_sum_assignment
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score
# Load the dataset; the columns X1, X2 and Label are the ones read below.
data = pd.read_csv('moon.csv')
# Scatter plot of the two features, with each point coloured by its Label value.
fig, ax = plt.subplots()
ax.scatter(data['X1'], data['X2'], c=data['Label'])
ax.set_xlabel('X1')
ax.set_ylabel('X2')
ax.set_title('Scatter Plot of Data')
plt.show()
# K-means clustering into two clusters.
# n_init is given explicitly and random_state is fixed so that the labels
# written to the CSV and the printed metrics are reproducible between runs.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
k_labels = kmeans.fit_predict(data[['X1', 'X2']])
# Add the new cluster labels to the original data and save it to a CSV file.
data['K_Label'] = k_labels
data.to_csv('moon_with_labels.csv', index=False)
# Proportion of correctly grouped samples and silhouette coefficient (K-means).
# Cluster ids are arbitrary: cluster 0 does not have to correspond to Label 0,
# so comparing the two columns element-wise can report 0.1 for a clustering
# that is 90% right. Instead, pair clusters with labels one-to-one so that the
# number of agreeing samples is maximal (Hungarian algorithm on the
# contingency table) and count the samples that fall on the matched pairs.
k_contingency = pd.crosstab(data['Label'], data['K_Label']).to_numpy()
k_label_idx, k_cluster_idx = linear_sum_assignment(k_contingency, maximize=True)
k_correct_count = k_contingency[k_label_idx, k_cluster_idx].sum()
k_accuracy = k_correct_count / len(data)
k_silhouette_avg = silhouette_score(data[['X1', 'X2']], k_labels)
print("K-means正确分类比例:", k_accuracy)
print("K-means轮廓系数:", k_silhouette_avg)
# Hierarchical (agglomerative) clustering into two clusters.
hierarchical = AgglomerativeClustering(n_clusters=2)
h_labels = hierarchical.fit_predict(data[['X1', 'X2']])
# Add the new cluster labels to the original data and save it to a CSV file.
data['H_Label'] = h_labels
data.to_csv('moon_with_labels.csv', index=False)
# Proportion of correctly grouped samples and silhouette coefficient
# (hierarchical clustering).
# Cluster ids are arbitrary: cluster 0 does not have to correspond to Label 0,
# so an element-wise comparison with Label can report a near-zero score for a
# near-perfect clustering. Pair clusters with labels one-to-one so that the
# number of agreeing samples is maximal (Hungarian algorithm on the
# contingency table) and count the samples that fall on the matched pairs.
h_contingency = pd.crosstab(data['Label'], data['H_Label']).to_numpy()
h_label_idx, h_cluster_idx = linear_sum_assignment(h_contingency, maximize=True)
h_correct_count = h_contingency[h_label_idx, h_cluster_idx].sum()
h_accuracy = h_correct_count / len(data)
h_silhouette_avg = silhouette_score(data[['X1', 'X2']], h_labels)
print("层次聚类正确分类比例:", h_accuracy)
print("层次聚类轮廓系数:", h_silhouette_avg)
# DBSCAN clustering.
dbscan = DBSCAN(eps=0.3, min_samples=5)
db_labels = dbscan.fit_predict(data[['X1', 'X2']])
# Add the new cluster labels to the original data and save it to a CSV file.
data['DB_Label'] = db_labels
data.to_csv('moon_with_labels.csv', index=False)
# Proportion of correctly grouped samples and silhouette coefficient (DBSCAN),
# both computed on the points that were assigned to a cluster.
# Despite its name, this mask keeps every non-noise point (core and border);
# DBSCAN labels noise as -1.
db_core_mask = db_labels != -1
db_clustered = data[db_core_mask]
db_clustered_count = len(db_clustered)
if db_clustered_count:
    # Cluster ids are arbitrary and DBSCAN may find more or fewer clusters than
    # there are labels, so an element-wise comparison with Label is
    # meaningless. Pair clusters with labels one-to-one so that the number of
    # agreeing samples is maximal (Hungarian algorithm; it accepts rectangular
    # tables, and samples in unmatched clusters count as wrong).
    db_contingency = pd.crosstab(
        db_clustered['Label'], db_clustered['DB_Label']
    ).to_numpy()
    db_label_idx, db_cluster_idx = linear_sum_assignment(
        db_contingency, maximize=True
    )
    db_core_correct_count = db_contingency[db_label_idx, db_cluster_idx].sum()
    db_accuracy = db_core_correct_count / db_clustered_count
else:
    # Every point was classified as noise: there is nothing to score.
    db_core_correct_count = 0
    db_accuracy = float('nan')
# silhouette_score needs at least 2 clusters and fewer clusters than samples;
# otherwise it raises ValueError, so report NaN in that case.
db_cluster_count = db_clustered['DB_Label'].nunique()
if 2 <= db_cluster_count < db_clustered_count:
    db_silhouette_avg = silhouette_score(
        db_clustered[['X1', 'X2']], db_labels[db_core_mask]
    )
else:
    db_silhouette_avg = float('nan')
print("DBSCAN正确分类比例:", db_accuracy)
print("DBSCAN轮廓系数:", db_silhouette_avg)
请确保在运行代码之前已经安装了相应的库(pandas、matplotlib、scikit-learn;注意安装时的包名是 scikit-learn,在代码中以 sklearn 为名导入)。此代码将生成一个散点图显示数据,并分别使用K-means、层次聚类和DBSCAN进行聚类分析,然后计算每种算法的正确分类比例和轮廓系数。最后,将带有新聚类标签的数据保存到moon_with_labels.csv文件中。
请根据实际情况调整代码中的参数(如eps、min_samples等)以及数据文件路径。
希望对您有所帮助!如果还有其他问题,请随时提问。
内容由零声教学AI助手提供,问题来源于学员提问




