utils.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. from sqlalchemy.ext.declarative import declarative_base
  2. from sqlalchemy import Column, Integer, String, Float, DateTime, select, create_engine
  3. import uuid
  4. from datetime import datetime, timezone
  5. import pandas as pd
  6. from .database_models import CurrentReduce, CurrentReflux
  7. from sqlalchemy.schema import MetaData, Table
  8. Base = declarative_base()
  9. def create_dynamic_table(dataset_id, columns):
  10. """动态创建数据表"""
  11. # 动态构建列
  12. dynamic_columns = {
  13. 'id': Column(Integer, primary_key=True, autoincrement=True) # 为每个表添加一个主键
  14. }
  15. # 根据 columns 字典动态创建字段
  16. for col_name, col_type in columns.items():
  17. if col_type == 'str':
  18. dynamic_columns[col_name] = Column(String(255))
  19. elif col_type == 'int':
  20. dynamic_columns[col_name] = Column(Integer)
  21. elif col_type == 'float':
  22. dynamic_columns[col_name] = Column(Float)
  23. elif col_type == 'datetime':
  24. dynamic_columns[col_name] = Column(DateTime)
  25. # 动态生成模型类,表名使用 dataset_{dataset_id}
  26. table_name = f"dataset_{dataset_id}"
  27. # 在生成的类中添加 `__tablename__`
  28. dynamic_columns['__tablename__'] = table_name
  29. # 动态创建类
  30. dynamic_class = type(table_name, (Base,), dynamic_columns)
  31. # 打印调试信息
  32. print("table_name:", table_name)
  33. print("dynamic_columns:", dynamic_columns)
  34. # 创建数据库引擎
  35. engine = create_engine('sqlite:///SoilAcidification.db') # 这里需要替换为你的数据库引擎
  36. Base.metadata.create_all(engine) # 创建所有表格
  37. return dynamic_class
  38. # 判断文件类型是否允许
  39. def allowed_file(filename):
  40. ALLOWED_EXTENSIONS = {'xlsx', 'xls'}
  41. return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
  42. # 生成唯一文件名
  43. def generate_unique_filename(filename):
  44. # 获取文件的扩展名
  45. ext = filename.rsplit('.', 1)[1].lower()
  46. # 使用 UUID 和当前时间戳生成唯一文件名(使用 UTC 时区)
  47. unique_filename = f"{uuid.uuid4().hex}_{datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')}.{ext}"
  48. return unique_filename
  49. def infer_column_types(df):
  50. type_map = {
  51. 'object': 'str',
  52. 'int64': 'int',
  53. 'float64': 'float',
  54. 'datetime64[ns]': 'datetime' # 适应Pandas datetime类型
  55. }
  56. # 提取列和其数据类型
  57. return {col: type_map.get(str(df[col].dtype), 'str') for col in df.columns}
  58. def clean_column_names(dataframe):
  59. # Strip whitespace and replace non-breaking spaces and other non-printable characters
  60. dataframe.columns = [col.strip().replace('\xa0', '') for col in dataframe.columns]
  61. return dataframe
  62. # 建立excel文件的列名和数据库模型字段之间的映射
  63. def rename_columns_for_model(dataframe, dataset_type):
  64. if dataset_type == 'reduce':
  65. rename_map = {
  66. '1/b': 'Q_over_b',
  67. 'pH': 'pH',
  68. 'OM': 'OM',
  69. 'CL': 'CL',
  70. 'H': 'H',
  71. 'Al': 'Al'
  72. }
  73. elif dataset_type == 'reflux':
  74. rename_map = {
  75. 'OM': 'OM',
  76. 'CL': 'CL',
  77. 'CEC': 'CEC',
  78. 'H+': 'H_plus',
  79. 'N': 'N',
  80. 'Al3+': 'Al3_plus',
  81. 'ΔpH': 'Delta_pH'
  82. }
  83. # 使用 rename() 方法更新列名
  84. dataframe = dataframe.rename(columns=rename_map)
  85. return dataframe
  86. # 建立前端参数和模型预测字段之间的映射
  87. def rename_columns_for_model_predict(dataframe, dataset_type):
  88. if dataset_type == 'reduce':
  89. rename_map = {
  90. 'init_pH': 'pH',
  91. 'OM': 'OM',
  92. 'CL': 'CL',
  93. 'H': 'H',
  94. 'Al': 'Al'
  95. }
  96. elif dataset_type == 'reflux':
  97. rename_map = {
  98. "OM": "OM",
  99. "CL": "CL",
  100. "CEC": "CEC",
  101. "H+": "H_plus",
  102. "N": "N",
  103. "Al3+": "Al3_plus"
  104. }
  105. # 使用 rename() 方法更新列名
  106. dataframe = dataframe.rename(columns=rename_map)
  107. return dataframe
  108. def insert_data_into_existing_table(session, dataframe, model_class):
  109. """Insert data from a DataFrame into an existing SQLAlchemy model table."""
  110. for index, row in dataframe.iterrows():
  111. record = model_class(**row.to_dict())
  112. session.add(record)
  113. def insert_data_into_dynamic_table(session, dataset_df, dynamic_table_class):
  114. for _, row in dataset_df.iterrows():
  115. record_data = row.to_dict()
  116. session.execute(dynamic_table_class.__table__.insert(), [record_data])
  117. def insert_data_by_type(session, dataset_df, dataset_type):
  118. if dataset_type == 'reduce':
  119. for _, row in dataset_df.iterrows():
  120. record = CurrentReduce(**row.to_dict())
  121. session.add(record)
  122. elif dataset_type == 'reflux':
  123. for _, row in dataset_df.iterrows():
  124. record = CurrentReflux(**row.to_dict())
  125. session.add(record)
  126. def get_current_data(session, data_type):
  127. # 根据数据类型选择相应的表模型
  128. if data_type == 'reduce':
  129. model = CurrentReduce
  130. elif data_type == 'reflux':
  131. model = CurrentReflux
  132. else:
  133. raise ValueError("Invalid data type provided. Choose 'reduce' or 'reflux'.")
  134. # 从数据库中查询所有记录
  135. result = session.execute(select(model))
  136. # 将结果转换为DataFrame
  137. dataframe = pd.DataFrame([dict(row) for row in result])
  138. return dataframe
  139. def get_dataset_by_id(session, dataset_id):
  140. # 动态获取表的元数据
  141. metadata = MetaData(bind=session.bind)
  142. dataset_table = Table(dataset_id, metadata, autoload=True, autoload_with=session.bind)
  143. # 从数据库中查询整个表的数据
  144. query = select(dataset_table)
  145. result = session.execute(query).fetchall()
  146. # 检查是否有数据返回
  147. if not result:
  148. raise ValueError(f"No data found for dataset {dataset_id}.")
  149. # 将结果转换为DataFrame
  150. dataframe = pd.DataFrame(result, columns=[column.name for column in dataset_table.columns])
  151. return dataframe
  152. def predict_to_Q(predictions, init_ph, target_ph):
  153. # 将预测结果转换为Q
  154. Q = predictions * (target_ph - init_ph)
  155. return Q
  156. # 说明:Q指生石灰投加量,单位是%,例如1%代表100g土壤中施加1g生石灰。
  157. # 其中,土壤是指表层20cm土壤。# 如果Q的单位换算为吨/公顷,即t/ha,则需要乘以25。
  158. # ΔpH=目标pH-初始pH
  159. def Q_to_t_ha(Q):
  160. return Q * 25