Spaces:
Sleeping
Sleeping
qqubb
committed on
Commit
·
a0e7778
1
Parent(s):
03820e0
initial code to handle multiple cards
Browse files- __pycache__/compliance_analysis.cpython-310.pyc +0 -0
- __pycache__/utils.cpython-310.pyc +0 -0
- compliance_analysis.py +41 -24
- data_cc.yaml +4 -2
- model_cc.yaml +2 -0
- project_cc.yaml +4 -3
- run.py +88 -0
- utils.py +0 -24
__pycache__/compliance_analysis.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/compliance_analysis.cpython-310.pyc and b/__pycache__/compliance_analysis.cpython-310.pyc differ
|
|
|
__pycache__/utils.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/utils.cpython-310.pyc and b/__pycache__/utils.cpython-310.pyc differ
|
|
|
compliance_analysis.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import yaml
|
| 2 |
-
from utils import
|
| 3 |
|
| 4 |
# Create some variables we will use throughout our analysis
|
| 5 |
|
|
@@ -31,22 +31,40 @@ dispositive_variables = {
|
|
| 31 |
|
| 32 |
def check_overall_compliance(dispositive_variables, cc_files):
|
| 33 |
|
|
|
|
| 34 |
# check intended purposes
|
| 35 |
-
dispositive_variables = check_intended_purpose(dispositive_variables, cc_files)
|
| 36 |
|
| 37 |
# for each model_cc and data_cc - run analysis with ref to project_cc
|
| 38 |
|
| 39 |
-
dispositive_variables = run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml)
|
| 40 |
-
dispositive_variables = run_compliance_analysis_on_model(dispositive_variables, model_cc_yaml)
|
| 41 |
|
| 42 |
dispositive_variables = run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml)
|
| 43 |
|
| 44 |
return dispositive_variables
|
| 45 |
|
| 46 |
-
def run_compliance_analysis_on_project(
|
| 47 |
-
|
| 48 |
-
#
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
set_operator_role_and_location(dispositive_variables, project_cc_yaml)
|
| 51 |
set_eu_market_status(dispositive_variables, project_cc_yaml)
|
| 52 |
|
|
@@ -106,35 +124,35 @@ def run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml):
|
|
| 106 |
|
| 107 |
if gpai_model:
|
| 108 |
|
| 109 |
-
|
| 110 |
|
| 111 |
-
|
| 112 |
|
| 113 |
-
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
|
| 119 |
-
|
| 120 |
|
| 121 |
for key, value in project_cc_yaml['gpai_model_obligations']:
|
| 122 |
if not value:
|
| 123 |
msg = ("GPAI model fails the transparency requirements under Article 53.")
|
| 124 |
|
| 125 |
|
| 126 |
-
if gpai_model_systematic_risk:
|
| 127 |
-
for key, value in project_cc_yaml['gpai_models_with_systemic_risk_obligations']:
|
| 128 |
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
| 132 |
# TODO to be included in project_cc
|
| 133 |
-
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
|
| 139 |
return dispositive_variables
|
| 140 |
|
|
@@ -159,7 +177,6 @@ def run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml):
|
|
| 159 |
# if not value:
|
| 160 |
# msg = (f"Because of the dataset represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
|
| 161 |
|
| 162 |
-
|
| 163 |
# TODO: No matter where we land with an orchestrator function, this function must also check to the value that has been set for both
|
| 164 |
# GPAI models with and without systemic risk and then check to see if the relevant requirements have met if either of these values applies.
|
| 165 |
# Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
|
|
|
|
| 1 |
import yaml
|
| 2 |
+
from utils import set_operator_role_and_location, set_eu_market_status, check_within_scope_cc, check_within_scope_act
|
| 3 |
|
| 4 |
# Create some variables we will use throughout our analysis
|
| 5 |
|
|
|
|
| 31 |
|
| 32 |
def check_overall_compliance(dispositive_variables, cc_files):
|
| 33 |
|
| 34 |
+
|
| 35 |
# check intended purposes
|
| 36 |
+
# dispositive_variables = check_intended_purpose(dispositive_variables, cc_files)
|
| 37 |
|
| 38 |
# for each model_cc and data_cc - run analysis with ref to project_cc
|
| 39 |
|
| 40 |
+
# dispositive_variables = run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml)
|
| 41 |
+
# dispositive_variables = run_compliance_analysis_on_model(dispositive_variables, model_cc_yaml)
|
| 42 |
|
| 43 |
dispositive_variables = run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml)
|
| 44 |
|
| 45 |
return dispositive_variables
|
| 46 |
|
| 47 |
+
def run_compliance_analysis_on_project(project_cc_yaml):
|
| 48 |
+
|
| 49 |
+
# Project Type
|
| 50 |
+
if project_cc_yaml['ai_system']['ai_system']['value']:
|
| 51 |
+
dispositive_variables['ai_project_type']['ai_system'] = True
|
| 52 |
+
if project_cc_yaml['gpai_model']['gpai_model']['value']:
|
| 53 |
+
dispositive_variables['ai_project_type']['gpai_model'] = True
|
| 54 |
+
if dispositive_variables['ai_project_type']['ai_system'] and dispositive_variables['ai_project_type']['gpai_model']:
|
| 55 |
+
dispositive_variables['msg'] = "Your project cannot be both an AI system and a GPAI model. Please revise your Project CC accordingly."
|
| 56 |
+
return dispositive_variables
|
| 57 |
+
|
| 58 |
+
if ai_system == True:
|
| 59 |
+
for key, value in project_cc_yaml['high_risk_ai_system']:
|
| 60 |
+
if value and sum(map(bool, [project_cc_yaml['high_risk_ai_system']['filter_exception_rights'],project_cc_yaml['high_risk_ai_system']['filter_exception_narrow'],project_cc_yaml['high_risk_ai_system']['filter_exception_human'],project_cc_yaml['high_risk_ai_system']['filter_exception_deviation'], project_cc_yaml['high_risk_ai_system']['filter_exception_prep']])) < 1:
|
| 61 |
+
project_type = "high_risk_ai_system"
|
| 62 |
+
|
| 63 |
+
if gpai_model == True:
|
| 64 |
+
if project_cc_yaml['gpai_model_systematic_risk']['evaluation'] or project_cc_yaml['gpai_model_systematic_risk']['flops']:
|
| 65 |
+
project_type = "gpai_model_systematic_risk"
|
| 66 |
+
|
| 67 |
+
# Operator Type
|
| 68 |
set_operator_role_and_location(dispositive_variables, project_cc_yaml)
|
| 69 |
set_eu_market_status(dispositive_variables, project_cc_yaml)
|
| 70 |
|
|
|
|
| 124 |
|
| 125 |
if gpai_model:
|
| 126 |
|
| 127 |
+
# # If the project is a GPAI model with systematic risk, check that it has additionally met all the requirements for such systems:
|
| 128 |
|
| 129 |
+
# if gpai_model_systematic_risk:
|
| 130 |
|
| 131 |
+
# # Do this by examining the Project CC
|
| 132 |
|
| 133 |
+
# for key, value in project_cc_yaml['gpai_obligations_for_systemic_risk_models']:
|
| 134 |
+
# if not value:
|
| 135 |
+
# msg = ("GPAI model with systematic risk fails the transparency requirements under Article 55.")
|
| 136 |
|
| 137 |
+
# Do this by examining the Project CC
|
| 138 |
|
| 139 |
for key, value in project_cc_yaml['gpai_model_obligations']:
|
| 140 |
if not value:
|
| 141 |
msg = ("GPAI model fails the transparency requirements under Article 53.")
|
| 142 |
|
| 143 |
|
| 144 |
+
# if gpai_model_systematic_risk:
|
| 145 |
+
# for key, value in project_cc_yaml['gpai_models_with_systemic_risk_obligations']:
|
| 146 |
|
| 147 |
|
| 148 |
+
# if ai_system:
|
| 149 |
+
# for key, value in project_cc_yaml['']:
|
| 150 |
# TODO to be included in project_cc
|
| 151 |
+
|
| 152 |
|
| 153 |
+
# TODO: No matter where we land with an orchestrator function, this function must also check the value it has set for both
|
| 154 |
+
# GPAI models with and without systemic risk and then check whether the relevant requirements have been met if either of these values applies.
|
| 155 |
+
# This will look a lot like what is happening above for high-risk AI systems.
|
| 156 |
|
| 157 |
return dispositive_variables
|
| 158 |
|
|
|
|
| 177 |
# if not value:
|
| 178 |
# msg = (f"Because of the dataset represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
|
| 179 |
|
|
|
|
| 180 |
# TODO: No matter where we land with an orchestrator function, this function must also check to the value that has been set for both
|
| 181 |
# GPAI models with and without systemic risk and then check to see if the relevant requirements have met if either of these values applies.
|
| 182 |
# Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
|
data_cc.yaml
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
intended_purpose:
|
| 2 |
safety_component:
|
| 3 |
article: 'Art. 6(1)(a)'
|
|
@@ -109,8 +111,8 @@ data_and_data_governance:
|
|
| 109 |
article: 'Art. 10(3)'
|
| 110 |
verbose: 'Training data possesses the appropriate statistical properties, including, where applicable, as regards the people in relation to whom the system is intended to be used'
|
| 111 |
value: !!bool false
|
| 112 |
-
contextual:
|
| 113 |
-
article:
|
| 114 |
verbose: 'Training data takes into account, to the extent required by the intended purpose, the characteristics or elements that are particular to the specific geographical, contextual, behavioural or functional setting within which the system is intended to be used'
|
| 115 |
value: !!bool false
|
| 116 |
personal_data_necessary:
|
|
|
|
| 1 |
+
card_type: "data" # "project", "data" or "model"
|
| 2 |
+
|
| 3 |
intended_purpose:
|
| 4 |
safety_component:
|
| 5 |
article: 'Art. 6(1)(a)'
|
|
|
|
| 111 |
article: 'Art. 10(3)'
|
| 112 |
verbose: 'Training data possesses the appropriate statistical properties, including, where applicable, as regards the people in relation to whom the system is intended to be used'
|
| 113 |
value: !!bool false
|
| 114 |
+
contextual:
|
| 115 |
+
article: 'Art. 10(4)'
|
| 116 |
verbose: 'Training data takes into account, to the extent required by the intended purpose, the characteristics or elements that are particular to the specific geographical, contextual, behavioural or functional setting within which the system is intended to be used'
|
| 117 |
value: !!bool false
|
| 118 |
personal_data_necessary:
|
model_cc.yaml
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
intended_purpose:
|
| 2 |
safety_component:
|
| 3 |
article: 'Art. 6(1)(a)'
|
|
|
|
| 1 |
+
card_type: "model" # "project", "data" or "model"
|
| 2 |
+
|
| 3 |
intended_purpose:
|
| 4 |
safety_component:
|
| 5 |
article: 'Art. 6(1)(a)'
|
project_cc.yaml
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
|
| 2 |
# Information related to high-level characteristics of AI project, including the role of the operator, their location, and where the output is used
|
| 3 |
-
|
|
|
|
| 4 |
operator_details:
|
| 5 |
provider:
|
| 6 |
article: 'Art. 2'
|
|
@@ -29,13 +30,13 @@ ai_system:
|
|
| 29 |
ai_system:
|
| 30 |
article: 'Art. 3(1)'
|
| 31 |
verbose: 'AI project is a machine-based system that is designed to operate with varying levels of autonomy and that may exhibit adaptiveness after deployment, and that, for explicit or implicit objectives, infers, from the input it receives, how to generate outputs such as predictions, content, recommendations, or decisions that can influence physical or virtual environments'
|
| 32 |
-
value: !!bool
|
| 33 |
|
| 34 |
gpai_model:
|
| 35 |
gpai_model:
|
| 36 |
article: 'Art. 3(63)'
|
| 37 |
verbose: 'AI project is an AI model, including where such an AI model is trained with a large amount of data using self-supervision at scale, that displays significant generality and is capable of competently performing a wide range of distinct tasks regardless of the way the model is placed on the market and that can be integrated into a variety of downstream systems or applications, except AI models that are used for research, development or prototyping activities before they are placed on the market'
|
| 38 |
-
value: !!bool
|
| 39 |
|
| 40 |
gpai_model_systematic_risk:
|
| 41 |
evaluation:
|
|
|
|
| 1 |
|
| 2 |
# Information related to high-level characteristics of AI project, including the role of the operator, their location, and where the output is used
|
| 3 |
+
card_type: "project" # "project", "data" or "model"
|
| 4 |
+
|
| 5 |
operator_details:
|
| 6 |
provider:
|
| 7 |
article: 'Art. 2'
|
|
|
|
| 30 |
ai_system:
|
| 31 |
article: 'Art. 3(1)'
|
| 32 |
verbose: 'AI project is a machine-based system that is designed to operate with varying levels of autonomy and that may exhibit adaptiveness after deployment, and that, for explicit or implicit objectives, infers, from the input it receives, how to generate outputs such as predictions, content, recommendations, or decisions that can influence physical or virtual environments'
|
| 33 |
+
value: !!bool true
|
| 34 |
|
| 35 |
gpai_model:
|
| 36 |
gpai_model:
|
| 37 |
article: 'Art. 3(63)'
|
| 38 |
verbose: 'AI project is an AI model, including where such an AI model is trained with a large amount of data using self-supervision at scale, that displays significant generality and is capable of competently performing a wide range of distinct tasks regardless of the way the model is placed on the market and that can be integrated into a variety of downstream systems or applications, except AI models that are used for research, development or prototyping activities before they are placed on the market'
|
| 39 |
+
value: !!bool true
|
| 40 |
|
| 41 |
gpai_model_systematic_risk:
|
| 42 |
evaluation:
|
run.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import yaml
|
| 2 |
+
import json
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from compliance_analysis import run_compliance_analysis_on_project, run_compliance_analysis_on_data, run_compliance_analysis_on_model
|
| 6 |
+
|
| 7 |
+
pd.set_option('display.max_columns', None)
|
| 8 |
+
pd.set_option('display.max_rows', None)
|
| 9 |
+
|
| 10 |
+
files = ["./project_cc.yaml", "./data_cc.yaml", "./data_cc.yaml", "./model_cc.yaml", "./model_cc.yaml", "./model_cc.yaml"]
|
| 11 |
+
|
| 12 |
+
# def load_data(files):
|
| 13 |
+
# cards = []
|
| 14 |
+
# for file in files:
|
| 15 |
+
# with open(file, 'r') as f:
|
| 16 |
+
# if Path(f.name).name == "project_cc.yaml":
|
| 17 |
+
# content = f.read()
|
| 18 |
+
# project_cc_yaml = yaml.safe_load(content)
|
| 19 |
+
# data = project_cc_yaml
|
| 20 |
+
# card_type = "project"
|
| 21 |
+
# cards.append((card_type, data))
|
| 22 |
+
# if Path(f.name).name == "data_cc.yaml":
|
| 23 |
+
# data_cc_yaml = yaml.safe_load(content)
|
| 24 |
+
# data = data_cc_yaml
|
| 25 |
+
# card_type = "data"
|
| 26 |
+
# cards.append((card_type, data))
|
| 27 |
+
# if Path(f.name).name == "model_cc.yaml":
|
| 28 |
+
# model_cc_yaml = yaml.safe_load(content)
|
| 29 |
+
# data = model_cc_yaml
|
| 30 |
+
# card_type = "model"
|
| 31 |
+
# cards.append((card_type, data))
|
| 32 |
+
# return cards
|
| 33 |
+
|
| 34 |
+
# cards = load_data(files)
|
| 35 |
+
|
| 36 |
+
def gather_cards(files):
    """Sort compliance-card YAML files into project, data and model groups.

    Each file is parsed and routed by its top-level ``card_type`` value
    ("project", "data" or "model"). Files whose ``card_type`` is missing or
    not one of those three values are ignored.

    Args:
        files: Iterable of paths to compliance-card YAML files.

    Returns:
        A dict with three keys: ``'project_file'`` (the name of the project
        card file, or ``''`` when none was found), ``'data_files'`` and
        ``'model_files'`` (lists of file names in input order; a path listed
        twice in ``files`` appears twice in the result).
    """
    cards = {'project_file': '', 'data_files': [], 'model_files': []}

    for file in files:
        with open(file, 'r', encoding='utf-8') as f:
            # safe_load reads straight from the open stream; no need to
            # materialise the text with f.read() first.
            content = yaml.safe_load(f)
            name = f.name

        # An empty file parses to None and a scalar/list document has no
        # keys; treat both like a card of unknown type instead of crashing.
        if not isinstance(content, dict):
            continue
        card_type = content.get('card_type')

        # The three types are mutually exclusive, so test them as one chain.
        if card_type == "project":
            # When several project cards are supplied, the last one wins.
            cards['project_file'] = name
        elif card_type == "data":
            cards['data_files'].append(name)
        elif card_type == "model":
            cards['model_files'].append(name)
    return cards
|
| 51 |
+
|
| 52 |
+
cards = gather_cards(files)
|
| 53 |
+
print(cards)
|
| 54 |
+
|
| 55 |
+
# def load_data(files):
|
| 56 |
+
# cards = []
|
| 57 |
+
# for file in files:
|
| 58 |
+
# with open(file, 'r') as f:
|
| 59 |
+
# if Path(f.name).name == "project_cc.yaml":
|
| 60 |
+
# content = f.read()
|
| 61 |
+
# project_cc_yaml = yaml.safe_load(content)
|
| 62 |
+
# data = project_cc_yaml
|
| 63 |
+
# card_type = "project"
|
| 64 |
+
# cards.append((card_type, data))
|
| 65 |
+
# if Path(f.name).name == "data_cc.yaml":
|
| 66 |
+
# data_cc_yaml = yaml.safe_load(content)
|
| 67 |
+
# data = data_cc_yaml
|
| 68 |
+
# card_type = "data"
|
| 69 |
+
# cards.append((card_type, data))
|
| 70 |
+
# if Path(f.name).name == "model_cc.yaml":
|
| 71 |
+
# model_cc_yaml = yaml.safe_load(content)
|
| 72 |
+
# data = model_cc_yaml
|
| 73 |
+
# card_type = "model"
|
| 74 |
+
# cards.append((card_type, data))
|
| 75 |
+
# return cards
|
| 76 |
+
|
| 77 |
+
# def process_files(data):
|
| 78 |
+
# results = []
|
| 79 |
+
# dispositive_variables = check_overall_compliance(yaml.safe_load(data))
|
| 80 |
+
# results.append(dispositive_variables['msg'])
|
| 81 |
+
# return results
|
| 82 |
+
|
| 83 |
+
# for card in cards:
|
| 84 |
+
# data = card[1]
|
| 85 |
+
# yaml_data = yaml.dump(data, sort_keys=False)
|
| 86 |
+
# process_files(yaml_data)
|
| 87 |
+
# print(process_files(yaml_data))
|
| 88 |
+
|
utils.py
CHANGED
|
@@ -3,30 +3,6 @@ import yaml
|
|
| 3 |
# We could probably combine set_type, set_operator_role_and_location, and set_eu_market_status into a single function that sets all project_variables
|
| 4 |
# We will have to add a couple other things to that function as well
|
| 5 |
|
| 6 |
-
def set_type(dispositive_variables, project_cc_yaml):
|
| 7 |
-
|
| 8 |
-
project_type = None
|
| 9 |
-
|
| 10 |
-
ai_system = dispositive_variables['ai_project_type']['ai_system']
|
| 11 |
-
gpai_model = dispositive_variables['ai_project_type']['gpai_model']
|
| 12 |
-
|
| 13 |
-
if project_cc_yaml['ai_system']['ai_system']['value']:
|
| 14 |
-
ai_system = True
|
| 15 |
-
if project_cc_yaml['gpai_model']['gpai_model']['value']:
|
| 16 |
-
gpai_model = True
|
| 17 |
-
if ai_system and gpai_model:
|
| 18 |
-
msg = ("Your project cannot be both an AI system and a GPAI model. Please revise your Project CC accordingly.")
|
| 19 |
-
if ai_system == True:
|
| 20 |
-
for key, value in project_cc_yaml['high_risk_ai_system']:
|
| 21 |
-
if value and sum(map(bool, [project_cc_yaml['high_risk_ai_system']['filter_exception_rights'],project_cc_yaml['high_risk_ai_system']['filter_exception_narrow'],project_cc_yaml['high_risk_ai_system']['filter_exception_human'],project_cc_yaml['high_risk_ai_system']['filter_exception_deviation'], project_cc_yaml['high_risk_ai_system']['filter_exception_prep']])) < 1:
|
| 22 |
-
project_type = "high_risk_ai_system"
|
| 23 |
-
|
| 24 |
-
if gpai_model == True:
|
| 25 |
-
if project_cc_yaml['gpai_model_systematic_risk']['evaluation'] or project_cc_yaml['gpai_model_systematic_risk']['flops']:
|
| 26 |
-
project_type = "gpai_model_systematic_risk"
|
| 27 |
-
|
| 28 |
-
return project_type
|
| 29 |
-
|
| 30 |
def set_operator_role_and_location(dispositive_variables, project_cc_yaml):
|
| 31 |
operators = 0
|
| 32 |
|
|
|
|
| 3 |
# We could probably combine set_type, set_operator_role_and_location, and set_eu_market_status into a single function that sets all project_variables
|
| 4 |
# We will have to add a couple other things to that function as well
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
def set_operator_role_and_location(dispositive_variables, project_cc_yaml):
|
| 7 |
operators = 0
|
| 8 |
|