Spaces:

camlsys
/

compliancecards

Sleeping

App Files Files Community

qqubb committed on Aug 28, 2024

Commit

a0e7778

1 Parent(s): 03820e0

initial code to handle multiple cards

Browse files

Files changed (8) hide show

__pycache__/compliance_analysis.cpython-310.pyc +0 -0
__pycache__/utils.cpython-310.pyc +0 -0
compliance_analysis.py +41 -24
data_cc.yaml +4 -2
model_cc.yaml +2 -0
project_cc.yaml +4 -3
run.py +88 -0
utils.py +0 -24

__pycache__/compliance_analysis.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/compliance_analysis.cpython-310.pyc and b/__pycache__/compliance_analysis.cpython-310.pyc differ

__pycache__/utils.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/utils.cpython-310.pyc and b/__pycache__/utils.cpython-310.pyc differ

compliance_analysis.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import yaml
-from utils import set_type, set_operator_role_and_location, set_eu_market_status, check_within_scope_cc, check_within_scope_act
 # Create some variables we will use throughout our analysis
@@ -31,22 +31,40 @@ dispositive_variables = {
 def check_overall_compliance(dispositive_variables, cc_files):
     # check intended purposes
-    dispositive_variables = check_intended_purpose(dispositive_variables, cc_files)
     # for each model_cc and data_cc - run analysis with ref to project_cc
-    dispositive_variables = run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml)
-    dispositive_variables = run_compliance_analysis_on_model(dispositive_variables, model_cc_yaml)
     dispositive_variables = run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml)
     return dispositive_variables
-def run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml):
-    # Determine project type (AI system vs. GPAI model) as well as operator type. We will use these for different things.
-    project_type = set_type(dispositive_variables, project_cc_yaml)
     set_operator_role_and_location(dispositive_variables, project_cc_yaml)
     set_eu_market_status(dispositive_variables, project_cc_yaml)
@@ -106,35 +124,35 @@ def run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml):
     if gpai_model:
-    # # If the project is a GPAI model with systemic risk, check that it has additionally met all the requirements for such systems:
-    # if gpai_model_systematic_risk:
-    # # Do this by examining the Project CC
-    #     for key, value in project_cc_yaml['gpai_obligations_for_systemic_risk_models']:
-    #         if not value:
-    #             msg = ("GPAI model with systematic risk fails the transparency requirements under Article 55.")
-    # Do this by examining the Project CC
         for key, value in project_cc_yaml['gpai_model_obligations']:
             if not value:
                 msg = ("GPAI model fails the transparency requirements under Article 53.")
-    if gpai_model_systematic_risk:
-        for key, value in project_cc_yaml['gpai_models_with_systemic_risk_obligations']:
-    # if ai_system:
-    #     for key, value in project_cc_yaml['']:
         # TODO to be included in project_cc
-    # TODO: No matter where we land with an orchestrator function, this function must also check the value it has set for both
-    # GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
-    # This will look a lot like what is happening above for high-risk AI systems.
     return dispositive_variables
@@ -159,7 +177,6 @@ def run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml):
     #                 if not value:
     #                     msg = (f"Because of the dataset represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
     # TODO: No matter where we land with an orchestrator function, this function must also check the value that has been set for both
     # GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
     # Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the

 import yaml
+from utils import set_operator_role_and_location, set_eu_market_status, check_within_scope_cc, check_within_scope_act
 # Create some variables we will use throughout our analysis
 def check_overall_compliance(dispositive_variables, cc_files):
     # check intended purposes
+    # dispositive_variables = check_intended_purpose(dispositive_variables, cc_files)
     # for each model_cc and data_cc - run analysis with ref to project_cc
+    # dispositive_variables = run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml)
+    # dispositive_variables = run_compliance_analysis_on_model(dispositive_variables, model_cc_yaml)
     dispositive_variables = run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml)
     return dispositive_variables
+def run_compliance_analysis_on_project(project_cc_yaml):
+    # Project Type
+    if project_cc_yaml['ai_system']['ai_system']['value']:
+        dispositive_variables['ai_project_type']['ai_system'] = True
+    if project_cc_yaml['gpai_model']['gpai_model']['value']:
+        dispositive_variables['ai_project_type']['gpai_model'] = True
+    if dispositive_variables['ai_project_type']['ai_system'] and dispositive_variables['ai_project_type']['gpai_model']:
+        dispositive_variables['msg'] = "Your project cannot be both an AI system and a GPAI model. Please revise your Project CC accordingly."
+        return dispositive_variables
+    if ai_system == True:
+        for key, value in project_cc_yaml['high_risk_ai_system']:
+            if value and sum(map(bool, [project_cc_yaml['high_risk_ai_system']['filter_exception_rights'],project_cc_yaml['high_risk_ai_system']['filter_exception_narrow'],project_cc_yaml['high_risk_ai_system']['filter_exception_human'],project_cc_yaml['high_risk_ai_system']['filter_exception_deviation'], project_cc_yaml['high_risk_ai_system']['filter_exception_prep']])) < 1:
+                project_type = "high_risk_ai_system"
+    if gpai_model == True:
+        if project_cc_yaml['gpai_model_systematic_risk']['evaluation'] or project_cc_yaml['gpai_model_systematic_risk']['flops']:
+            project_type = "gpai_model_systematic_risk"
+    # Operator Type
     set_operator_role_and_location(dispositive_variables, project_cc_yaml)
     set_eu_market_status(dispositive_variables, project_cc_yaml)
     if gpai_model:
+        # # If the project is a GPAI model with systemic risk, check that it has additionally met all the requirements for such systems:
+        # if gpai_model_systematic_risk:
+        # # Do this by examining the Project CC
+        #     for key, value in project_cc_yaml['gpai_obligations_for_systemic_risk_models']:
+        #         if not value:
+        #             msg = ("GPAI model with systematic risk fails the transparency requirements under Article 55.")
+        # Do this by examining the Project CC
         for key, value in project_cc_yaml['gpai_model_obligations']:
             if not value:
                 msg = ("GPAI model fails the transparency requirements under Article 53.")
+    # if gpai_model_systematic_risk:
+        # for key, value in project_cc_yaml['gpai_models_with_systemic_risk_obligations']:
+        # if ai_system:
+        #     for key, value in project_cc_yaml['']:
         # TODO to be included in project_cc
+        # TODO: No matter where we land with an orchestrator function, this function must also check the value it has set for both
+        # GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
+        # This will look a lot like what is happening above for high-risk AI systems.
     return dispositive_variables
     #                 if not value:
     #                     msg = (f"Because of the dataset represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
     # TODO: No matter where we land with an orchestrator function, this function must also check the value that has been set for both
     # GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
     # Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the

data_cc.yaml CHANGED Viewed

@@ -1,3 +1,5 @@
 intended_purpose:
   safety_component:
     article: 'Art. 6(1)(a)'
@@ -109,8 +111,8 @@ data_and_data_governance:
     article: 'Art. 10(3)'
     verbose: 'Training data possesses the appropriate statistical properties, including, where applicable, as regards the people in relation to whom the system is intended to be used'
     value: !!bool false
-  contextual: 'Art. 10(4)'
-    article:
     verbose: 'Training data takes into account, to the extent required by the intended purpose, the characteristics or elements that are particular to the specific geographical, contextual, behavioural or functional setting within which the system is intended to be used'
     value: !!bool false
   personal_data_necessary:

+card_type: "data" # "project", "data" or "model"
 intended_purpose:
   safety_component:
     article: 'Art. 6(1)(a)'
     article: 'Art. 10(3)'
     verbose: 'Training data possesses the appropriate statistical properties, including, where applicable, as regards the people in relation to whom the system is intended to be used'
     value: !!bool false
+  contextual:
+    article: 'Art. 10(4)'
     verbose: 'Training data takes into account, to the extent required by the intended purpose, the characteristics or elements that are particular to the specific geographical, contextual, behavioural or functional setting within which the system is intended to be used'
     value: !!bool false
   personal_data_necessary:

model_cc.yaml CHANGED Viewed

@@ -1,3 +1,5 @@
 intended_purpose:
   safety_component:
     article: 'Art. 6(1)(a)'

+card_type: "model" # "project", "data" or "model"
 intended_purpose:
   safety_component:
     article: 'Art. 6(1)(a)'

project_cc.yaml CHANGED Viewed

@@ -1,6 +1,7 @@
 # Information related to high-level characteristics of AI project, including the role of the operator, their location, and where the output is used
 operator_details:
   provider:
     article: 'Art. 2'
@@ -29,13 +30,13 @@ ai_system:
   ai_system:
     article: 'Art. 3(1)'
     verbose: 'AI project is a machine-based system that is designed to operate with varying levels of autonomy and that may exhibit adaptiveness after deployment, and that, for explicit or implicit objectives, infers, from the input it receives, how to generate outputs such as predictions, content, recommendations, or decisions that can influence physical or virtual environments'
-    value: !!bool false
 gpai_model:
   gpai_model:
     article: 'Art. 3(63)'
     verbose: 'AI project is an AI model, including where such an AI model is trained with a large amount of data using self-supervision at scale, that displays significant generality and is capable of competently performing a wide range of distinct tasks regardless of the way the model is placed on the market and that can be integrated into a variety of downstream systems or applications, except AI models that are used for research, development or prototyping activities before they are placed on the market'
-    value: !!bool false
 gpai_model_systematic_risk:
   evaluation:

 # Information related to high-level characteristics of AI project, including the role of the operator, their location, and where the output is used
+card_type: "project" # "project", "data" or "model"
 operator_details:
   provider:
     article: 'Art. 2'
   ai_system:
     article: 'Art. 3(1)'
     verbose: 'AI project is a machine-based system that is designed to operate with varying levels of autonomy and that may exhibit adaptiveness after deployment, and that, for explicit or implicit objectives, infers, from the input it receives, how to generate outputs such as predictions, content, recommendations, or decisions that can influence physical or virtual environments'
+    value: !!bool true
 gpai_model:
   gpai_model:
     article: 'Art. 3(63)'
     verbose: 'AI project is an AI model, including where such an AI model is trained with a large amount of data using self-supervision at scale, that displays significant generality and is capable of competently performing a wide range of distinct tasks regardless of the way the model is placed on the market and that can be integrated into a variety of downstream systems or applications, except AI models that are used for research, development or prototyping activities before they are placed on the market'
+    value: !!bool true
 gpai_model_systematic_risk:
   evaluation:

run.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import yaml
+import json
+from pathlib import Path
+import pandas as pd
+from compliance_analysis import run_compliance_analysis_on_project, run_compliance_analysis_on_data, run_compliance_analysis_on_model
+pd.set_option('display.max_columns', None)
+pd.set_option('display.max_rows', None)
+files = ["./project_cc.yaml", "./data_cc.yaml", "./data_cc.yaml", "./model_cc.yaml", "./model_cc.yaml", "./model_cc.yaml"]
+# def load_data(files):
+#     cards = []
+#     for file in files:
+#         with open(file, 'r') as f:
+#             if Path(f.name).name == "project_cc.yaml":
+#                 content = f.read()
+#                 project_cc_yaml = yaml.safe_load(content)
+#                 data = project_cc_yaml
+#                 card_type = "project"
+#                 cards.append((card_type, data))
+#             if Path(f.name).name == "data_cc.yaml":
+#                 data_cc_yaml = yaml.safe_load(content)
+#                 data = data_cc_yaml
+#                 card_type = "data"
+#                 cards.append((card_type, data))
+#             if Path(f.name).name == "model_cc.yaml":
+#                 model_cc_yaml = yaml.safe_load(content)
+#                 data = model_cc_yaml
+#                 card_type = "model"
+#                 cards.append((card_type, data))
+#     return cards
+# cards = load_data(files)
+def gather_cards(files):
+    cards = {}
+    cards['project_file'] = ''
+    cards['data_files'] = []
+    cards['model_files'] = []
+    for file in files:
+        with open(file, 'r') as f:
+            content = yaml.safe_load(f.read())
+            if content['card_type'] == "project":
+                cards['project_file'] = f.name
+            if content['card_type'] == "data":
+                cards['data_files'].append(f.name)
+            if content['card_type'] == "model":
+                cards['model_files'].append(f.name)
+    return cards
+cards = gather_cards(files)
+print(cards)
+# def load_data(files):
+#     cards = []
+#     for file in files:
+#         with open(file, 'r') as f:
+#             if Path(f.name).name == "project_cc.yaml":
+#                 content = f.read()
+#                 project_cc_yaml = yaml.safe_load(content)
+#                 data = project_cc_yaml
+#                 card_type = "project"
+#                 cards.append((card_type, data))
+#             if Path(f.name).name == "data_cc.yaml":
+#                 data_cc_yaml = yaml.safe_load(content)
+#                 data = data_cc_yaml
+#                 card_type = "data"
+#                 cards.append((card_type, data))
+#             if Path(f.name).name == "model_cc.yaml":
+#                 model_cc_yaml = yaml.safe_load(content)
+#                 data = model_cc_yaml
+#                 card_type = "model"
+#                 cards.append((card_type, data))
+#     return cards
+# def process_files(data):
+#     results = []
+#     dispositive_variables = check_overall_compliance(yaml.safe_load(data))
+#     results.append(dispositive_variables['msg'])
+#     return results
+# for card in cards:
+#     data = card[1]
+#     yaml_data = yaml.dump(data, sort_keys=False)
+#     process_files(yaml_data)
+#     print(process_files(yaml_data))

utils.py CHANGED Viewed

@@ -3,30 +3,6 @@ import yaml
 # We could probably combine set_type, set_operator_role_and_location, and set_eu_market_status into a single function that sets all project_variables
 # We will have to add a couple other things to that function as well
-def set_type(dispositive_variables, project_cc_yaml):
-    project_type = None
-    ai_system = dispositive_variables['ai_project_type']['ai_system']
-    gpai_model = dispositive_variables['ai_project_type']['gpai_model']
-    if project_cc_yaml['ai_system']['ai_system']['value']:
-        ai_system = True
-    if project_cc_yaml['gpai_model']['gpai_model']['value']:
-        gpai_model = True
-    if ai_system and gpai_model:
-        msg = ("Your project cannot be both an AI system and a GPAI model. Please revise your Project CC accordingly.")
-    if ai_system == True:
-        for key, value in project_cc_yaml['high_risk_ai_system']:
-            if value and sum(map(bool, [project_cc_yaml['high_risk_ai_system']['filter_exception_rights'],project_cc_yaml['high_risk_ai_system']['filter_exception_narrow'],project_cc_yaml['high_risk_ai_system']['filter_exception_human'],project_cc_yaml['high_risk_ai_system']['filter_exception_deviation'], project_cc_yaml['high_risk_ai_system']['filter_exception_prep']])) < 1:
-                project_type = "high_risk_ai_system"
-    if gpai_model == True:
-        if project_cc_yaml['gpai_model_systematic_risk']['evaluation'] or project_cc_yaml['gpai_model_systematic_risk']['flops']:
-            project_type = "gpai_model_systematic_risk"
-    return project_type
 def set_operator_role_and_location(dispositive_variables, project_cc_yaml):
     operators = 0

 # We could probably combine set_type, set_operator_role_and_location, and set_eu_market_status into a single function that sets all project_variables
 # We will have to add a couple other things to that function as well
 def set_operator_role_and_location(dispositive_variables, project_cc_yaml):
     operators = 0